nosegm
This commit is contained in:
@@ -5,10 +5,11 @@ from state.scan import get_average_pattern
|
||||
from bot.api import telegram_api, download_file
|
||||
from bot.config import FEEDBACK_CHAT_ID
|
||||
from handlers.handle_private import handle_private
|
||||
from nlp.segment_text import segment_text
|
||||
from nlp.toxicity_detector import detector
|
||||
from nlp.normalize import normalize
|
||||
from nlp.ocr import ocr_recognize
|
||||
from nlp.stopwords_detector import check_stopwords
|
||||
|
||||
|
||||
logger = logging.getLogger("handlers.messages_routing")
|
||||
|
||||
@@ -110,20 +111,22 @@ async def messages_routing(msg, state):
|
||||
text += '\n'
|
||||
|
||||
normalized_text = normalize(text)
|
||||
segmented_text = segment_text(normalized_text)
|
||||
toxic_score = detector(segmented_text)
|
||||
toxic_score = detector(normalized_text)
|
||||
toxic_perc = math.floor(toxic_score * 100)
|
||||
logger.info(f"\segmented_text: {segmented_text}\ntoxic: {toxic_perc}%")
|
||||
logger.info(f"\text: {normalized_text}\ntoxic: {toxic_perc}%")
|
||||
|
||||
nospaces_text = text.replace(' ', '')
|
||||
nospaces_normalized_text = normalize(nospaces_text)
|
||||
nospaces_segmented_text = segment_text(nospaces_normalized_text)
|
||||
nospaces_text_score = detector(nospaces_segmented_text)
|
||||
nospaces_text_perc = math.floor(nospaces_text_score * 100)
|
||||
logger.info(f"\nospaces_segmented_text: {nospaces_segmented_text}\nnospaces_toxic: {toxic_perc}%")
|
||||
nospaces_text = text.replace(" ", "")
|
||||
if nospaces_text != text:
|
||||
nospaces_normalized_text = normalize(nospaces_text)
|
||||
nospaces_text_score = detector(nospaces_normalized_text)
|
||||
nospaces_text_perc = math.floor(nospaces_text_score * 100)
|
||||
if check_stopwords(nospaces_normalized_text):
|
||||
logger.info('stopword detected with no spaces, toxicity +40%')
|
||||
nospaces_text_perc += 40
|
||||
logger.info(f"\nospaces_text: {nospaces_normalized_text}\nnospaces_toxic: {nospaces_text_perc}%")
|
||||
|
||||
if (nospaces_text != text and nospaces_text_score > toxic_score) or nospaces_text_perc > 95:
|
||||
text_perc = nospaces_text_perc
|
||||
if nospaces_text_score > toxic_score or nospaces_text_perc > 95:
|
||||
text_perc = nospaces_text_perc
|
||||
|
||||
await redis.set(f"toxic:{cid}", mid)
|
||||
await redis.set(f"toxic:{cid}:{uid}:{mid}", toxic_perc, ex=60 * 60 * 24 * 3)
|
||||
|
Reference in New Issue
Block a user