diff --git a/handlers/messages_routing.py b/handlers/messages_routing.py index b14f53c..cd5f9d7 100644 --- a/handlers/messages_routing.py +++ b/handlers/messages_routing.py @@ -92,7 +92,7 @@ async def messages_routing(msg, state): except Exception: pass else: - toxic_score = detector(text) + toxic_score = detector(normalize(text)) toxic_perc = math.floor(toxic_score * 100) await redis.set(f"toxic:{cid}", mid) await redis.set(f"toxic:{cid}:{uid}:{mid}", toxic_perc, ex=60 * 60 * 24 * 3) diff --git a/nlp/normalize.py b/nlp/normalize.py index 11ab692..1493dec 100644 --- a/nlp/normalize.py +++ b/nlp/normalize.py @@ -44,8 +44,10 @@ def normalize(text): """ Normalize English text to resemble Russian characters. """ + t = text.replace(" ", " ").replace(" ", " ").replace(" ", " ") + # Segment the text first - t = segment_text(text.replace(" ", " ").replace(" ", " ").replace(" ", " ")) + # t = segment_text(t) t = t.lower()