From c9728e14a7a59784543762ce44d1052e0869e89e Mon Sep 17 00:00:00 2001 From: Untone Date: Fri, 27 Sep 2024 13:39:40 +0300 Subject: [PATCH] average-stabw --- handlers/messages_routing.py | 2 +- nlp/normalize.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/handlers/messages_routing.py b/handlers/messages_routing.py index b14f53c..cd5f9d7 100644 --- a/handlers/messages_routing.py +++ b/handlers/messages_routing.py @@ -92,7 +92,7 @@ async def messages_routing(msg, state): except Exception: pass else: - toxic_score = detector(text) + toxic_score = detector(normalize(text)) toxic_perc = math.floor(toxic_score * 100) await redis.set(f"toxic:{cid}", mid) await redis.set(f"toxic:{cid}:{uid}:{mid}", toxic_perc, ex=60 * 60 * 24 * 3) diff --git a/nlp/normalize.py b/nlp/normalize.py index 11ab692..1493dec 100644 --- a/nlp/normalize.py +++ b/nlp/normalize.py @@ -44,8 +44,10 @@ def normalize(text): """ Normalize English text to resemble Russian characters. """ + t = text.replace(" ", " ").replace(" ", " ").replace(" ", " ") + # Segment the text first - t = segment_text(text.replace(" ", " ").replace(" ", " ").replace(" ", " ")) + # t = segment_text(t) t = t.lower()