From 9c0d3cf4e173b1fcfa2022508564a85b384d2f01 Mon Sep 17 00:00:00 2001
From: Untone <anton.rewin@gmail.com>
Date: Sun, 29 Sep 2024 14:12:55 +0300
Subject: [PATCH] added-fix

---
 handlers/messages_routing.py | 14 +++++++++-----
 nlp/stopwords_detector.py    | 16 ++--------------
 2 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/handlers/messages_routing.py b/handlers/messages_routing.py
index 1b1ab3c..860f194 100644
--- a/handlers/messages_routing.py
+++ b/handlers/messages_routing.py
@@ -112,14 +112,18 @@ async def messages_routing(msg, state):
                         text += '\n'
 
             normalized_text = normalize(text)
+            logger.info(f"normalized text: {normalized_text}")
             toxic_score = detector(normalized_text)
+            sw_score = 0
+            if toxic_score < 0.91:
+                logger.info('re-check stopwords in combinations')
+                stopwords_detected = check_stopwords(normalized_text)
+                for stopword in stopwords_detected:
+                    sw_score += detect(stopword)
             toxic_perc = toxic_score * 100
             logger.info(f'original toxic: {toxic_perc}')
-            if toxic_score < 0.91:
-                logger.info('re-check without spaces')
-                toxic_perc += check_stopwords(normalized_text)
-            logger.info(f"text: {normalized_text}\ntoxic: {toxic_perc}%")
-
+            toxic_perc = (toxic_score + sw_score) * 100
+            logger.info(f'added stopwords toxic: {sw_score*100}')
             await redis.set(f"toxic:{cid}", mid)
             await redis.set(f"toxic:{cid}:{uid}:{mid}", math.floor(toxic_perc), ex=60 * 60 * 24 * 3)
             if toxic_score > 0.75:
diff --git a/nlp/stopwords_detector.py b/nlp/stopwords_detector.py
index 9dcfab4..b70ff9b 100644
--- a/nlp/stopwords_detector.py
+++ b/nlp/stopwords_detector.py
@@ -72,16 +72,7 @@ def check_stopwords(text):
 
     Examples:
         >>> check_stopwords("this is a хуй")
-        40
-
-        >>> check_stopwords("this is clean")
-        0
-
-        >>> check_stopwords("хуй is a хуй")
-        80
-
-        >>> check_stopwords("clean is clean")
-        0
+        {'хуй'}
     """
 
     # Normalize the text by splitting into words
@@ -90,10 +81,7 @@ def check_stopwords(text):
     # Check for any intersection with stopword_set
     stopwords_found = stopword_set.intersection(words)
 
-    # Calculate the score based on the number of stopwords found
-    score = 90 + len(stopwords_found)
-
-    return score
+    return stopwords_found
 
 # Example usage
 if __name__ == "__main__":