From 9c0d3cf4e173b1fcfa2022508564a85b384d2f01 Mon Sep 17 00:00:00 2001 From: Untone Date: Sun, 29 Sep 2024 14:12:55 +0300 Subject: [PATCH] added-fix --- handlers/messages_routing.py | 14 +++++++++----- nlp/stopwords_detector.py | 16 ++-------------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/handlers/messages_routing.py b/handlers/messages_routing.py index 1b1ab3c..860f194 100644 --- a/handlers/messages_routing.py +++ b/handlers/messages_routing.py @@ -112,14 +112,18 @@ async def messages_routing(msg, state): text += '\n' normalized_text = normalize(text) + logger.info(f"normalized text: {normalized_text}") toxic_score = detector(normalized_text) + sw_score = 0 + if toxic_score < 0.91: + logger.info('re-check stopwords in combinations') + stopwords_detected = check_stopwords(normalized_text) + for stopword in stopwords_detected: + sw_score += detect(stopword) toxic_perc = toxic_score * 100 logger.info(f'original toxic: {toxic_perc}') - if toxic_score < 0.91: - logger.info('re-check without spaces') - toxic_perc += check_stopwords(normalized_text) - logger.info(f"text: {normalized_text}\ntoxic: {toxic_perc}%") - + toxic_perc = (toxic_score + sw_score) * 100 + logger.info(f'added stopwords toxic: {sw_score*100}') await redis.set(f"toxic:{cid}", mid) await redis.set(f"toxic:{cid}:{uid}:{mid}", math.floor(toxic_perc), ex=60 * 60 * 24 * 3) if toxic_score > 0.75: diff --git a/nlp/stopwords_detector.py b/nlp/stopwords_detector.py index 9dcfab4..b70ff9b 100644 --- a/nlp/stopwords_detector.py +++ b/nlp/stopwords_detector.py @@ -72,16 +72,7 @@ def check_stopwords(text): Examples: >>> check_stopwords("this is a хуй") - 40 - - >>> check_stopwords("this is clean") - 0 - - >>> check_stopwords("хуй is a хуй") - 80 - - >>> check_stopwords("clean is clean") - 0 + {'хуй'} """ # Normalize the text by splitting into words @@ -90,10 +81,7 @@ def check_stopwords(text): # Check for any intersection with stopword_set stopwords_found = stopword_set.intersection(words) - # Calculate the score based on the number of stopwords found - score = 90 + len(stopwords_found) - - return score + return stopwords_found # Example usage if __name__ == "__main__":