toxic-debug15

Untone 2024-09-26 21:36:14 +03:00
parent a9203fdeaf
commit 8d6daeabe2
2 changed files with 30 additions and 1 deletion

@@ -56,7 +56,7 @@ async def messages_routing(msg, state):
            message_id=mid
        )
    else:
-       toxic_score = detector(text.lower())
+       toxic_score = detector(normalize(text))
        toxic_perc = math.floor(toxic_score*100)
        await redis.set(f"toxic:{cid}", mid)
        await redis.set(f"toxic:{cid}:{uid}:{mid}", toxic_perc, ex=60*60*24*3)

utils/normalize.py Normal file

@@ -0,0 +1,29 @@
def is_russian_wording(text):
    """
    Check if the text contains any Russian characters by checking
    each character against the Unicode range for Cyrillic.
    """
    # Check if any character in the text is a Cyrillic character
    for char in text:
        if '\u0400' <= char <= '\u04FF':  # Unicode range for Cyrillic characters
            return True
    return False


def normalize(text):
    """
    Fold Latin look-alike letters (and a few leet-style substitutions)
    into their Cyrillic counterparts, so obfuscated Russian text is
    scored consistently by the toxicity detector.
    """
    if is_russian_wording(text):
        # Lowercase first, then map Latin look-alikes to Cyrillic letters
        text = (text.lower()
                .replace('e', 'е')
                .replace('o', 'о')
                .replace('x', 'х')
                .replace('a', 'а')
                .replace('r', 'г')
                .replace('m', 'м')
                .replace('u', 'и')
                .replace('n', 'п')
                .replace('p', 'р'))
    return text
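
A few hedged usage checks for the helpers above (illustrative, not part of the commit); they assume the file is importable as utils.normalize from the project root.

# Illustrative checks; assumes utils.normalize is importable from the project root.
from utils.normalize import is_russian_wording, normalize

assert is_russian_wording("привет")          # Cyrillic detected
assert not is_russian_wording("hello")       # pure Latin text
assert normalize("HELLO") == "HELLO"         # no Cyrillic: returned as-is

plain      = "токсично"                      # pure Cyrillic spelling
obfuscated = "т" + "o" + "ксичн" + "o"       # same word with two Latin 'o' letters
assert plain != obfuscated                   # the raw strings differ
assert normalize(obfuscated) == plain        # after folding, they match

Note that the mapping table only covers lowercase Latin letters, which is why text.lower() runs before the replacements; pure-Latin input is returned untouched, so English-only messages keep their original casing.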