toxic-debug15
This commit is contained in:
parent
a9203fdeaf
commit
8d6daeabe2
|
@ -56,7 +56,7 @@ async def messages_routing(msg, state):
|
|||
message_id=mid
|
||||
)
|
||||
else:
|
||||
toxic_score = detector(text.lower())
|
||||
toxic_score = detector(normalize(text))
|
||||
toxic_perc = math.floor(toxic_score*100)
|
||||
await redis.set(f"toxic:{cid}", mid)
|
||||
await redis.set(f"toxic:{cid}:{uid}:{mid}", toxic_perc, ex=60*60*24*3)
|
||||
|
|
29
utils/normalize.py
Normal file
29
utils/normalize.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
def is_russian_wording(text):
    """
    Report whether ``text`` contains at least one Cyrillic character.

    A character counts as Cyrillic when its code point lies in the
    Unicode range U+0400..U+04FF (both ends inclusive).

    Returns True as soon as one such character is found, False when
    there is none (including for an empty string).
    """
    return any('\u0400' <= symbol <= '\u04FF' for symbol in text)
|
||||
|
||||
# Latin letters and the visually similar Cyrillic letters they are mapped to.
# The Cyrillic side is written as escapes so that look-alike glyphs cannot be
# confused with their Latin counterparts when reading or editing this table.
_LATIN_TO_CYRILLIC = str.maketrans({
    'e': '\u0435',  # CYRILLIC SMALL LETTER IE
    'o': '\u043e',  # CYRILLIC SMALL LETTER O
    'x': '\u0445',  # CYRILLIC SMALL LETTER HA
    'a': '\u0430',  # CYRILLIC SMALL LETTER A
    'r': '\u0433',  # CYRILLIC SMALL LETTER GHE
    'm': '\u043c',  # CYRILLIC SMALL LETTER EM
    'u': '\u0438',  # CYRILLIC SMALL LETTER I
    'n': '\u043f',  # CYRILLIC SMALL LETTER PE
    'p': '\u0440',  # CYRILLIC SMALL LETTER ER
})


def normalize(text):
    """
    Lower-case ``text`` and unify look-alike letters in Cyrillic text.

    The text is always lower-cased. If it contains at least one Cyrillic
    character (as decided by ``is_russian_wording``), the Latin letters
    ``e o x a r m u n p`` are additionally replaced by visually similar
    Cyrillic letters. Text without any Cyrillic character is only
    lower-cased; its Latin letters are left alone.

    Returns the normalized string.
    """
    # Lower-case unconditionally: previously this only happened inside the
    # Cyrillic branch, so text without Cyrillic was returned with its
    # original casing.
    lowered = text.lower()

    if is_russian_wording(lowered):
        # All targets are Cyrillic and all sources are Latin, so one
        # translate pass is equivalent to replacing the letters one by one.
        return lowered.translate(_LATIN_TO_CYRILLIC)

    return lowered
|
Loading…
Reference in New Issue
Block a user