2024-09-26 18:36:14 +00:00
|
|
|
|
def is_russian_wording(text: str) -> bool:
    """Return True if *text* contains at least one Cyrillic character.

    A character counts as Cyrillic when its code point lies in the Unicode
    "Cyrillic" block, U+0400 through U+04FF inclusive. The check is per
    script block, not per language: letters used only by other
    Cyrillic-script languages fall inside the same range and also match.

    An empty string contains no characters and therefore yields False.
    """
    # any() stops at the first match, just like an explicit loop with an
    # early return would.
    return any("\u0400" <= char <= "\u04FF" for char in text)
|
|
|
|
|
|
|
|
|
|
# Latin letters and digits mapped to the Cyrillic letter each one is replaced
# with. The Cyrillic side is written as escapes because several of these
# letters are visually identical to their Latin counterparts in most fonts.
_LATIN_TO_CYRILLIC = str.maketrans({
    "a": "\u0430",  # а
    "b": "\u0432",  # в
    "c": "\u0441",  # с
    "d": "\u0434",  # д
    "e": "\u0435",  # е
    "h": "\u043d",  # н
    "i": "\u0439",  # й
    "k": "\u043a",  # к
    "m": "\u043c",  # м
    "n": "\u043f",  # п
    "o": "\u043e",  # о
    "p": "\u0440",  # р
    "r": "\u0433",  # г
    "t": "\u0442",  # т
    "u": "\u0438",  # и
    "x": "\u0445",  # х
    "y": "\u0443",  # у
    "0": "\u043e",  # о
    "3": "\u0437",  # з
    "4": "\u0447",  # ч
})


def normalize(text: str) -> str:
    """Replace Latin look-alike characters in Cyrillic text.

    If *text* contains at least one Cyrillic character (as reported by
    ``is_russian_wording``), it is lowercased and every Latin letter or
    digit listed in ``_LATIN_TO_CYRILLIC`` is replaced by its Cyrillic
    counterpart. Characters not in the table are left as they are after
    lowercasing.

    If *text* contains no Cyrillic character it is returned unchanged;
    in particular it is not lowercased.
    """
    if not is_russian_wording(text):
        return text

    # Every replacement output is Cyrillic and none of them is a key of the
    # table, so a single simultaneous translate pass gives the same result
    # as applying the substitutions one after another.
    return text.lower().translate(_LATIN_TO_CYRILLIC)
|