welcomecenterbot/utils/normalize.py
2024-09-26 21:36:14 +03:00

29 lines
975 B
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

def is_russian_wording(text: str) -> bool:
    """
    Report whether the text contains at least one Cyrillic character.

    A character counts as Cyrillic when its code point lies in the
    Unicode range U+0400..U+04FF (inclusive). The check is purely about
    the script, not the language: any character in that range matches,
    and characters outside it are not considered.

    Args:
        text: The string to inspect.

    Returns:
        True if any character of ``text`` falls in U+0400..U+04FF,
        otherwise False (an empty string yields False).
    """
    # Single-character strings compare by code point, so a chained
    # comparison against the range bounds is a direct membership test.
    return any('\u0400' <= char <= '\u04FF' for char in text)
# Latin letters and the Cyrillic letters they are replaced with.
# The Cyrillic side is written with \u escapes because several of these
# glyphs are visually indistinguishable from ASCII letters in most fonts;
# the trailing comment shows the resulting Cyrillic letter.
_LATIN_TO_CYRILLIC = str.maketrans({
    'e': '\u0435',  # е
    'o': '\u043e',  # о
    'x': '\u0445',  # х
    'a': '\u0430',  # а
    'r': '\u0433',  # г
    'm': '\u043c',  # м
    'u': '\u0438',  # и
    'n': '\u043f',  # п
    'p': '\u0440',  # р
})


def normalize(text):
    """
    Replace Latin look-alike letters with Cyrillic ones in Cyrillic text.

    If ``text`` contains at least one Cyrillic character (as decided by
    ``is_russian_wording``), it is lowercased and the Latin letters
    e, o, x, a, r, m, u, n, p are replaced with the Cyrillic letters
    listed in ``_LATIN_TO_CYRILLIC``. Text without any Cyrillic character
    is returned exactly as given (it is not lowercased).

    Args:
        text: The string to normalize.

    Returns:
        The lowercased, letter-substituted string when ``text`` contains
        Cyrillic; otherwise ``text`` unchanged.
    """
    if not is_russian_wording(text):
        return text
    # One translate pass is equivalent to the chain of replace() calls:
    # every replacement output is Cyrillic, so no output is ever the
    # input of another substitution.
    return text.lower().translate(_LATIN_TO_CYRILLIC)