normalizer-fix5
This commit is contained in:
parent
e241e14764
commit
66d0bba13c
|
@ -42,12 +42,13 @@ def normalize(text):
|
||||||
Normalize English text to resemble Russian characters.
|
Normalize English text to resemble Russian characters.
|
||||||
"""
|
"""
|
||||||
# Segment the text first
|
# Segment the text first
|
||||||
t = segment_text(text.replace(" ", " "))
|
t = segment_text(text.replace(" ", " ").replace(" ", " ").replace(" ", " "))
|
||||||
|
|
||||||
|
t = t.lower()
|
||||||
|
|
||||||
if is_russian_wording(t):
|
if is_russian_wording(t):
|
||||||
# Normalize the text by replacing characters
|
# Normalize the text by replacing characters
|
||||||
normalized_text = (
|
normalized_text = (t
|
||||||
t.lower()
|
|
||||||
.replace("e", "е")
|
.replace("e", "е")
|
||||||
.replace("o", "о")
|
.replace("o", "о")
|
||||||
.replace("x", "х")
|
.replace("x", "х")
|
||||||
|
@ -82,4 +83,3 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
normalized_output = normalize(input_text)
|
normalized_output = normalize(input_text)
|
||||||
print(normalized_output)
|
print(normalized_output)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user