diff --git a/nlp/normalize.py b/nlp/normalize.py index 4db8b60..0229554 100644 --- a/nlp/normalize.py +++ b/nlp/normalize.py @@ -42,12 +42,13 @@ def normalize(text): Normalize English text to resemble Russian characters. """ # Segment the text first - t = segment_text(text.replace(" ", " ")) + t = segment_text(text.replace(" ", " ").replace(" ", " ").replace(" ", " ")) + + t = t.lower() if is_russian_wording(t): # Normalize the text by replacing characters - normalized_text = ( - t.lower() + normalized_text = (t .replace("e", "е") .replace("o", "о") .replace("x", "х") @@ -82,4 +83,3 @@ if __name__ == "__main__": normalized_output = normalize(input_text) print(normalized_output) - \ No newline at end of file