From 66d0bba13caf54caa76d7f54d87ac09a1eef1e6a Mon Sep 17 00:00:00 2001 From: Untone Date: Fri, 27 Sep 2024 11:18:18 +0300 Subject: [PATCH] normlizer-fix5 --- nlp/normalize.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nlp/normalize.py b/nlp/normalize.py index 4db8b60..0229554 100644 --- a/nlp/normalize.py +++ b/nlp/normalize.py @@ -42,12 +42,13 @@ def normalize(text): Normalize English text to resemble Russian characters. """ # Segment the text first - t = segment_text(text.replace(" ", " ")) + t = segment_text(text.replace(" ", " ").replace(" ", " ").replace(" ", " ")) + + t = t.lower() if is_russian_wording(t): # Normalize the text by replacing characters - normalized_text = ( - t.lower() + normalized_text = (t .replace("e", "е") .replace("o", "о") .replace("x", "х") @@ -82,4 +83,3 @@ if __name__ == "__main__": normalized_output = normalize(input_text) print(normalized_output) - \ No newline at end of file