From e1ae8a2de07df52fc0635a0a89a88abc0bbb3642 Mon Sep 17 00:00:00 2001 From: Untone Date: Fri, 27 Sep 2024 10:15:18 +0300 Subject: [PATCH] normalizer-fix2 --- nlp/normalize.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nlp/normalize.py b/nlp/normalize.py index b7e7d2b..ae56a4e 100644 --- a/nlp/normalize.py +++ b/nlp/normalize.py @@ -1,9 +1,10 @@ import torch -from transformers import ByT5Tokenizer, ByT5ForConditionalGeneration +from transformers import ByT5Tokenizer, T5ForConditionalGeneration # Use ByT5 for the ByT5 model tokenizer = ByT5Tokenizer.from_pretrained("google/byt5-small") -model = ByT5ForConditionalGeneration.from_pretrained("google/byt5-small") +model = T5ForConditionalGeneration.from_pretrained("google/byt5-small") + def is_russian_wording(text): """ @@ -80,7 +81,7 @@ def normalize(text): # Example usage if __name__ == "__main__": - input_text = "Hello, this is a test input." + input_text = "привет шп ана т у п а я" normalized_output = normalize(input_text) print(normalized_output)