normalizer-fix2

2024-09-27 10:15:18 +03:00
parent bebf0caf65
commit e1ae8a2de0
1 changed files with 4 additions and 3 deletions
--- a/nlp/normalize.py
+++ b/nlp/normalize.py
@@ -1,9 +1,10 @@
 import torch
-from transformers import ByT5Tokenizer, ByT5ForConditionalGeneration
+from transformers import ByT5Tokenizer, T5ForConditionalGeneration

 # ByT5 shares the T5 architecture: pair the byte-level ByT5Tokenizer with T5ForConditionalGeneration
 tokenizer = ByT5Tokenizer.from_pretrained("google/byt5-small")
-model = ByT5ForConditionalGeneration.from_pretrained("google/byt5-small")
+model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
+

 def is_russian_wording(text):
    """
@@ -80,7 +81,7 @@ def normalize(text):

 # Example usage
 if __name__ == "__main__":
-    input_text = "Hello, this is a test input."
+    input_text = "привет шп  ана т у п а я"

    normalized_output = normalize(input_text)
    print(normalized_output)