normlizer-fix2
This commit is contained in:
parent
bebf0caf65
commit
e1ae8a2de0
|
@@ -1,9 +1,10 @@
|
||||||
import torch
|
import torch
|
||||||
from transformers import ByT5Tokenizer, ByT5ForConditionalGeneration
|
from transformers import ByT5Tokenizer, T5ForConditionalGeneration
|
||||||
|
|
||||||
# Use ByT5 for the ByT5 model
|
# Use ByT5 for the ByT5 model
|
||||||
tokenizer = ByT5Tokenizer.from_pretrained("google/byt5-small")
|
tokenizer = ByT5Tokenizer.from_pretrained("google/byt5-small")
|
||||||
model = ByT5ForConditionalGeneration.from_pretrained("google/byt5-small")
|
model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
|
||||||
|
|
||||||
|
|
||||||
def is_russian_wording(text):
|
def is_russian_wording(text):
|
||||||
"""
|
"""
|
||||||
|
@@ -80,7 +81,7 @@ def normalize(text):
|
||||||
|
|
||||||
# Example usage
|
# Example usage
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
input_text = "Hello, this is a test input."
|
input_text = "привет шп ана т у п а я"
|
||||||
|
|
||||||
normalized_output = normalize(input_text)
|
normalized_output = normalize(input_text)
|
||||||
print(normalized_output)
|
print(normalized_output)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user