linter+fmt
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
import torch
|
||||
from transformers import ByT5Tokenizer, T5ForConditionalGeneration
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger("nlp.normalize")
|
||||
|
||||
# Load the ByT5 tokenizer from the pretrained "google/byt5-small" checkpoint
|
||||
tokenizer = ByT5Tokenizer.from_pretrained("google/byt5-small")
|
||||
@@ -48,8 +51,8 @@ def normalize(text):
|
||||
|
||||
if is_russian_wording(t):
|
||||
# Normalize the text by replacing Latin look-alike letters (e, o, x, a, ...) with what appear to be their Cyrillic counterparts
|
||||
normalized_text = (t
|
||||
.replace("e", "е")
|
||||
normalized_text = (
|
||||
t.replace("e", "е")
|
||||
.replace("o", "о")
|
||||
.replace("x", "х")
|
||||
.replace("a", "а")
|
||||
@@ -73,7 +76,7 @@ def normalize(text):
|
||||
)
|
||||
|
||||
return normalized_text
|
||||
logger.debug('normalized: ', t)
|
||||
logger.debug("normalized: ", t)
|
||||
return t
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user