linter+fmt

This commit is contained in:
2024-09-27 13:30:29 +03:00
parent eff5cdb394
commit 21591df427
7 changed files with 14 additions and 15 deletions

View File

@@ -1,5 +1,8 @@
import torch
from transformers import ByT5Tokenizer, T5ForConditionalGeneration
import logging
logger = logging.getLogger("nlp.normalize")
# Load the ByT5 tokenizer from the google/byt5-small checkpoint
tokenizer = ByT5Tokenizer.from_pretrained("google/byt5-small")
@@ -48,8 +51,8 @@ def normalize(text):
if is_russian_wording(t):
# Normalize the text by replacing characters
normalized_text = (t
.replace("e", "е")
normalized_text = (
t.replace("e", "е")
.replace("o", "о")
.replace("x", "х")
.replace("a", "а")
@@ -73,7 +76,7 @@ def normalize(text):
)
return normalized_text
logger.debug('normalized: ', t)
logger.debug("normalized: ", t)
return t