diff --git a/handlers/messages_routing.py b/handlers/messages_routing.py index ef29825..b27bb6c 100644 --- a/handlers/messages_routing.py +++ b/handlers/messages_routing.py @@ -48,40 +48,42 @@ async def messages_routing(msg, state): reply_to_msg_id = reply_msg.get("message_id") if not reply_to_msg_id and latest_toxic_message_id: reply_to_msg_id = int(latest_toxic_message_id) + + # count toxicity + if reply_to_msg_id: + # count one message score + one_score = await redis.get(f"toxic:{cid}:{uid}:{reply_to_msg_id}") + reply_text = "" + if one_score: + logger.debug(one_score) + reply_text += f"{int(one_score)}% токсичности\n" - # count average between all of messages - toxic_pattern = f"toxic:{cid}:{uid}:*" - toxic_score = await get_average_pattern(toxic_pattern) + # count average between all of messages + toxic_pattern = f"toxic:{cid}:{uid}:*" + toxic_score = await get_average_pattern(toxic_pattern) - # current mesasage toxicity - if reply_to_msg_id: - one_score = await redis.get(f"toxic:{cid}:{uid}:{reply_to_msg_id}") - reply_text = "" - if one_score: - logger.debug(one_score) - reply_text += f"{int(one_score)}% токсичности\n" - if toxic_score: - emoji = ( - "😳" - if toxic_score > 90 - else "😟" - if toxic_score > 80 - else "😏" - if toxic_score > 60 - else "🙂" - if toxic_score > 20 - else "😇" - ) - reply_text += ( - f"Средняя токсичность сообщений: {toxic_score}% {emoji}" - ) - if reply_text: - await telegram_api( - "sendMessage", - chat_id=cid, - reply_to_message_id=reply_to_msg_id, - text=reply_text, - ) + if toxic_score: + emoji = ( + "😳" + if toxic_score > 90 + else "😟" + if toxic_score > 80 + else "😏" + if toxic_score > 60 + else "🙂" + if toxic_score > 20 + else "😇" + ) + reply_text += ( + f"Средняя токсичность сообщений: {toxic_score}% {emoji}" + ) + if reply_text: + await telegram_api( + "sendMessage", + chat_id=cid, + reply_to_message_id=reply_to_msg_id, + text=reply_text, + ) try: await telegram_api("deleteMessage", chat_id=cid, message_id=mid) except Exception: diff --git a/nlp/normalize.py b/nlp/normalize.py index 1493dec..9e4f1fa 100644 --- a/nlp/normalize.py +++ b/nlp/normalize.py @@ -1,13 +1,7 @@ -import torch -from transformers import ByT5Tokenizer, T5ForConditionalGeneration import logging logger = logging.getLogger("nlp.normalize") -# Use ByT5 for the ByT5 model -tokenizer = ByT5Tokenizer.from_pretrained("google/byt5-small") -model = T5ForConditionalGeneration.from_pretrained("google/byt5-small") - def is_russian_wording(text): """ @@ -22,24 +16,6 @@ def is_russian_wording(text): return True return False - -def segment_text(text): - """ - Use a neural network model to segment text into words. - """ - # Encode the input text for the model as UTF-8 bytes - inputs = tokenizer.encode("segment: " + text, return_tensors="pt") - - # Generate predictions - with torch.no_grad(): - outputs = model.generate(inputs) - - # Decode the generated tokens back to text - segmented_text = tokenizer.decode(outputs[0], skip_special_tokens=True) - - return segmented_text - - def normalize(text): """ Normalize English text to resemble Russian characters. diff --git a/nlp/segment_text.py b/nlp/segment_text.py new file mode 100644 index 0000000..67cbea8 --- /dev/null +++ b/nlp/segment_text.py @@ -0,0 +1,25 @@ +import torch +from transformers import ByT5Tokenizer, T5ForConditionalGeneration + +# Use ByT5 for the ByT5 model +tokenizer = ByT5Tokenizer.from_pretrained("google/byt5-small") +model = T5ForConditionalGeneration.from_pretrained("google/byt5-small") + + + + +def segment_text(text): + """ + Use a neural network model to segment text into words. + """ + # Encode the input text for the model as UTF-8 bytes + inputs = tokenizer.encode("segment: " + text, return_tensors="pt") + + # Generate predictions + with torch.no_grad(): + outputs = model.generate(inputs) + + # Decode the generated tokens back to text + segmented_text = tokenizer.decode(outputs[0], skip_special_tokens=True) + + return segmented_text diff --git a/requirements.txt b/requirements.txt index 84066ab..a0269bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,5 @@ redis[hiredis] aiohttp torch transformers -protobuf -sentencepiece \ No newline at end of file +# protobuf +# sentencepiece \ No newline at end of file