From 0fa336978f879c1d58de22fad68e0309ec01ebb1 Mon Sep 17 00:00:00 2001 From: Untone Date: Thu, 26 Sep 2024 13:07:01 +0300 Subject: [PATCH] . --- handlers/messages_routing.py | 29 ++++++++++++++++++----------- main.py | 16 +++++++++------- nlp/toxycity_detector.py | 13 +++++++++++++ requirements.txt | 2 ++ 4 files changed, 42 insertions(+), 18 deletions(-) create mode 100644 nlp/toxycity_detector.py diff --git a/handlers/messages_routing.py b/handlers/messages_routing.py index 0c647d4..3ad803c 100644 --- a/handlers/messages_routing.py +++ b/handlers/messages_routing.py @@ -3,8 +3,7 @@ import math from bot.api import telegram_api from bot.config import FEEDBACK_CHAT_ID -from nlp.toxicity import text2toxicity -from nlp.replying import get_toxic_reply +from nlp.toxicity_detector import detector from handlers.handle_private import handle_private logger = logging.getLogger('handlers.messages_routing') @@ -31,18 +30,26 @@ async def messages_routing(msg, state): if reply_chat_id != FEEDBACK_CHAT_ID: await telegram_api("sendMessage", chat_id=reply_chat_id, text=text, reply_to=reply_msg.get("message_id")) + # TODO: implement text2toxicity with https://huggingface.co/s-nlp/russian_toxicity_classifier elif bool(text): - toxic_score = text2toxicity(text) + mid = msg.get("message_id") + non_toxic_score, toxic_score = detector(text) logger.info(f'\ntext: {text}\ntoxic: {math.floor(toxic_score*100)}%') if toxic_score > 0.71: - toxic_reply = get_toxic_reply(toxic_score) - await telegram_api( - "setMessageReaction", - chat_id=cid, - is_big=True, - message_id=msg.get("message_id"), - reaction=f'[{{"type":"emoji", "emoji":"{toxic_reply}"}}]' - ) + if toxic_score > 0.85: + await telegram_api( + "deletemessage", + chat_id=cid, + message_id=mid + ) + else: + await telegram_api( + "setMessageReaction", + chat_id=cid, + is_big=True, + message_id=mid, + reaction=f'[{{"type":"emoji", "emoji":"🙉"}}]' + ) else: pass diff --git a/main.py b/main.py index 60cc8cf..8e97dbe 100644 --- a/main.py +++ 
b/main.py @@ -1,22 +1,24 @@ import asyncio import logging +import signal +import sys from aiohttp import ClientSession -from bot.api import telegram_api -from bot.config import FEEDBACK_CHAT_ID -from handlers.handle_join_request import handle_join_request, handle_reaction_on_request from handlers.messages_routing import messages_routing +from handlers.handle_join_request import handle_join_request, handle_reaction_on_request +from bot.config import BOT_TOKEN, FEEDBACK_CHAT_ID +from bot.api import telegram_api logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger('main') +logger = logging.getLogger(__name__) state = dict() -async def main(): +async def start(): logger.info("\tstarted") async with ClientSession() as session: offset = 0 # начальное значение offset while True: - response = await telegram_api("getUpdates", offset=offset, allowed_updates=['message', 'edited_message', 'message_reaction','chat_join_request', 'chat_member']) + response = await telegram_api("getUpdates", offset=offset, allowed_updates=['message', 'message_reaction']) if isinstance(response, dict): result = response.get("result", []) for update in result: @@ -47,4 +49,4 @@ async def main(): if __name__ == "__main__": # Запуск асинхронного цикла - asyncio.run(main()) + asyncio.run(start()) diff --git a/nlp/toxicity_detector.py b/nlp/toxicity_detector.py new file mode 100644 index 0000000..0cb2fe9 --- /dev/null +++ b/nlp/toxicity_detector.py @@ -0,0 +1,16 @@ +import torch +from transformers import BertTokenizer, BertForSequenceClassification + +# load tokenizer and model weights +tokenizer = BertTokenizer.from_pretrained('SkolkovoInstitute/russian_toxicity_classifier') +model = BertForSequenceClassification.from_pretrained('SkolkovoInstitute/russian_toxicity_classifier') + + +def detector(text): + # prepare the input + batch = tokenizer.encode(text, return_tensors='pt') + + # inference: softmax over the two classes -> (non_toxic, toxic) probabilities + with torch.no_grad(): + proba = torch.nn.functional.softmax(model(batch).logits, dim=1) + return proba[0].tolist() diff --git a/requirements.txt b/requirements.txt index 
9ae2dff..c784ec9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ aiohttp redis[hiredis] +torch +transformers