.

2024-09-26 13:07:01 +03:00
parent 1c8bc26c64
commit 0fa336978f
4 changed files with 42 additions and 18 deletions
--- a/handlers/messages_routing.py
+++ b/handlers/messages_routing.py
@@ -3,8 +3,7 @@ import math

 from bot.api import telegram_api
 from bot.config import FEEDBACK_CHAT_ID
-from nlp.toxicity import text2toxicity
-from nlp.replying import get_toxic_reply
+from nlp.toxicity_detector import detector
 from handlers.handle_private import handle_private

 logger = logging.getLogger('handlers.messages_routing')
@@ -31,18 +30,26 @@ async def messages_routing(msg, state):
            if reply_chat_id != FEEDBACK_CHAT_ID:
                await telegram_api("sendMessage", chat_id=reply_chat_id, text=text, reply_to=reply_msg.get("message_id"))

+    # TODO: implement text2toxicity with https://huggingface.co/s-nlp/russian_toxicity_classifier 
    elif bool(text):
-        toxic_score = text2toxicity(text)
+        mid = msg.get("message_id")
+        non_toxic_score, toxic_score = detector(text)
        logger.info(f'\ntext: {text}\ntoxic: {math.floor(toxic_score*100)}%')
        if toxic_score > 0.71:
-            toxic_reply = get_toxic_reply(toxic_score)
-            await telegram_api(
-                "setMessageReaction",
-                chat_id=cid,
-                is_big=True,
-                message_id=msg.get("message_id"),
-                reaction=f'[{{"type":"emoji", "emoji":"{toxic_reply}"}}]'
-            )
+            if toxic_score > 0.85:
+                await telegram_api(
+                    "deletemessage",
+                    chat_id=cid,
+                    message_id=mid
+                )
+            else:
+                await telegram_api(
+                    "setMessageReaction",
+                    chat_id=cid,
+                    is_big=True,
+                    message_id=mid,
+                    reaction=f'[{{"type":"emoji", "emoji":"🙉"}}]'
+                )

    else:
        pass
--- a/main.py
+++ b/main.py
@@ -1,22 +1,24 @@
 import asyncio
 import logging
+import signal
+import sys
 from aiohttp import ClientSession
-from bot.api import telegram_api
-from bot.config import FEEDBACK_CHAT_ID
-from handlers.handle_join_request import handle_join_request, handle_reaction_on_request
 from handlers.messages_routing import messages_routing
+from handlers.handle_join_request import handle_join_request, handle_reaction_on_request
+from bot.config import BOT_TOKEN, FEEDBACK_CHAT_ID
+from bot.api import telegram_api

 logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger('main')
+logger = logging.getLogger(__name__)
 state = dict()


-async def main():
+async def start():
    logger.info("\tstarted")
    async with ClientSession() as session:
        offset = 0  # начальное значение offset
        while True:
-            response = await telegram_api("getUpdates", offset=offset, allowed_updates=['message', 'edited_message', 'message_reaction','chat_join_request', 'chat_member'])
+            response = await telegram_api("getUpdates", offset=offset, allowed_updates=['message', 'message_reaction'])
            if isinstance(response, dict):
                result = response.get("result", [])
                for update in result:
@@ -47,4 +49,4 @@ async def main():

 if __name__ == "__main__":
    # Запуск асинхронного цикла
-    asyncio.run(main())
+    asyncio.run(start())
--- a/nlp/toxycity_detector.py
+++ b/nlp/toxycity_detector.py
@@ -0,0 +1,13 @@
+from transformers import BertTokenizer, BertForSequenceClassification
+
+# load tokenizer and model weights
+tokenizer = BertTokenizer.from_pretrained('SkolkovoInstitute/russian_toxicity_classifier')
+model = BertForSequenceClassification.from_pretrained('SkolkovoInstitute/russian_toxicity_classifier')
+
+
+def detector(text):
+    # prepare the input
+    batch = tokenizer.encode(text, return_tensors='pt')
+
+    # inference
+    model(batch)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,4 @@
 aiohttp
 redis[hiredis]
+tensorflow
+transformers