welcomecenterbot/handlers/messages_routing.py
2024-09-29 14:12:55 +03:00

147 lines
5.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import logging
import math
from state.redis import redis
from state.scan import get_average_pattern
from bot.api import telegram_api, download_file
from bot.config import FEEDBACK_CHAT_ID
from handlers.handle_private import handle_private
from nlp.toxicity_detector import detector
from nlp.normalize import normalize
from nlp.ocr import ocr_recognize
from nlp.stopwords_detector import check_stopwords
logger = logging.getLogger("handlers.messages_routing")
async def messages_routing(msg, state):
    """Route an incoming Telegram message to the appropriate handler.

    Dispatch order:
      1. messages posted in the feedback group are relayed back to the
         chat they reference (``_relay_feedback_reply``);
      2. private chats with the bot go to ``handle_private``;
      3. group messages carrying text and/or photos are either service
         commands or get toxicity-scored (``_handle_group_message``).

    :param msg:   Telegram ``Message`` object as a dict.
    :param state: opaque conversation state, passed through to handlers.
    """
    cid = msg["chat"]["id"]
    uid = msg["from"]["id"]
    # Photo posts carry their text in "caption" instead of "text".
    text = msg.get("caption", msg.get("text", ""))
    reply_msg = msg.get("reply_to_message")

    if str(cid) == FEEDBACK_CHAT_ID:
        # Message inside the feedback group.
        # FIX: lazy %-style argument — the original passed ``msg`` as a
        # format argument without any placeholder in the template.
        logger.info("feedback chat message: %s", msg)
        logger.debug(msg)
        if reply_msg:
            await _relay_feedback_reply(reply_msg, text)
        return

    if cid == uid:
        # Private chat with the bot.
        logger.info("private chat message: %s", msg)
        await handle_private(msg, state)
        return

    if text or msg.get("photo"):
        await _handle_group_message(msg, cid, uid, text, reply_msg)


async def _relay_feedback_reply(reply_msg, text):
    """Relay a reply written in the feedback group to the referenced chat."""
    reply_chat_id = reply_msg.get("chat", {}).get("id")
    # FIX: chat ids arrive as ints while FEEDBACK_CHAT_ID is a string, so
    # the original int != str comparison was always True. The "-100"
    # rewrite presumably maps the plain group id onto its supergroup id —
    # TODO(review): confirm against how FEEDBACK_CHAT_ID is configured.
    if str(reply_chat_id) != FEEDBACK_CHAT_ID.replace('-', '-100'):
        await telegram_api(
            "sendMessage",
            chat_id=reply_chat_id,
            text=text,
            reply_to_message_id=reply_msg.get("message_id"),
        )


async def _handle_group_message(msg, cid, uid, text, reply_msg):
    """Handle a group message: service commands or toxicity moderation."""
    mid = msg.get("message_id")
    if text == "/toxic@welcomecenter_bot":
        await _reply_with_toxicity(cid, uid, mid, reply_msg)
    elif text == "/removed@welcomecenter_bot":
        # Best-effort: delete the command message itself.
        try:
            await telegram_api("deleteMessage", chat_id=cid, message_id=mid)
        except Exception:
            pass
    else:
        await _score_and_moderate(msg, cid, uid, mid, text)


def _toxicity_emoji(toxic_score):
    """Map an average toxicity percentage to a reaction emoji."""
    for threshold, emoji in ((100, "🤯"), (90, "😳"), (80, "😟"),
                             (60, "😏"), (20, "🙂")):
        if toxic_score > threshold:
            return emoji
    return "😇"


async def _reply_with_toxicity(cid, uid, mid, reply_msg):
    """Answer /toxic with per-message and average scores, then delete
    the command message."""
    # Most recently scored message in this chat, used as a fallback target.
    latest_toxic_message_id = await redis.get(f"toxic:{cid}")
    reply_to_msg_id = mid
    if reply_msg:
        reply_to_msg_id = reply_msg.get("message_id")
    if not reply_to_msg_id and latest_toxic_message_id:
        reply_to_msg_id = int(latest_toxic_message_id)

    if reply_to_msg_id:
        # NOTE(review): the key uses the *command sender's* uid, so this
        # only finds a score when replying to one's own message — confirm
        # whether the replied-to author's uid was intended.
        one_score = await redis.get(f"toxic:{cid}:{uid}:{reply_to_msg_id}")
        reply_text = ""
        if one_score:
            logger.debug(one_score)
            reply_text += f"{int(one_score)}% токсичности\n"
        # Average over every stored score of this user in this chat;
        # best-effort — scan failures must not break the command.
        try:
            toxic_pattern = f"toxic:{cid}:{uid}:*"
            toxic_score = await get_average_pattern(toxic_pattern)
            if toxic_score:
                emoji = _toxicity_emoji(toxic_score)
                reply_text += (
                    f"Средняя токсичность сообщений: {toxic_score}% {emoji}"
                )
        except Exception:
            pass
        if reply_text:
            await telegram_api(
                "sendMessage",
                chat_id=cid,
                reply_to_message_id=reply_to_msg_id,
                text=reply_text,
            )
    # Best-effort: delete the /toxic command message itself.
    try:
        await telegram_api("deleteMessage", chat_id=cid, message_id=mid)
    except Exception:
        pass


async def _score_and_moderate(msg, cid, uid, mid, text):
    """OCR attached photos, score the text's toxicity, persist the score,
    and delete or flag the message when it crosses the thresholds."""
    # Append text recognized from any attached photos.
    for photo in msg.get("photo", []):
        file_id = photo.get("file_id")
        if file_id:
            async for temp_file_path in download_file(file_id):
                text += ocr_recognize(temp_file_path)
                text += '\n'

    normalized_text = normalize(text)
    logger.info("normalized text: %s", normalized_text)
    toxic_score = detector(normalized_text)

    # Below the hard threshold, re-check stop-word combinations and add
    # their scores on top of the model score.
    sw_score = 0
    if toxic_score < 0.91:
        logger.info('re-check stopwords in combinations')
        stopwords_detected = check_stopwords(normalized_text)
        for stopword in stopwords_detected:
            # FIX: the original called the undefined name ``detect`` —
            # a NameError at runtime; the imported scorer is ``detector``.
            sw_score += detector(stopword)

    toxic_perc = toxic_score * 100
    logger.info("original toxic: %s", toxic_perc)
    toxic_perc = (toxic_score + sw_score) * 100
    logger.info("added stopwords toxic: %s", sw_score * 100)

    # Remember the latest scored message id and this message's percent
    # score (expires after 3 days).
    await redis.set(f"toxic:{cid}", mid)
    await redis.set(
        f"toxic:{cid}:{uid}:{mid}",
        math.floor(toxic_perc),
        ex=60 * 60 * 24 * 3,
    )

    # NOTE(review): moderation uses the raw model score only — the
    # stopword bonus affects the stored percentage but not removal.
    if toxic_score > 0.75:
        if toxic_score > 0.90:
            # Keep a copy of the removed text, then delete best-effort.
            await redis.set(f"removed:{uid}:{cid}:{mid}", text)
            try:
                await telegram_api("deleteMessage", chat_id=cid, message_id=mid)
            except Exception:
                pass
        else:
            # Borderline toxicity: react instead of deleting.
            await telegram_api(
                "setMessageReaction",
                chat_id=cid,
                is_big=True,
                message_id=mid,
                reaction='[{"type":"emoji", "emoji":"🙉"}]',
            )