ruffed
This commit is contained in:
parent
c1e481ded8
commit
a7b1925e8d
|
@ -1,12 +1,13 @@
|
||||||
from bot.api import telegram_api
|
from bot.api import telegram_api
|
||||||
from utils.mention import mention, userdata_extract
|
from utils.mention import userdata_extract
|
||||||
from utils.store import redis
|
from state.redis import redis
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
def get_newcomer_message(msg):
|
def get_newcomer_message(msg):
|
||||||
lang = msg["from"].get("language_code", "ru")
|
lang = msg["from"].get("language_code", "ru")
|
||||||
r = "хочет присоединиться к нам здесь" if lang == "ru" else " wants to join us here"
|
r = "хочет присоединиться к нам здесь" if lang == "ru" else " wants to join us here"
|
||||||
|
@ -27,27 +28,36 @@ async def show_announce(msg):
|
||||||
userphotos_response = await telegram_api("getUserphotos", user_id=from_id)
|
userphotos_response = await telegram_api("getUserphotos", user_id=from_id)
|
||||||
|
|
||||||
file_id = ""
|
file_id = ""
|
||||||
if isinstance(userphotos_response, dict) and userphotos_response["ok"] and userphotos_response["result"]["total_count"] > 0:
|
if (
|
||||||
|
isinstance(userphotos_response, dict)
|
||||||
|
and userphotos_response["ok"]
|
||||||
|
and userphotos_response["result"]["total_count"] > 0
|
||||||
|
):
|
||||||
logger.info("showing button with photo")
|
logger.info("showing button with photo")
|
||||||
file_id = userphotos_response["result"]["photos"][0][0]["file_id"]
|
file_id = userphotos_response["result"]["photos"][0][0]["file_id"]
|
||||||
|
|
||||||
r = await telegram_api("sendPhoto",
|
r = await telegram_api(
|
||||||
|
"sendPhoto",
|
||||||
chat_id=chat_id,
|
chat_id=chat_id,
|
||||||
file_id=file_id,
|
file_id=file_id,
|
||||||
caption=newcomer_message,
|
caption=newcomer_message,
|
||||||
reply_to=mid
|
reply_to=mid,
|
||||||
)
|
)
|
||||||
announce_msg_id = r.get("message_id")
|
announce_msg_id = r.get("message_id")
|
||||||
await redis.set(f"announce:{chat_id}:{from_id}", announce_message_id)
|
await redis.set(f"announce:{chat_id}:{from_id}", announce_msg_id)
|
||||||
|
|
||||||
|
|
||||||
async def edit_announce(msg):
    """Replace the caption of the join announce previously posted for a user.

    The new caption is the newcomer message for the sender followed by the
    text of ``msg`` with every ``"/message "`` occurrence removed.  The id of
    the announce is looked up in redis under ``announce:{chat_id}:{from_id}``;
    when no id is stored the function does nothing.

    Args:
        msg: Telegram message dict; ``msg["chat"]["id"]`` and
            ``msg["from"]["id"]`` are required, ``msg["text"]`` is optional.
    """
    logger.info("editing announce")
    chat_id = str(msg["chat"]["id"])
    from_id = str(msg["from"]["id"])

    # A message without a "text" key must not crash on None.replace().
    text = msg.get("text") or ""
    caption = get_newcomer_message(msg) + text.replace("/message ", "")

    key = f"announce:{chat_id}:{from_id}"
    announce_message_id = await redis.get(key)
    if not announce_message_id:
        return

    r = await telegram_api(
        "editMessageCaption",
        chat_id=chat_id,
        message_id=int(announce_message_id),
        caption=caption,
    )

    # The Bot API answers with an {"ok": ..., "result": ...} envelope, so the
    # message id lives under "result", not at the top level.  Reading it from
    # the top level yields None and would overwrite the stored announce id.
    # The call may also hand back a non-dict when the request failed.
    result = r.get("result") if isinstance(r, dict) and r.get("ok") else None
    edited_id = result.get("message_id") if isinstance(result, dict) else None
    if edited_id is not None:
        await redis.set(key, edited_id)
|
||||||
|
|
15
bot/api.py
15
bot/api.py
|
@ -5,7 +5,7 @@ from bot.config import BOT_TOKEN
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
# Create a logger instance
|
# Create a logger instance
|
||||||
logger = logging.getLogger('bot.api')
|
logger = logging.getLogger("bot.api")
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
api_base = f"https://api.telegram.org/bot{BOT_TOKEN}/"
|
api_base = f"https://api.telegram.org/bot{BOT_TOKEN}/"
|
||||||
|
@ -14,17 +14,20 @@ api_base = f"https://api.telegram.org/bot{BOT_TOKEN}/"
|
||||||
async def telegram_api(endpoint: str, json_data=None, **kwargs):
|
async def telegram_api(endpoint: str, json_data=None, **kwargs):
|
||||||
try:
|
try:
|
||||||
url = api_base + f"{endpoint}?{urlencode(kwargs)}"
|
url = api_base + f"{endpoint}?{urlencode(kwargs)}"
|
||||||
is_polling = endpoint == 'getUpdates'
|
is_polling = endpoint == "getUpdates"
|
||||||
headers = {'Content-Type': 'application/json'}
|
headers = {"Content-Type": "application/json"}
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
url = api_base + f"{endpoint}?{urlencode(kwargs)}"
|
url = api_base + f"{endpoint}?{urlencode(kwargs)}"
|
||||||
if not is_polling:
|
if not is_polling:
|
||||||
logger.info(f' >>> {url} {json_data if json_data else ""}')
|
logger.info(f' >>> {url} {json_data if json_data else ""}')
|
||||||
async with session.get(url, data=json.dumps(json_data), headers=headers) as response:
|
async with session.get(
|
||||||
|
url, data=json.dumps(json_data), headers=headers
|
||||||
|
) as response:
|
||||||
data = await response.json()
|
data = await response.json()
|
||||||
if not is_polling:
|
if not is_polling:
|
||||||
logger.info(f' <<< {data}')
|
logger.info(f" <<< {data}")
|
||||||
return data
|
return data
|
||||||
except Exception as ex:
|
except Exception:
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
|
@ -9,10 +9,30 @@ logger = logging.getLogger(__name__)
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
positive_reactions = ["👍", "❤", "🔥", "🥰", "👏", "🎉", "🙏", "👌", "🕊", "😍", "❤🔥", "🍓", "🍾", "💋", "😇", "🤝", "🤗", "💘", "😘"]
|
positive_reactions = [
|
||||||
|
"👍",
|
||||||
|
"❤",
|
||||||
|
"🔥",
|
||||||
|
"🥰",
|
||||||
|
"👏",
|
||||||
|
"🎉",
|
||||||
|
"🙏",
|
||||||
|
"👌",
|
||||||
|
"🕊",
|
||||||
|
"😍",
|
||||||
|
"❤🔥",
|
||||||
|
"🍓",
|
||||||
|
"🍾",
|
||||||
|
"💋",
|
||||||
|
"😇",
|
||||||
|
"🤝",
|
||||||
|
"🤗",
|
||||||
|
"💘",
|
||||||
|
"😘",
|
||||||
|
]
|
||||||
announced_message = {
|
announced_message = {
|
||||||
"ru": "Запрос на вступление опубликован в чате, как только вас узнают и отреагируют - она будет принята",
|
"ru": "Запрос на вступление опубликован в чате, как только вас узнают и отреагируют - она будет принята",
|
||||||
"en": "The join request is posted in the chat, once you are recognized and someone reacted to - it will be accepted"
|
"en": "The join request is posted in the chat, once you are recognized and someone reacted to - it will be accepted",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -22,12 +42,14 @@ async def handle_join_request(join_request):
|
||||||
|
|
||||||
lang = user.get("language_code", "ru")
|
lang = user.get("language_code", "ru")
|
||||||
# показываем для FEEDBACK_CHAT
|
# показываем для FEEDBACK_CHAT
|
||||||
await telegram_api("sendMessage", chat_id=FEEDBACK_CHAT_ID, text="новая заявка от " + mention(user))
|
await telegram_api(
|
||||||
|
"sendMessage", chat_id=FEEDBACK_CHAT_ID, text="новая заявка от " + mention(user)
|
||||||
|
)
|
||||||
# показываем анонс с заявкой
|
# показываем анонс с заявкой
|
||||||
await show_announce(join_request)
|
await show_announce(join_request)
|
||||||
|
|
||||||
# сообщаем пользователю, что опубликовали анонс его заявки
|
# сообщаем пользователю, что опубликовали анонс его заявки
|
||||||
await telegram_api("sendMessage", chat_id=user['id'], text=announced_message[lang])
|
await telegram_api("sendMessage", chat_id=user["id"], text=announced_message[lang])
|
||||||
|
|
||||||
|
|
||||||
async def handle_reaction_on_request(update):
|
async def handle_reaction_on_request(update):
|
||||||
|
@ -39,5 +61,7 @@ async def handle_reaction_on_request(update):
|
||||||
new_reaction = reaction.get("new_reaction")
|
new_reaction = reaction.get("new_reaction")
|
||||||
if new_reaction.get("emoji") in positive_reactions:
|
if new_reaction.get("emoji") in positive_reactions:
|
||||||
# за пользователя поручились
|
# за пользователя поручились
|
||||||
r = await telegram_api("approveChatJoinRequest", chat_id=chat_id, user_id=from_id)
|
r = await telegram_api(
|
||||||
|
"approveChatJoinRequest", chat_id=chat_id, user_id=from_id
|
||||||
|
)
|
||||||
logger.debug(r)
|
logger.debug(r)
|
||||||
|
|
|
@ -2,14 +2,14 @@ from bot.config import FEEDBACK_CHAT_ID
|
||||||
from bot.announce import edit_announce
|
from bot.announce import edit_announce
|
||||||
from bot.api import telegram_api
|
from bot.api import telegram_api
|
||||||
import logging
|
import logging
|
||||||
from utils.store import get_all_pattern, get_average_pattern
|
from state.scan import get_all_pattern, get_average_pattern
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
start_message = {
|
start_message = {
|
||||||
'en': "Welcome home! You can type any message here to be passed to chat",
|
"en": "Welcome home! You can type any message here to be passed to chat",
|
||||||
'ru': "Доброе утро! Можешь напечатать здесь любое сообщение для передачи в чат"
|
"ru": "Доброе утро! Можешь напечатать здесь любое сообщение для передачи в чат",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -21,25 +21,35 @@ async def handle_private(msg, state):
|
||||||
if lang != "ru" and lang != "en":
|
if lang != "ru" and lang != "en":
|
||||||
lang = "en"
|
lang = "en"
|
||||||
if text and text.startswith("/"):
|
if text and text.startswith("/"):
|
||||||
if text == '/start':
|
if text == "/start":
|
||||||
await telegram_api("sendMessage", chat_id=uid, text=start_message[lang])
|
await telegram_api("sendMessage", chat_id=uid, text=start_message[lang])
|
||||||
state['welcome'] = True
|
state["welcome"] = True
|
||||||
elif state.get('welcome'):
|
elif state.get("welcome"):
|
||||||
await edit_announce(msg)
|
await edit_announce(msg)
|
||||||
state['welcome'] = False
|
state["welcome"] = False
|
||||||
return
|
return
|
||||||
elif text.startswith('/toxic'):
|
elif text.startswith("/toxic"):
|
||||||
|
cid = msg.get("chat", {}).get("id")
|
||||||
toxic_pattern = f"toxic:{uid}:{cid}:*"
|
toxic_pattern = f"toxic:{uid}:{cid}:*"
|
||||||
toxic_score = await get_average_toxic(toxic_pattern)
|
toxic_score = await get_average_pattern(toxic_pattern)
|
||||||
text = f"Средняя токсичность сообщений: {toxic_score}%"
|
text = f"Средняя токсичность сообщений: {toxic_score}%"
|
||||||
mid = msg.get("message_id")
|
mid = msg.get("message_id")
|
||||||
await telegram_api("sendMessage", chat_id=uid, reply_to_message_id=mid, text=text)
|
await telegram_api(
|
||||||
|
"sendMessage", chat_id=uid, reply_to_message_id=mid, text=text
|
||||||
|
)
|
||||||
return
|
return
|
||||||
elif text == '/removed':
|
elif text == "/removed":
|
||||||
removed_pattern = f"removed:{uid}:*"
|
removed_pattern = f"removed:{uid}:*"
|
||||||
removed_messages = await get_all_pattern(removed_pattern)
|
removed_messages = await get_all_pattern(removed_pattern)
|
||||||
if removed_messages:
|
if removed_messages:
|
||||||
await telegram_api("sendMessage", chat_id=uid, text="\n\n".join(removed_messages))
|
await telegram_api(
|
||||||
|
"sendMessage", chat_id=uid, text="\n\n".join(removed_messages)
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
await telegram_api("forwardMessage", from_chat_id=sender.get("id"), message_id=msg.get("message_id"), chat_id=FEEDBACK_CHAT_ID)
|
await telegram_api(
|
||||||
|
"forwardMessage",
|
||||||
|
from_chat_id=sender.get("id"),
|
||||||
|
message_id=msg.get("message_id"),
|
||||||
|
chat_id=FEEDBACK_CHAT_ID,
|
||||||
|
)
|
||||||
|
|
|
@ -1,15 +1,17 @@
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
from utils.store import redis, get_average_pattern
|
from state.redis import redis
|
||||||
|
from state.scan import get_average_pattern
|
||||||
from bot.api import telegram_api
|
from bot.api import telegram_api
|
||||||
from bot.config import FEEDBACK_CHAT_ID
|
from bot.config import FEEDBACK_CHAT_ID
|
||||||
from nlp.toxicity_detector import detector
|
|
||||||
from handlers.handle_private import handle_private
|
from handlers.handle_private import handle_private
|
||||||
from utils.normalize import normalize
|
from nlp.toxicity_detector import detector
|
||||||
|
from nlp.normalize import normalize
|
||||||
|
|
||||||
logger = logging.getLogger('handlers.messages_routing')
|
logger = logging.getLogger("handlers.messages_routing")
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
async def messages_routing(msg, state):
|
async def messages_routing(msg, state):
|
||||||
cid = msg["chat"]["id"]
|
cid = msg["chat"]["id"]
|
||||||
uid = msg["from"]["id"]
|
uid = msg["from"]["id"]
|
||||||
|
@ -28,11 +30,16 @@ async def messages_routing(msg, state):
|
||||||
if reply_msg:
|
if reply_msg:
|
||||||
reply_chat_id = reply_msg.get("chat", {}).get("id")
|
reply_chat_id = reply_msg.get("chat", {}).get("id")
|
||||||
if reply_chat_id != FEEDBACK_CHAT_ID:
|
if reply_chat_id != FEEDBACK_CHAT_ID:
|
||||||
await telegram_api("sendMessage", chat_id=reply_chat_id, text=text, reply_to_message_id=reply_msg.get("message_id"))
|
await telegram_api(
|
||||||
|
"sendMessage",
|
||||||
|
chat_id=reply_chat_id,
|
||||||
|
text=text,
|
||||||
|
reply_to_message_id=reply_msg.get("message_id"),
|
||||||
|
)
|
||||||
|
|
||||||
elif bool(text):
|
elif bool(text):
|
||||||
mid = msg.get("message_id")
|
mid = msg.get("message_id")
|
||||||
if text == '/toxic@welcomecenter_bot':
|
if text == "/toxic@welcomecenter_bot":
|
||||||
# latest in chat
|
# latest in chat
|
||||||
latest_toxic_message_id = await redis.get(f"toxic:{cid}")
|
latest_toxic_message_id = await redis.get(f"toxic:{cid}")
|
||||||
|
|
||||||
|
@ -52,46 +59,44 @@ async def messages_routing(msg, state):
|
||||||
one_score = await redis.get(f"toxic:{cid}:{uid}:{reply_to_msg_id}")
|
one_score = await redis.get(f"toxic:{cid}:{uid}:{reply_to_msg_id}")
|
||||||
if one_score:
|
if one_score:
|
||||||
logger.debug(one_score)
|
logger.debug(one_score)
|
||||||
emoji = '😳' if toxic_score > 90 else '😟' if toxic_score > 80 else '😏' if toxic_score > 60 else '🙂' if toxic_score > 20 else '😇'
|
emoji = (
|
||||||
|
"😳"
|
||||||
|
if toxic_score > 90
|
||||||
|
else "😟"
|
||||||
|
if toxic_score > 80
|
||||||
|
else "😏"
|
||||||
|
if toxic_score > 60
|
||||||
|
else "🙂"
|
||||||
|
if toxic_score > 20
|
||||||
|
else "😇"
|
||||||
|
)
|
||||||
text = f"{int(one_score)}% токсичности\nСредняя токсичность сообщений: {toxic_score}% {emoji}"
|
text = f"{int(one_score)}% токсичности\nСредняя токсичность сообщений: {toxic_score}% {emoji}"
|
||||||
await telegram_api(
|
await telegram_api(
|
||||||
"sendMessage",
|
"sendMessage",
|
||||||
chat_id=cid,
|
chat_id=cid,
|
||||||
reply_to_message_id=reply_to_msg_id,
|
reply_to_message_id=reply_to_msg_id,
|
||||||
text=text
|
text=text,
|
||||||
)
|
)
|
||||||
await telegram_api(
|
await telegram_api("deleteMessage", chat_id=cid, message_id=mid)
|
||||||
"deleteMessage",
|
elif text == "/removed@welcomecenter_bot":
|
||||||
chat_id=cid,
|
await telegram_api("deleteMessage", chat_id=cid, message_id=mid)
|
||||||
message_id=mid
|
|
||||||
)
|
|
||||||
elif text == '/removed@welcomecenter_bot':
|
|
||||||
await telegram_api(
|
|
||||||
"deleteMessage",
|
|
||||||
chat_id=cid,
|
|
||||||
message_id=mid
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
toxic_score = detector(normalize(text))
|
toxic_score = detector(normalize(text))
|
||||||
toxic_perc = math.floor(toxic_score*100)
|
toxic_perc = math.floor(toxic_score * 100)
|
||||||
await redis.set(f"toxic:{cid}", mid)
|
await redis.set(f"toxic:{cid}", mid)
|
||||||
await redis.set(f"toxic:{cid}:{uid}:{mid}", toxic_perc, ex=60*60*24*3)
|
await redis.set(f"toxic:{cid}:{uid}:{mid}", toxic_perc, ex=60 * 60 * 24 * 3)
|
||||||
logger.info(f'\ntext: {text}\ntoxic: {toxic_perc}%')
|
logger.info(f"\ntext: {text}\ntoxic: {toxic_perc}%")
|
||||||
if toxic_score > 0.81:
|
if toxic_score > 0.81:
|
||||||
if toxic_score > 0.90:
|
if toxic_score > 0.90:
|
||||||
await redis.set(f"removed:{uid}:{cid}:{mid}", text)
|
await redis.set(f"removed:{uid}:{cid}:{mid}", text)
|
||||||
await telegram_api(
|
await telegram_api("deleteMessage", chat_id=cid, message_id=mid)
|
||||||
"deleteMessage",
|
|
||||||
chat_id=cid,
|
|
||||||
message_id=mid
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
await telegram_api(
|
await telegram_api(
|
||||||
"setMessageReaction",
|
"setMessageReaction",
|
||||||
chat_id=cid,
|
chat_id=cid,
|
||||||
is_big=True,
|
is_big=True,
|
||||||
message_id=mid,
|
message_id=mid,
|
||||||
reaction=f'[{{"type":"emoji", "emoji":"🙉"}}]'
|
reaction='[{"type":"emoji", "emoji":"🙉"}]',
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
16
main.py
16
main.py
|
@ -14,7 +14,11 @@ async def start():
|
||||||
logger.info("\n\npolling started\n\n")
|
logger.info("\n\npolling started\n\n")
|
||||||
offset = 0 # init offset
|
offset = 0 # init offset
|
||||||
while True:
|
while True:
|
||||||
response = await telegram_api("getUpdates", offset=offset, allowed_updates=['message', 'message_reaction', 'chat_join_request'])
|
response = await telegram_api(
|
||||||
|
"getUpdates",
|
||||||
|
offset=offset,
|
||||||
|
allowed_updates=["message", "message_reaction", "chat_join_request"],
|
||||||
|
)
|
||||||
# logger.debug(response)
|
# logger.debug(response)
|
||||||
if isinstance(response, dict):
|
if isinstance(response, dict):
|
||||||
result = response.get("result", [])
|
result = response.get("result", [])
|
||||||
|
@ -35,15 +39,21 @@ async def start():
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(e)
|
logger.error(e)
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
text = traceback.format_exc()
|
text = traceback.format_exc()
|
||||||
formatted_text = f"```log\n{text}```"
|
formatted_text = f"```log\n{text}```"
|
||||||
await telegram_api("sendMessage", chat_id=FEEDBACK_CHAT_ID, text=formatted_text, parse_mode='MarkdownV2')
|
await telegram_api(
|
||||||
|
"sendMessage",
|
||||||
|
chat_id=FEEDBACK_CHAT_ID,
|
||||||
|
text=formatted_text,
|
||||||
|
parse_mode="MarkdownV2",
|
||||||
|
)
|
||||||
|
|
||||||
offset = update["update_id"] + 1
|
offset = update["update_id"] + 1
|
||||||
|
|
||||||
await asyncio.sleep(1.0)
|
await asyncio.sleep(1.0)
|
||||||
else:
|
else:
|
||||||
logger.error(' \n\n\n!!! getUpdates polling error\n\n\n')
|
logger.error(" \n\n\n!!! getUpdates polling error\n\n\n")
|
||||||
await asyncio.sleep(30.0)
|
await asyncio.sleep(30.0)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import logging
|
|
||||||
import torch
|
import torch
|
||||||
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
||||||
|
|
||||||
|
@ -6,75 +5,81 @@ from transformers import T5Tokenizer, T5ForConditionalGeneration
|
||||||
tokenizer = T5Tokenizer.from_pretrained("google/byt5-small")
|
tokenizer = T5Tokenizer.from_pretrained("google/byt5-small")
|
||||||
model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
|
model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
|
||||||
|
|
||||||
|
|
||||||
def is_russian_wording(text):
    """Return True if *text* contains at least one Cyrillic character.

    Each character is compared against the basic Cyrillic Unicode block
    (U+0400..U+04FF), so any letter from that block counts, not only the
    letters of the Russian alphabet.  An empty string yields False.
    """
    return any("\u0400" <= char <= "\u04ff" for char in text)
|
||||||
|
|
||||||
|
|
||||||
def segment_text(text):
    """Use the module-level seq2seq model to segment *text* into words.

    The text is prefixed with ``"segment: "``, encoded with the module-level
    ``tokenizer``, passed through ``model.generate`` and decoded back to a
    string with special tokens removed.

    Args:
        text: The raw text to segment.

    Returns:
        The decoded model output as a string.
    """
    # Encode the input text for the model
    inputs = tokenizer.encode("segment: " + text, return_tensors="pt")

    # Without an explicit limit generate() falls back to the library's
    # generic default output length, which truncates anything but very short
    # messages.  Segmentation can at most interleave separators with the
    # input, so twice the input length is used as the bound.
    # NOTE(review): confirm the bound against the model actually deployed.
    with torch.no_grad():
        outputs = model.generate(inputs, max_new_tokens=2 * inputs.shape[-1])

    # Decode the generated tokens back to text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
||||||
|
|
||||||
|
|
||||||
# Latin letters and digits used as look-alike stand-ins, mapped to the
# Cyrillic letters they imitate.  Applied in a single pass by str.translate.
_LOOKALIKE_TO_CYRILLIC = str.maketrans(
    {
        "e": "е",
        "o": "о",
        "x": "х",
        "a": "а",
        "r": "г",
        "m": "м",
        "u": "и",
        "n": "п",
        "p": "р",
        "t": "т",
        "y": "у",
        "h": "н",
        "i": "й",
        "c": "с",
        "k": "к",
        "b": "в",
        "3": "з",
        "4": "ч",
        "0": "о",
        "d": "д",
        "z": "з",
    }
)


def normalize(text):
    """Segment and lowercase *text*, mapping look-alike characters to Cyrillic.

    Runs of spaces are collapsed to a single space, the text is segmented
    with ``segment_text`` and lowercased.  If the result contains any
    Cyrillic character, Latin letters and digits that resemble Cyrillic
    letters are replaced by them; otherwise the segmented text is returned
    unchanged.

    Args:
        text: The raw message text.

    Returns:
        The normalized string.
    """
    # Collapse runs of spaces before segmentation.
    # NOTE(review): the original replaced a space with a space three times,
    # which does nothing; presumably it was meant to collapse double spaces.
    collapsed = text
    while "  " in collapsed:
        collapsed = collapsed.replace("  ", " ")

    # Normalize after segmentation
    segmented_text = segment_text(collapsed).lower()

    if is_russian_wording(segmented_text):
        # Every mapping target is Cyrillic, so no replacement can feed
        # another one: one translate pass equals the chained replaces.
        return segmented_text.translate(_LOOKALIKE_TO_CYRILLIC)

    return segmented_text
|
||||||
|
|
||||||
|
|
||||||
# Example usage
|
# Example usage
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
input_text = "Hello, this is a test input."
|
input_text = "Hello, this is a test input."
|
||||||
|
|
||||||
normalized_output = normalize(input_text)
|
normalized_output = normalize(input_text)
|
||||||
print(normalized_output)
|
print(normalized_output)
|
||||||
|
|
|
@ -3,17 +3,22 @@ import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
# Load tokenizer and model weights
|
# Load tokenizer and model weights
|
||||||
tokenizer = BertTokenizer.from_pretrained('SkolkovoInstitute/russian_toxicity_classifier')
|
tokenizer = BertTokenizer.from_pretrained(
|
||||||
model = BertForSequenceClassification.from_pretrained('SkolkovoInstitute/russian_toxicity_classifier')
|
"SkolkovoInstitute/russian_toxicity_classifier"
|
||||||
|
)
|
||||||
|
model = BertForSequenceClassification.from_pretrained(
|
||||||
|
"SkolkovoInstitute/russian_toxicity_classifier"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def detector(text):
|
def detector(text):
|
||||||
# Prepare the input
|
# Prepare the input
|
||||||
batch = tokenizer.encode(text, return_tensors='pt')
|
batch = tokenizer.encode(text, return_tensors="pt")
|
||||||
|
|
||||||
# Inference
|
# Inference
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
result = model(batch)
|
result = model(batch)
|
||||||
|
|
||||||
# Get logits
|
# Get logits
|
||||||
logits = result.logits
|
logits = result.logits
|
||||||
|
|
||||||
|
@ -22,9 +27,11 @@ def detector(text):
|
||||||
|
|
||||||
return probabilities[0][1].item()
|
return probabilities[0][1].item()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
if len(sys.argv) > 1:
|
if len(sys.argv) > 1:
|
||||||
p = detector(sys.argv[1])
|
p = detector(sys.argv[1])
|
||||||
toxicity_percentage = p * 100 # Assuming index 1 is for toxic class
|
toxicity_percentage = p * 100 # Assuming index 1 is for toxic class
|
||||||
print(f"Toxicity Probability: {toxicity_percentage:.2f}%")
|
print(f"Toxicity Probability: {toxicity_percentage:.2f}%")
|
||||||
|
|
|
@ -1,11 +1,10 @@
|
||||||
from bot.config import REDIS_URL
|
from bot.config import REDIS_URL
|
||||||
import asyncio
|
|
||||||
import redis.asyncio as aredis
|
import redis.asyncio as aredis
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
# Create a logger instance
|
# Create a logger instance
|
||||||
logger = logging.getLogger('store')
|
logger = logging.getLogger("state.redis")
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.WARNING)
|
||||||
|
|
||||||
|
|
||||||
class RedisService:
|
class RedisService:
|
||||||
|
@ -68,9 +67,9 @@ class RedisService:
|
||||||
# Execute the command with the provided arguments
|
# Execute the command with the provided arguments
|
||||||
await self.execute("set", *args)
|
await self.execute("set", *args)
|
||||||
|
|
||||||
async def scan_iter(self, pattern='*'):
|
async def scan_iter(self, pattern="*"):
|
||||||
"""Asynchronously iterate over keys matching the given pattern."""
|
"""Asynchronously iterate over keys matching the given pattern."""
|
||||||
cursor = '0'
|
cursor = "0"
|
||||||
while cursor != 0:
|
while cursor != 0:
|
||||||
cursor, keys = await self._client.scan(cursor=cursor, match=pattern)
|
cursor, keys = await self._client.scan(cursor=cursor, match=pattern)
|
||||||
for key in keys:
|
for key in keys:
|
||||||
|
@ -83,32 +82,3 @@ class RedisService:
|
||||||
redis = RedisService()
|
redis = RedisService()
|
||||||
|
|
||||||
__all__ = ["redis"]
|
__all__ = ["redis"]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def get_all_pattern(uid):
|
|
||||||
pattern = f"removed:{uid}:*"
|
|
||||||
|
|
||||||
# Create a dictionary to hold the keys and values
|
|
||||||
texts = []
|
|
||||||
|
|
||||||
# Use scan_iter to find all keys matching the pattern
|
|
||||||
async for key in redis.scan_iter(pattern):
|
|
||||||
# Fetch the value for each key
|
|
||||||
value = await redis.get(key)
|
|
||||||
if value:
|
|
||||||
texts.append(value.decode('utf-8'))
|
|
||||||
|
|
||||||
return texts
|
|
||||||
|
|
||||||
|
|
||||||
async def get_average_pattern(pattern):
|
|
||||||
scores = []
|
|
||||||
scoring_msg_id = 0
|
|
||||||
async for key in redis.scan_iter(pattern):
|
|
||||||
scr = await redis.get(key)
|
|
||||||
if isinstance(scr, int):
|
|
||||||
scores.append(scr)
|
|
||||||
logger.debug(f'found {len(scores)} messages')
|
|
||||||
toxic_score = math.floor(sum(scores)/len(scores)) if scores else 0
|
|
||||||
return toxic_score
|
|
34
state/scan.py
Normal file
34
state/scan.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
from state.redis import redis
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
|
||||||
|
# Create a logger instance
|
||||||
|
logger = logging.getLogger("state.scan")
|
||||||
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_all_pattern(uid):
    """Collect the stored texts of all ``removed:*`` keys for a user.

    Args:
        uid: Either a bare user id, which is expanded to the glob pattern
            ``removed:{uid}:*``, or an already-built ``removed:...`` pattern,
            which is used as-is.

    Returns:
        A list with the non-empty values of every matching key, as ``str``.
    """
    # At least one caller passes a ready-made "removed:{uid}:*" pattern.
    # Wrapping it again gives "removed:removed:...:*:*", which matches nothing.
    candidate = str(uid)
    if candidate.startswith("removed:"):
        pattern = candidate
    else:
        pattern = f"removed:{candidate}:*"

    texts = []

    # Use scan_iter to find all keys matching the pattern
    async for key in redis.scan_iter(pattern):
        # Fetch the value for each key
        value = await redis.get(key)
        if value:
            # Accept both raw bytes and already-decoded strings.
            if isinstance(value, bytes):
                texts.append(value.decode("utf-8"))
            else:
                texts.append(str(value))

    return texts
|
||||||
|
|
||||||
|
|
||||||
|
async def get_average_pattern(pattern):
    """Return the floored average of the integer values stored under *pattern*.

    Args:
        pattern: Redis glob pattern selecting the keys to average.

    Returns:
        ``math.floor`` of the mean of all values that can be read as
        integers, or 0 when there are none.
    """
    scores = []
    async for key in redis.scan_iter(pattern):
        raw = await redis.get(key)
        if raw is None:
            continue
        # Values read back from redis are normally bytes or str rather than
        # int, so an isinstance(int) check drops every score.  int() accepts
        # int, str and bytes alike.
        try:
            scores.append(int(raw))
        except (TypeError, ValueError):
            logger.debug("skipping non-numeric value under %r", key)

    logger.debug("found %d messages", len(scores))
    return math.floor(sum(scores) / len(scores)) if scores else 0
|
|
@ -1,8 +1,9 @@
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
# Define SVG code generation function with member_id parameter
|
# Define SVG code generation function with member_id parameter
|
||||||
def generate_chart(members, member_id=None):
|
def generate_chart(members, member_id=None):
|
||||||
if not member_id:
|
if not member_id:
|
||||||
|
|
|
@ -12,7 +12,7 @@ def mention(user):
|
||||||
|
|
||||||
|
|
||||||
def userdata_extract(user):
|
def userdata_extract(user):
|
||||||
ln = " " + user.get('last_name', "") if user.get('last_name', "") else ""
|
ln = " " + user.get("last_name", "") if user.get("last_name", "") else ""
|
||||||
identity = f"{user['first_name']}{ln}"
|
identity = f"{user['first_name']}{ln}"
|
||||||
uid = user["id"]
|
uid = user["id"]
|
||||||
username = user.get("username", "")
|
username = user.get("username", "")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user