2024-09-27 09:23:55 +03:00
parent c1e481ded8
commit a7b1925e8d
12 changed files with 218 additions and 139 deletions

View File

@@ -1,8 +1,9 @@
import logging
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Define SVG code generation function with member_id parameter
def generate_chart(members, member_id=None):
    if not member_id:
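Only the head of generate_chart appears in this hunk; a hedged sketch of how the optional member_id might drive highlighting (everything beyond the signature is an assumption, not this repository's code):

# Sketch only: all names besides generate_chart/members/member_id are hypothetical
def generate_chart(members, member_id=None):
    parts = ['<svg xmlns="http://www.w3.org/2000/svg" width="200" height="100">']
    for i, m in enumerate(members):
        # Highlight the requested member when member_id is given
        color = "red" if member_id is not None and m.get("id") == member_id else "gray"
        parts.append(f'<rect x="{i * 20}" y="0" width="16" height="{m.get("score", 0)}" fill="{color}"/>')
    parts.append('</svg>')
    return ''.join(parts)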

View File

@@ -12,7 +12,7 @@ def mention(user):
def userdata_extract(user):
-    ln = " " + user.get('last_name', "") if user.get('last_name', "") else ""
+    ln = " " + user.get("last_name", "") if user.get("last_name", "") else ""
    identity = f"{user['first_name']}{ln}"
    uid = user["id"]
    username = user.get("username", "")
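For context, the kind of input userdata_extract expects (a Telegram-style user dict; the values are illustrative):

# Illustrative input only; with it, the code above yields
# identity == "Ivan Petrov", uid == 42, username == "ivan_p"
user = {"id": 42, "first_name": "Ivan", "last_name": "Petrov", "username": "ivan_p"}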

View File

@@ -1,80 +0,0 @@
import logging
import torch
from transformers import AutoTokenizer, T5ForConditionalGeneration

# Initialize the ByT5 model and tokenizer.
# ByT5 checkpoints ship a byte-level tokenizer; AutoTokenizer resolves
# the right class (T5Tokenizer expects a sentencepiece model ByT5 lacks).
tokenizer = AutoTokenizer.from_pretrained("google/byt5-small")
model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")

def is_russian_wording(text):
    """
    Check if the text contains any Russian characters by checking
    each character against the Unicode range for Cyrillic.
    """
    for char in text:
        if '\u0400' <= char <= '\u04FF':  # Unicode range for Cyrillic characters
            return True
    return False
def segment_text(text):
    """
    Use a neural network model to segment text into words.
    """
    # "segment: " is a custom task prefix; this assumes a checkpoint
    # fine-tuned for segmentation (the base byt5-small is not).
    inputs = tokenizer.encode("segment: " + text, return_tensors="pt")
    # Generate predictions
    with torch.no_grad():
        outputs = model.generate(inputs)
    # Decode the generated tokens back to text
    segmented_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return segmented_text
def normalize(text):
    """
    Normalize mixed-script text: if the segmented text contains Cyrillic,
    replace Latin homoglyphs (and look-alike digits) with their Cyrillic
    counterparts.
    """
    # Collapse runs of spaces, then segment the text
    segmented_text = segment_text(text.replace('    ', ' ').replace('   ', ' ').replace('  ', ' '))
    # Normalize after segmentation
    segmented_text = segmented_text.lower()
    if is_russian_wording(segmented_text):
        # Replace Latin look-alikes with Cyrillic characters
        normalized_text = (segmented_text
                           .replace('e', 'е')
                           .replace('o', 'о')
                           .replace('x', 'х')
                           .replace('a', 'а')
                           .replace('r', 'г')
                           .replace('m', 'м')
                           .replace('u', 'и')
                           .replace('n', 'п')
                           .replace('p', 'р')
                           .replace('t', 'т')
                           .replace('y', 'у')
                           .replace('h', 'н')
                           .replace('i', 'й')
                           .replace('c', 'с')
                           .replace('k', 'к')
                           .replace('b', 'в')
                           .replace('3', 'з')
                           .replace('4', 'ч')
                           .replace('0', 'о')
                           .replace('d', 'д')
                           .replace('z', 'з'))
        return normalized_text
    return segmented_text
# Example usage
if __name__ == "__main__":
    input_text = "Hello, this is a test input."
    normalized_output = normalize(input_text)
    print(normalized_output)
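A side note on the replacement chain above: each .replace() rescans the whole string. Python's str.translate does the same homoglyph folding in a single pass; a minimal sketch (the table mirrors the pairs above, and fold_homoglyphs is a hypothetical name):

# One-pass homoglyph folding with str.translate (sketch, not this bot's code)
HOMOGLYPHS = str.maketrans({
    'e': 'е', 'o': 'о', 'x': 'х', 'a': 'а', 'r': 'г', 'm': 'м',
    'u': 'и', 'n': 'п', 'p': 'р', 't': 'т', 'y': 'у', 'h': 'н',
    'i': 'й', 'c': 'с', 'k': 'к', 'b': 'в', '3': 'з', '4': 'ч',
    '0': 'о', 'd': 'д', 'z': 'з',
})

def fold_homoglyphs(text):  # hypothetical helper
    return text.lower().translate(HOMOGLYPHS)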

View File

@@ -1,114 +0,0 @@
from bot.config import REDIS_URL
import asyncio
import logging
import math

import redis.asyncio as aredis

# Create a logger instance
logger = logging.getLogger('store')
logging.basicConfig(level=logging.DEBUG)
class RedisService:
    def __init__(self, uri=REDIS_URL):
        self._uri: str = uri
        self.pubsub_channels = []
        self._client = None

    async def connect(self):
        # decode_responses=True means get/scan return str rather than bytes
        self._client = aredis.Redis.from_url(self._uri, decode_responses=True)

    async def disconnect(self):
        if self._client:
            await self._client.close()
    async def execute(self, command, *args, **kwargs):
        if self._client:
            try:
                logger.debug(f"{command}")
                for arg in args:
                    if isinstance(arg, dict):
                        # Strip SQLAlchemy's internal state before sending to Redis
                        if arg.get("_sa_instance_state"):
                            del arg["_sa_instance_state"]
                return await self._client.execute_command(command, *args, **kwargs)
            except Exception as e:
                logger.error(e)
    async def subscribe(self, *channels):
        if self._client:
            # NOTE: the pubsub context manager closes on exit, so this only
            # records the channel names; a long-lived listener would have to
            # keep the pubsub object open and read from it.
            async with self._client.pubsub() as pubsub:
                for channel in channels:
                    await pubsub.subscribe(channel)
                    self.pubsub_channels.append(channel)

    async def unsubscribe(self, *channels):
        if not self._client:
            return
        async with self._client.pubsub() as pubsub:
            for channel in channels:
                await pubsub.unsubscribe(channel)
                self.pubsub_channels.remove(channel)
    async def publish(self, channel, data):
        if not self._client:
            return
        await self._client.publish(channel, data)

    async def set(self, key, data, ex=None):
        # Prepare the command arguments
        args = [key, data]
        # If an expiration time is provided, add it to the arguments
        if ex is not None:
            args.append("EX")
            args.append(ex)
        # Execute the command with the provided arguments
        await self.execute("set", *args)
    async def scan_iter(self, pattern='*'):
        """Asynchronously iterate over keys matching the given pattern."""
        cursor = 0
        while True:
            cursor, keys = await self._client.scan(cursor=cursor, match=pattern)
            for key in keys:
                yield key
            if cursor == 0:  # cursor 0 signals the scan is complete
                break
    async def get(self, key):
        return await self.execute("get", key)


redis = RedisService()

__all__ = ["redis"]
async def get_all_pattern(uid):
    pattern = f"removed:{uid}:*"
    # Collect the values of every key matching the pattern
    texts = []
    async for key in redis.scan_iter(pattern):
        # decode_responses=True already returns str, so no .decode() is needed
        value = await redis.get(key)
        if value:
            texts.append(value)
    return texts
async def get_average_pattern(pattern):
    scores = []
    async for key in redis.scan_iter(pattern):
        scr = await redis.get(key)
        # Redis returns strings; keep only values that parse as integers
        try:
            scores.append(int(scr))
        except (TypeError, ValueError):
            continue
    logger.debug(f'found {len(scores)} messages')
    toxic_score = math.floor(sum(scores) / len(scores)) if scores else 0
    return toxic_score
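A minimal usage sketch of the service being removed; the key names, score value, and the asyncio.run driver below are illustrative, not taken from the bot:

# Hypothetical driver showing the intended call pattern
import asyncio

async def main():
    await redis.connect()
    # Store a score with a 24-hour TTL (the "EX" path in set above)
    await redis.set("toxic:42:1001", 7, ex=86400)
    value = await redis.get("toxic:42:1001")      # -> "7" (str, decode_responses)
    average = await get_average_pattern("toxic:42:*")
    print(value, average)
    await redis.disconnect()

asyncio.run(main())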