2024-09-27 09:23:55 +03:00
parent c1e481ded8
commit a7b1925e8d
12 changed files with 218 additions and 139 deletions

View File

@@ -1,8 +1,9 @@
import logging
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Define SVG code generation function with member_id parameter
def generate_chart(members, member_id=None):
    if not member_id:
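Only the head of generate_chart appears in this hunk; a hedged sketch of how the optional member_id might drive highlighting (everything beyond the signature is an assumption, not this repository's code):

# Sketch only: all names besides generate_chart/members/member_id are hypothetical
def generate_chart(members, member_id=None):
    parts = ['<svg xmlns="http://www.w3.org/2000/svg" width="200" height="100">']
    for i, m in enumerate(members):
        # Highlight the requested member when member_id is given
        color = "red" if member_id is not None and m.get("id") == member_id else "gray"
        parts.append(f'<rect x="{i * 20}" y="0" width="16" height="{m.get("score", 0)}" fill="{color}"/>')
    parts.append('</svg>')
    return ''.join(parts)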

View File

@@ -12,7 +12,7 @@ def mention(user):
def userdata_extract(user):
-    ln = " " + user.get('last_name', "") if user.get('last_name', "") else ""
+    ln = " " + user.get("last_name", "") if user.get("last_name", "") else ""
    identity = f"{user['first_name']}{ln}"
    uid = user["id"]
    username = user.get("username", "")
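For context, the kind of input userdata_extract expects (a Telegram-style user dict; the values are illustrative):

# Illustrative input only; with it, the code above yields
# identity == "Ivan Petrov", uid == 42, username == "ivan_p"
user = {"id": 42, "first_name": "Ivan", "last_name": "Petrov", "username": "ivan_p"}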

View File

@@ -1,80 +0,0 @@
import logging
import torch
from transformers import AutoTokenizer, T5ForConditionalGeneration

# Initialize the ByT5 model and tokenizer.
# ByT5 checkpoints ship a byte-level tokenizer; AutoTokenizer resolves
# the right class (T5Tokenizer expects a sentencepiece model ByT5 lacks).
tokenizer = AutoTokenizer.from_pretrained("google/byt5-small")
model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")

def is_russian_wording(text):
    """
    Check if the text contains any Russian characters by checking
    each character against the Unicode range for Cyrillic.
    """
    for char in text:
        if '\u0400' <= char <= '\u04FF':  # Unicode range for Cyrillic characters
            return True
    return False
def segment_text(text):
    """
    Use a neural network model to segment text into words.
    """
    # "segment: " is a custom task prefix; this assumes a checkpoint
    # fine-tuned for segmentation (the base byt5-small is not).
    inputs = tokenizer.encode("segment: " + text, return_tensors="pt")
    # Generate predictions
    with torch.no_grad():
        outputs = model.generate(inputs)
    # Decode the generated tokens back to text
    segmented_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return segmented_text
def normalize(text):
    """
    Normalize mixed-script text: if the segmented text contains Cyrillic,
    replace Latin homoglyphs (and look-alike digits) with their Cyrillic
    counterparts.
    """
    # Collapse runs of spaces, then segment the text
    segmented_text = segment_text(text.replace('    ', ' ').replace('   ', ' ').replace('  ', ' '))
    # Normalize after segmentation
    segmented_text = segmented_text.lower()
    if is_russian_wording(segmented_text):
        # Replace Latin look-alikes with Cyrillic characters
        normalized_text = (segmented_text
                           .replace('e', 'е')
                           .replace('o', 'о')
                           .replace('x', 'х')
                           .replace('a', 'а')
                           .replace('r', 'г')
                           .replace('m', 'м')
                           .replace('u', 'и')
                           .replace('n', 'п')
                           .replace('p', 'р')
                           .replace('t', 'т')
                           .replace('y', 'у')
                           .replace('h', 'н')
                           .replace('i', 'й')
                           .replace('c', 'с')
                           .replace('k', 'к')
                           .replace('b', 'в')
                           .replace('3', 'з')
                           .replace('4', 'ч')
                           .replace('0', 'о')
                           .replace('d', 'д')
                           .replace('z', 'з'))
        return normalized_text
    return segmented_text
# Example usage
if __name__ == "__main__":
    input_text = "Hello, this is a test input."
    normalized_output = normalize(input_text)
    print(normalized_output)
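A side note on the replacement chain above: each .replace() rescans the whole string. Python's str.translate does the same homoglyph folding in a single pass; a minimal sketch (the table mirrors the pairs above, and fold_homoglyphs is a hypothetical name):

# One-pass homoglyph folding with str.translate (sketch, not this bot's code)
HOMOGLYPHS = str.maketrans({
    'e': 'е', 'o': 'о', 'x': 'х', 'a': 'а', 'r': 'г', 'm': 'м',
    'u': 'и', 'n': 'п', 'p': 'р', 't': 'т', 'y': 'у', 'h': 'н',
    'i': 'й', 'c': 'с', 'k': 'к', 'b': 'в', '3': 'з', '4': 'ч',
    '0': 'о', 'd': 'д', 'z': 'з',
})

def fold_homoglyphs(text):  # hypothetical helper
    return text.lower().translate(HOMOGLYPHS)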

View File

@@ -1,114 +0,0 @@
from bot.config import REDIS_URL
import asyncio
import logging
import math

import redis.asyncio as aredis

# Create a logger instance
logger = logging.getLogger('store')
logging.basicConfig(level=logging.DEBUG)
class RedisService:
    def __init__(self, uri=REDIS_URL):
        self._uri: str = uri
        self.pubsub_channels = []
        self._client = None

    async def connect(self):
        # decode_responses=True means get/scan return str rather than bytes
        self._client = aredis.Redis.from_url(self._uri, decode_responses=True)

    async def disconnect(self):
        if self._client:
            await self._client.close()
    async def execute(self, command, *args, **kwargs):
        if self._client:
            try:
                logger.debug(f"{command}")
                for arg in args:
                    if isinstance(arg, dict):
                        # Strip SQLAlchemy's internal state before sending to Redis
                        if arg.get("_sa_instance_state"):
                            del arg["_sa_instance_state"]
                return await self._client.execute_command(command, *args, **kwargs)
            except Exception as e:
                logger.error(e)
    async def subscribe(self, *channels):
        if self._client:
            # NOTE: the pubsub context manager closes on exit, so this only
            # records the channel names; a long-lived listener would have to
            # keep the pubsub object open and read from it.
            async with self._client.pubsub() as pubsub:
                for channel in channels:
                    await pubsub.subscribe(channel)
                    self.pubsub_channels.append(channel)

    async def unsubscribe(self, *channels):
        if not self._client:
            return
        async with self._client.pubsub() as pubsub:
            for channel in channels:
                await pubsub.unsubscribe(channel)
                self.pubsub_channels.remove(channel)
    async def publish(self, channel, data):
        if not self._client:
            return
        await self._client.publish(channel, data)

    async def set(self, key, data, ex=None):
        # Prepare the command arguments
        args = [key, data]
        # If an expiration time is provided, add it to the arguments
        if ex is not None:
            args.append("EX")
            args.append(ex)
        # Execute the command with the provided arguments
        await self.execute("set", *args)
    async def scan_iter(self, pattern='*'):
        """Asynchronously iterate over keys matching the given pattern."""
        cursor = 0
        while True:
            cursor, keys = await self._client.scan(cursor=cursor, match=pattern)
            for key in keys:
                yield key
            if cursor == 0:  # cursor 0 signals the scan is complete
                break
    async def get(self, key):
        return await self.execute("get", key)


redis = RedisService()

__all__ = ["redis"]
async def get_all_pattern(uid):
    pattern = f"removed:{uid}:*"
    # Collect the values of every key matching the pattern
    texts = []
    async for key in redis.scan_iter(pattern):
        # decode_responses=True already returns str, so no .decode() is needed
        value = await redis.get(key)
        if value:
            texts.append(value)
    return texts
async def get_average_pattern(pattern):
    scores = []
    async for key in redis.scan_iter(pattern):
        scr = await redis.get(key)
        # Redis returns strings; keep only values that parse as integers
        try:
            scores.append(int(scr))
        except (TypeError, ValueError):
            continue
    logger.debug(f'found {len(scores)} messages')
    toxic_score = math.floor(sum(scores) / len(scores)) if scores else 0
    return toxic_score
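A minimal usage sketch of the service being removed; the key names, score value, and the asyncio.run driver below are illustrative, not taken from the bot:

# Hypothetical driver showing the intended call pattern
import asyncio

async def main():
    await redis.connect()
    # Store a score with a 24-hour TTL (the "EX" path in set above)
    await redis.set("toxic:42:1001", 7, ex=86400)
    value = await redis.get("toxic:42:1001")      # -> "7" (str, decode_responses)
    average = await get_average_pattern("toxic:42:*")
    print(value, average)
    await redis.disconnect()

asyncio.run(main())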