ruffed
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
# Define SVG code generation function with member_id parameter
|
||||
def generate_chart(members, member_id=None):
|
||||
if not member_id:
|
||||
|
@@ -12,7 +12,7 @@ def mention(user):
|
||||
|
||||
|
||||
def userdata_extract(user):
|
||||
ln = " " + user.get('last_name', "") if user.get('last_name', "") else ""
|
||||
ln = " " + user.get("last_name", "") if user.get("last_name", "") else ""
|
||||
identity = f"{user['first_name']}{ln}"
|
||||
uid = user["id"]
|
||||
username = user.get("username", "")
|
||||
|
@@ -1,80 +0,0 @@
|
||||
import logging
|
||||
import torch
|
||||
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
||||
|
||||
# Initialize the T5 model and tokenizer
|
||||
tokenizer = T5Tokenizer.from_pretrained("google/byt5-small")
|
||||
model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
|
||||
|
||||
def is_russian_wording(text):
    """
    Report whether *text* contains at least one Cyrillic character.

    A character counts as Cyrillic when its code point lies between
    U+0400 and U+04FF inclusive. Returns True as soon as one such
    character is found and False otherwise (including for empty text).
    """
    return any("\u0400" <= symbol <= "\u04FF" for symbol in text)
|
||||
|
||||
def segment_text(text):
    """
    Run the module-level T5 model over *text* and return its decoded output.

    The input is prefixed with "segment: ", encoded with the module-level
    tokenizer, passed through ``model.generate`` and the first generated
    sequence is decoded back to a string with special tokens removed.
    """
    prompt = "segment: " + text
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Inference only: no gradients are tracked while generating.
    # NOTE(review): generate() is called with default settings, so the output
    # length is bounded by the library default — confirm that is long enough.
    with torch.no_grad():
        generated = model.generate(input_ids)

    return tokenizer.decode(generated[0], skip_special_tokens=True)
|
||||
|
||||
# Latin letters and digits, each mapped to the Cyrillic letter that replaces it.
# No replacement is itself a key, so a single translate() pass gives the same
# result as applying the substitutions one after another.
_LATIN_TO_CYRILLIC = str.maketrans({
    "e": "е",
    "o": "о",
    "x": "х",
    "a": "а",
    "r": "г",
    "m": "м",
    "u": "и",
    "n": "п",
    "p": "р",
    "t": "т",
    "y": "у",
    "h": "н",
    "i": "й",
    "c": "с",
    "k": "к",
    "b": "в",
    "3": "з",
    "4": "ч",
    "0": "о",
    "d": "д",
    "z": "з",
})


def normalize(text):
    """
    Segment *text*, lower-case it and map Latin look-alikes to Cyrillic.

    Steps:
      1. Every run of whitespace in *text* is collapsed to a single space.
      2. The result is passed through ``segment_text`` and lower-cased.
      3. If the lower-cased text contains at least one Cyrillic character
         (per ``is_russian_wording``), the Latin letters and digits listed in
         ``_LATIN_TO_CYRILLIC`` are replaced by their Cyrillic counterparts.
         Otherwise the lower-cased segmented text is returned unchanged.

    Returns the resulting string.
    """
    import re

    # The previous chain of replace(' ', ' ') calls swapped a space for an
    # identical space and therefore did nothing; collapse whitespace for real.
    collapsed = re.sub(r"\s+", " ", text)

    segmented_text = segment_text(collapsed).lower()

    # Text without any Cyrillic is left alone so plain Latin input survives.
    if not is_russian_wording(segmented_text):
        return segmented_text

    return segmented_text.translate(_LATIN_TO_CYRILLIC)
|
||||
|
||||
# Example usage
|
||||
if __name__ == "__main__":
    # Demo run: normalize a fixed sample sentence and print the result.
    print(normalize("Hello, this is a test input."))
|
||||
|
114
utils/store.py
114
utils/store.py
@@ -1,114 +0,0 @@
|
||||
from bot.config import REDIS_URL
|
||||
import asyncio
|
||||
import redis.asyncio as aredis
|
||||
import logging
|
||||
|
||||
# Create a logger instance
|
||||
logger = logging.getLogger('store')
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
|
||||
class RedisService:
    """
    Thin asynchronous wrapper around a ``redis.asyncio`` client.

    The client is only created by :meth:`connect`. Until then, every method
    that talks to Redis returns without doing anything.
    """

    def __init__(self, uri=REDIS_URL):
        """Remember the connection URI; no connection is opened here."""
        self._uri: str = uri
        # Names of the channels currently subscribed through this service.
        self.pubsub_channels = []
        self._client = None
        # One long-lived PubSub object shared by subscribe()/unsubscribe().
        # A PubSub opened in a short ``async with`` block is torn down on exit,
        # which would drop the subscription immediately.
        self._pubsub = None

    async def connect(self):
        """Create the client from the stored URI; replies are decoded to str."""
        self._client = aredis.Redis.from_url(self._uri, decode_responses=True)

    async def disconnect(self):
        """Close the PubSub object (if one was opened) and then the client."""
        if self._pubsub is not None:
            await self._pubsub.close()
            self._pubsub = None
            self.pubsub_channels.clear()
        if self._client:
            await self._client.close()

    async def execute(self, command, *args, **kwargs):
        """
        Send an arbitrary command to Redis and return its reply.

        Returns ``None`` when there is no client or when the command raises;
        failures are logged with their traceback instead of propagating.
        """
        if not self._client:
            return None

        # Drop the "_sa_instance_state" entry from dict arguments in place
        # (presumably ORM bookkeeping that must not be sent — TODO confirm).
        for arg in args:
            if isinstance(arg, dict):
                arg.pop("_sa_instance_state", None)

        try:
            logger.debug("%s", command)
            return await self._client.execute_command(command, *args, **kwargs)
        except Exception:
            # Best-effort by design: callers receive None rather than an error.
            logger.exception("Redis command %r failed", command)
            return None

    async def subscribe(self, *channels):
        """Subscribe to each channel and record it in ``pubsub_channels``."""
        if not self._client:
            return
        if self._pubsub is None:
            self._pubsub = self._client.pubsub()
        for channel in channels:
            await self._pubsub.subscribe(channel)
            if channel not in self.pubsub_channels:
                self.pubsub_channels.append(channel)

    async def unsubscribe(self, *channels):
        """Unsubscribe from each channel and forget it if it was recorded."""
        if not self._client or self._pubsub is None:
            return
        for channel in channels:
            await self._pubsub.unsubscribe(channel)
            # Tolerate channels that were never recorded instead of raising.
            if channel in self.pubsub_channels:
                self.pubsub_channels.remove(channel)

    async def publish(self, channel, data):
        """Publish *data* on *channel*; does nothing without a client."""
        if not self._client:
            return
        await self._client.publish(channel, data)

    async def set(self, key, data, ex=None):
        """
        Store *data* under *key* through :meth:`execute`.

        When *ex* is not ``None`` it is sent as the ``EX`` option of the
        command. Nothing is returned.
        """
        args = [key, data]
        if ex is not None:
            args += ["EX", ex]
        await self.execute("set", *args)

    async def scan_iter(self, pattern='*'):
        """Asynchronously iterate over keys matching the given pattern."""
        if not self._client:
            return
        cursor = 0
        while True:
            cursor, keys = await self._client.scan(cursor=cursor, match=pattern)
            for key in keys:
                yield key
            # A returned cursor of zero marks the end of the scan.
            if int(cursor) == 0:
                break

    async def get(self, key):
        """Return the value stored under *key* via :meth:`execute`."""
        return await self.execute("get", key)
|
||||
|
||||
|
||||
redis = RedisService()
|
||||
|
||||
__all__ = ["redis"]
|
||||
|
||||
|
||||
|
||||
async def get_all_pattern(uid):
    """
    Collect the values stored under every key matching ``removed:{uid}:*``.

    Keys are discovered with ``redis.scan_iter`` and each value is fetched
    with ``redis.get``. Empty or missing values are skipped.

    Returns a list of strings, in the order the keys were scanned.
    """
    pattern = f"removed:{uid}:*"
    texts = []

    async for key in redis.scan_iter(pattern):
        value = await redis.get(key)
        if not value:
            continue
        # The client is created with decode_responses=True, so replies are
        # already str and calling .decode() on them would raise. Raw bytes are
        # still decoded in case the client is ever configured differently.
        if isinstance(value, (bytes, bytearray)):
            value = value.decode("utf-8")
        texts.append(value)

    return texts
|
||||
|
||||
|
||||
async def get_average_pattern(pattern):
    """
    Return the floored mean of the numeric values stored under *pattern*.

    Every key matching *pattern* is fetched through the module-level ``redis``
    service. Values that cannot be read as a finite number are ignored.

    Returns an int: ``floor(mean)`` of the collected scores, or 0 when no
    usable score was found.
    """
    # math is not imported in the visible part of this module, so import it here.
    import math

    scores = []
    async for key in redis.scan_iter(pattern):
        raw = await redis.get(key)
        # Redis replies arrive as strings (or None), never as int, so parse
        # the value instead of testing its type.
        try:
            score = float(raw)
        except (TypeError, ValueError):
            continue
        if math.isfinite(score):
            scores.append(score)

    logger.debug("found %d messages", len(scores))
    return math.floor(sum(scores) / len(scores)) if scores else 0
|
Reference in New Issue
Block a user