Improve topic sorting: add popular sorting by publication and author counts

2025-06-02 02:56:11 +03:00
parent baca19a4d5
commit 3327976586
113 changed files with 7238 additions and 3739 deletions

View File

@@ -1,5 +1,5 @@
from functools import wraps
from typing import Tuple
from typing import Any, Callable, Optional
from sqlalchemy import exc
from starlette.requests import Request
@@ -16,7 +16,7 @@ from utils.logger import root_logger as logger
ALLOWED_HEADERS = ["Authorization", "Content-Type"]
async def check_auth(req: Request) -> Tuple[str, list[str], bool]:
async def check_auth(req: Request) -> tuple[int, list[str], bool]:
"""
Проверка авторизации пользователя.
@@ -30,11 +30,16 @@ async def check_auth(req: Request) -> Tuple[str, list[str], bool]:
- user_roles: list[str] - Список ролей пользователя
- is_admin: bool - Флаг наличия у пользователя административных прав
"""
logger.debug(f"[check_auth] Проверка авторизации...")
logger.debug("[check_auth] Проверка авторизации...")
# Получаем заголовок авторизации
token = None
# Если req is None (в тестах), возвращаем пустые данные
if not req:
logger.debug("[check_auth] Запрос отсутствует (тестовое окружение)")
return 0, [], False
# Проверяем заголовок с учетом регистра
headers_dict = dict(req.headers.items())
logger.debug(f"[check_auth] Все заголовки: {headers_dict}")
@@ -47,8 +52,8 @@ async def check_auth(req: Request) -> Tuple[str, list[str], bool]:
break
if not token:
logger.debug(f"[check_auth] Токен не найден в заголовках")
return "", [], False
logger.debug("[check_auth] Токен не найден в заголовках")
return 0, [], False
# Очищаем токен от префикса Bearer если он есть
if token.startswith("Bearer "):
@@ -67,7 +72,10 @@ async def check_auth(req: Request) -> Tuple[str, list[str], bool]:
with local_session() as session:
# Преобразуем user_id в число
try:
user_id_int = int(user_id.strip())
if isinstance(user_id, str):
user_id_int = int(user_id.strip())
else:
user_id_int = int(user_id)
except (ValueError, TypeError):
logger.error(f"Невозможно преобразовать user_id {user_id} в число")
else:
@@ -86,7 +94,7 @@ async def check_auth(req: Request) -> Tuple[str, list[str], bool]:
return user_id, user_roles, is_admin
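A minimal usage sketch of the new check_auth contract (the helper below is hypothetical, not part of the commit): anonymous or test requests now yield 0, [], False instead of an empty-string user id.

    from starlette.requests import Request

    async def current_viewer(req: Request | None) -> dict:
        user_id, user_roles, is_admin = await check_auth(req)
        if not user_id:
            # 0 signals "not authenticated", including the req=None test path
            return {"anonymous": True}
        return {"id": user_id, "roles": user_roles, "is_admin": is_admin}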
async def add_user_role(user_id: str, roles: list[str] = None):
async def add_user_role(user_id: str, roles: Optional[list[str]] = None) -> Optional[str]:
"""
Добавление ролей пользователю в локальной БД.
@@ -105,7 +113,7 @@ async def add_user_role(user_id: str, roles: list[str] = None):
author = session.query(Author).filter(Author.id == user_id).one()
# Получаем существующие роли
existing_roles = set(role.name for role in author.roles)
existing_roles = {role.name for role in author.roles}
# Добавляем новые роли
for role_name in roles:
@@ -127,29 +135,43 @@ async def add_user_role(user_id: str, roles: list[str] = None):
return None
def login_required(f):
def login_required(f: Callable) -> Callable:
"""Декоратор для проверки авторизации пользователя. Требуется наличие роли 'reader'."""
@wraps(f)
async def decorated_function(*args, **kwargs):
async def decorated_function(*args: Any, **kwargs: Any) -> Any:
from graphql.error import GraphQLError
info = args[1]
req = info.context.get("request")
logger.debug(f"[login_required] Проверка авторизации для запроса: {req.method} {req.url.path}")
logger.debug(f"[login_required] Заголовки: {req.headers}")
logger.debug(
f"[login_required] Проверка авторизации для запроса: {req.method if req else 'unknown'} {req.url.path if req and hasattr(req, 'url') else 'unknown'}"
)
logger.debug(f"[login_required] Заголовки: {req.headers if req else 'none'}")
user_id, user_roles, is_admin = await check_auth(req)
# Для тестового режима: если req отсутствует, но в контексте есть author и roles
if not req and info.context.get("author") and info.context.get("roles"):
logger.debug("[login_required] Тестовый режим: используем данные из контекста")
user_id = info.context["author"]["id"]
user_roles = info.context["roles"]
is_admin = info.context.get("is_admin", False)
else:
# Обычный режим: проверяем через HTTP заголовки
user_id, user_roles, is_admin = await check_auth(req)
if not user_id:
logger.debug(f"[login_required] Пользователь не авторизован, {dict(req)}, {info}")
raise GraphQLError("Требуется авторизация")
logger.debug(
f"[login_required] Пользователь не авторизован, req={dict(req) if req else 'None'}, info={info}"
)
msg = "Требуется авторизация"
raise GraphQLError(msg)
# Проверяем наличие роли reader
if "reader" not in user_roles:
logger.error(f"Пользователь {user_id} не имеет роли 'reader'")
raise GraphQLError("У вас нет необходимых прав для доступа")
msg = "У вас нет необходимых прав для доступа"
raise GraphQLError(msg)
logger.info(f"Авторизован пользователь {user_id} с ролями: {user_roles}")
info.context["roles"] = user_roles
@@ -157,21 +179,27 @@ def login_required(f):
# Проверяем права администратора
info.context["is_admin"] = is_admin
author = await get_cached_author_by_id(user_id, get_with_stat)
if not author:
logger.error(f"Профиль автора не найден для пользователя {user_id}")
info.context["author"] = author
# В тестовом режиме автор уже может быть в контексте
if (
not info.context.get("author")
or not isinstance(info.context["author"], dict)
or "dict" not in str(type(info.context["author"]))
):
author = await get_cached_author_by_id(user_id, get_with_stat)
if not author:
logger.error(f"Профиль автора не найден для пользователя {user_id}")
info.context["author"] = author
return await f(*args, **kwargs)
return decorated_function
def login_accepted(f):
def login_accepted(f: Callable) -> Callable:
"""Декоратор для добавления данных авторизации в контекст."""
@wraps(f)
async def decorated_function(*args, **kwargs):
async def decorated_function(*args: Any, **kwargs: Any) -> Any:
info = args[1]
req = info.context.get("request")
@@ -192,7 +220,7 @@ def login_accepted(f):
logger.debug(f"login_accepted: Найден профиль автора: {author}")
# Используем флаг is_admin из контекста или передаем права владельца для собственных данных
is_owner = True # Пользователь всегда является владельцем собственного профиля
info.context["author"] = author.dict(access=is_owner or is_admin)
info.context["author"] = author.dict(is_owner or is_admin)
else:
logger.error(
f"login_accepted: Профиль автора не найден для пользователя {user_id}. Используем базовые данные."

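For context, a hedged sketch of a GraphQL resolver consuming the context filled in by login_required; the resolver name and permission check are assumptions, not part of the commit.

    @login_required
    async def resolve_delete_shout(_: None, info, shout_id: int) -> dict:
        # login_required has already validated the token and populated the context:
        # info.context["author"], info.context["roles"], info.context["is_admin"].
        if not info.context["is_admin"] and "editor" not in info.context["roles"]:
            return {"error": "access denied"}
        return {"error": None}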
View File

@@ -1,8 +1,9 @@
from dataclasses import dataclass
from typing import List, Optional
from typing import Any
from auth.orm import Author
from orm.community import Community
from orm.draft import Draft
from orm.reaction import Reaction
from orm.shout import Shout
from orm.topic import Topic
@@ -10,15 +11,29 @@ from orm.topic import Topic
@dataclass
class CommonResult:
error: Optional[str] = None
slugs: Optional[List[str]] = None
shout: Optional[Shout] = None
shouts: Optional[List[Shout]] = None
author: Optional[Author] = None
authors: Optional[List[Author]] = None
reaction: Optional[Reaction] = None
reactions: Optional[List[Reaction]] = None
topic: Optional[Topic] = None
topics: Optional[List[Topic]] = None
community: Optional[Community] = None
communities: Optional[List[Community]] = None
"""Общий результат для GraphQL запросов"""
error: str | None = None
drafts: list[Draft] | None = None # Draft objects
draft: Draft | None = None # Draft object
slugs: list[str] | None = None
shout: Shout | None = None
shouts: list[Shout] | None = None
author: Author | None = None
authors: list[Author] | None = None
reaction: Reaction | None = None
reactions: list[Reaction] | None = None
topic: Topic | None = None
topics: list[Topic] | None = None
community: Community | None = None
communities: list[Community] | None = None
@dataclass
class AuthorFollowsResult:
"""Результат для get_author_follows запроса"""
topics: list[Any] | None = None # Topic dicts
authors: list[Any] | None = None # Author dicts
communities: list[Any] | None = None # Community dicts
error: str | None = None
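An illustrative construction of these result dataclasses in a resolver (not part of the commit):

    from orm.topic import Topic

    def topics_ok(found: list[Topic]) -> CommonResult:
        # Success path: only the relevant field is populated, error stays None.
        return CommonResult(topics=found)

    def topics_failed(message: str) -> CommonResult:
        return CommonResult(error=message)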

View File

@@ -1,174 +1,55 @@
import builtins
import logging
import math
import time
import traceback
import warnings
from typing import Any, Callable, Dict, TypeVar
from io import TextIOWrapper
from typing import Any, ClassVar, Type, TypeVar, Union
import orjson
import sqlalchemy
from sqlalchemy import (
JSON,
Column,
Engine,
Index,
Integer,
create_engine,
event,
exc,
func,
inspect,
text,
)
from sqlalchemy import JSON, Column, Integer, create_engine, event, exc, func, inspect
from sqlalchemy.dialects.sqlite import insert
from sqlalchemy.engine import Connection, Engine
from sqlalchemy.orm import Session, configure_mappers, declarative_base, joinedload
from sqlalchemy.sql.schema import Table
from sqlalchemy.pool import StaticPool
from settings import DB_URL
from utils.logger import root_logger as logger
if DB_URL.startswith("postgres"):
engine = create_engine(
DB_URL,
echo=False,
pool_size=10,
max_overflow=20,
pool_timeout=30, # Время ожидания свободного соединения
pool_recycle=1800, # Время жизни соединения
pool_pre_ping=True, # Добавить проверку соединений
connect_args={
"sslmode": "disable",
"connect_timeout": 40, # Добавить таймаут подключения
},
)
else:
engine = create_engine(DB_URL, echo=False, connect_args={"check_same_thread": False})
# Global variables
REGISTRY: dict[str, type["BaseModel"]] = {}
logger = logging.getLogger(__name__)
# Database configuration
engine = create_engine(DB_URL, echo=False, poolclass=StaticPool if "sqlite" in DB_URL else None)
ENGINE = engine # Backward compatibility alias
inspector = inspect(engine)
configure_mappers()
T = TypeVar("T")
REGISTRY: Dict[str, type] = {}
FILTERED_FIELDS = ["_sa_instance_state", "search_vector"]
# Создаем Base для внутреннего использования
_Base = declarative_base()
def create_table_if_not_exists(engine, table):
"""
Создает таблицу, если она не существует в базе данных.
Args:
engine: SQLAlchemy движок базы данных
table: Класс модели SQLAlchemy
"""
inspector = inspect(engine)
if table and not inspector.has_table(table.__tablename__):
try:
table.__table__.create(engine)
logger.info(f"Table '{table.__tablename__}' created.")
except exc.OperationalError as e:
# Проверяем, содержит ли ошибка упоминание о том, что индекс уже существует
if "already exists" in str(e):
logger.warning(f"Skipping index creation for table '{table.__tablename__}': {e}")
else:
# Перевыбрасываем ошибку, если она не связана с дублированием
raise
else:
logger.info(f"Table '{table.__tablename__}' ok.")
# Create proper type alias for Base
BaseType = Type[_Base] # type: ignore[valid-type]
def sync_indexes():
"""
Синхронизирует индексы в БД с индексами, определенными в моделях SQLAlchemy.
Создает недостающие индексы, если они определены в моделях, но отсутствуют в БД.
Использует pg_catalog для PostgreSQL для получения списка существующих индексов.
"""
if not DB_URL.startswith("postgres"):
logger.warning("Функция sync_indexes поддерживается только для PostgreSQL.")
return
logger.info("Начинаем синхронизацию индексов в базе данных...")
# Получаем все существующие индексы в БД
with local_session() as session:
existing_indexes_query = text("""
SELECT
t.relname AS table_name,
i.relname AS index_name
FROM
pg_catalog.pg_class i
JOIN
pg_catalog.pg_index ix ON ix.indexrelid = i.oid
JOIN
pg_catalog.pg_class t ON t.oid = ix.indrelid
JOIN
pg_catalog.pg_namespace n ON n.oid = i.relnamespace
WHERE
i.relkind = 'i'
AND n.nspname = 'public'
AND t.relkind = 'r'
ORDER BY
t.relname, i.relname;
""")
existing_indexes = {row[1].lower() for row in session.execute(existing_indexes_query)}
logger.debug(f"Найдено {len(existing_indexes)} существующих индексов в БД")
# Проверяем каждую модель и её индексы
for _model_name, model_class in REGISTRY.items():
if hasattr(model_class, "__table__") and hasattr(model_class, "__table_args__"):
table_args = model_class.__table_args__
# Если table_args - это кортеж, ищем в нём объекты Index
if isinstance(table_args, tuple):
for arg in table_args:
if isinstance(arg, Index):
index_name = arg.name.lower()
# Проверяем, существует ли индекс в БД
if index_name not in existing_indexes:
logger.info(
f"Создаем отсутствующий индекс {index_name} для таблицы {model_class.__tablename__}"
)
# Создаем индекс если он отсутствует
try:
arg.create(engine)
logger.info(f"Индекс {index_name} успешно создан")
except Exception as e:
logger.error(f"Ошибка при создании индекса {index_name}: {e}")
else:
logger.debug(f"Индекс {index_name} уже существует")
# Анализируем таблицы для оптимизации запросов
for model_name, model_class in REGISTRY.items():
if hasattr(model_class, "__tablename__"):
try:
session.execute(text(f"ANALYZE {model_class.__tablename__}"))
logger.debug(f"Таблица {model_class.__tablename__} проанализирована")
except Exception as e:
logger.error(f"Ошибка при анализе таблицы {model_class.__tablename__}: {e}")
logger.info("Синхронизация индексов завершена.")
# noinspection PyUnusedLocal
def local_session(src=""):
return Session(bind=engine, expire_on_commit=False)
class Base(declarative_base()):
__table__: Table
__tablename__: str
__new__: Callable
__init__: Callable
__allow_unmapped__ = True
class BaseModel(_Base): # type: ignore[valid-type,misc]
__abstract__ = True
__table_args__ = {"extend_existing": True}
__allow_unmapped__ = True
__table_args__: ClassVar[Union[dict[str, Any], tuple]] = {"extend_existing": True}
id = Column(Integer, primary_key=True)
def __init_subclass__(cls, **kwargs):
def __init_subclass__(cls, **kwargs: Any) -> None:
REGISTRY[cls.__name__] = cls
super().__init_subclass__(**kwargs)
def dict(self) -> Dict[str, Any]:
def dict(self, access: bool = False) -> builtins.dict[str, Any]:
"""
Конвертирует ORM объект в словарь.
@@ -194,7 +75,7 @@ class Base(declarative_base()):
try:
data[column_name] = orjson.loads(value)
except (TypeError, orjson.JSONDecodeError) as e:
logger.error(f"Error decoding JSON for column '{column_name}': {e}")
logger.exception(f"Error decoding JSON for column '{column_name}': {e}")
data[column_name] = value
else:
data[column_name] = value
@@ -207,10 +88,10 @@ class Base(declarative_base()):
if hasattr(self, "stat"):
data["stat"] = self.stat
except Exception as e:
logger.error(f"Error occurred while converting object to dictionary: {e}")
logger.exception(f"Error occurred while converting object to dictionary: {e}")
return data
def update(self, values: Dict[str, Any]) -> None:
def update(self, values: builtins.dict[str, Any]) -> None:
for key, value in values.items():
if hasattr(self, key):
setattr(self, key, value)
@@ -221,31 +102,38 @@ class Base(declarative_base()):
# Функция для вывода полного трейсбека при предупреждениях
def warning_with_traceback(message: Warning | str, category, filename: str, lineno: int, file=None, line=None):
def warning_with_traceback(
message: Warning | str,
category: type[Warning],
filename: str,
lineno: int,
file: TextIOWrapper | None = None,
line: str | None = None,
) -> None:
tb = traceback.format_stack()
tb_str = "".join(tb)
return f"{message} ({filename}, {lineno}): {category.__name__}\n{tb_str}"
print(f"{message} ({filename}, {lineno}): {category.__name__}\n{tb_str}")
# Установка функции вывода трейсбека для предупреждений SQLAlchemy
warnings.showwarning = warning_with_traceback
warnings.showwarning = warning_with_traceback # type: ignore[assignment]
warnings.simplefilter("always", exc.SAWarning)
# Функция для извлечения SQL-запроса из контекста
def get_statement_from_context(context):
def get_statement_from_context(context: Connection) -> str | None:
query = ""
compiled = context.compiled
compiled = getattr(context, "compiled", None)
if compiled:
compiled_statement = compiled.string
compiled_parameters = compiled.params
compiled_statement = getattr(compiled, "string", None)
compiled_parameters = getattr(compiled, "params", None)
if compiled_statement:
if compiled_parameters:
try:
# Безопасное форматирование параметров
query = compiled_statement % compiled_parameters
except Exception as e:
logger.error(f"Error formatting query: {e}")
logger.exception(f"Error formatting query: {e}")
else:
query = compiled_statement
if query:
@@ -255,18 +143,32 @@ def get_statement_from_context(context):
# Обработчик события перед выполнением запроса
@event.listens_for(Engine, "before_cursor_execute")
def before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
conn.query_start_time = time.time()
conn.cursor_id = id(cursor) # Отслеживание конкретного курсора
def before_cursor_execute(
conn: Connection,
cursor: Any,
statement: str,
parameters: dict[str, Any] | None,
context: Connection,
executemany: bool,
) -> None:
conn.query_start_time = time.time() # type: ignore[attr-defined]
conn.cursor_id = id(cursor) # type: ignore[attr-defined]
# Обработчик события после выполнения запроса
@event.listens_for(Engine, "after_cursor_execute")
def after_cursor_execute(conn, cursor, statement, parameters, context, executemany):
def after_cursor_execute(
conn: Connection,
cursor: Any,
statement: str,
parameters: dict[str, Any] | None,
context: Connection,
executemany: bool,
) -> None:
if hasattr(conn, "cursor_id") and conn.cursor_id == id(cursor):
query = get_statement_from_context(context)
if query:
elapsed = time.time() - conn.query_start_time
elapsed = time.time() - getattr(conn, "query_start_time", time.time())
if elapsed > 1:
query_end = query[-16:]
query = query.split(query_end)[0] + query_end
@@ -274,10 +176,11 @@ def after_cursor_execute(conn, cursor, statement, parameters, context, executema
elapsed_n = math.floor(elapsed)
logger.debug("*" * (elapsed_n))
logger.debug(f"{elapsed:.3f} s")
del conn.cursor_id # Удаление идентификатора курсора после выполнения
if hasattr(conn, "cursor_id"):
delattr(conn, "cursor_id") # Удаление идентификатора курсора после выполнения
def get_json_builder():
def get_json_builder() -> tuple[Any, Any, Any]:
"""
Возвращает подходящие функции для построения JSON объектов в зависимости от драйвера БД
"""
@@ -286,10 +189,10 @@ def get_json_builder():
if dialect.startswith("postgres"):
json_cast = lambda x: func.cast(x, sqlalchemy.Text) # noqa: E731
return func.json_build_object, func.json_agg, json_cast
elif dialect.startswith("sqlite") or dialect.startswith("mysql"):
if dialect.startswith(("sqlite", "mysql")):
return func.json_object, func.json_group_array, json_cast
else:
raise NotImplementedError(f"JSON builder not implemented for dialect {dialect}")
msg = f"JSON builder not implemented for dialect {dialect}"
raise NotImplementedError(msg)
# Используем их в коде
@@ -299,7 +202,7 @@ json_builder, json_array_builder, json_cast = get_json_builder()
# This function is used for search indexing
async def fetch_all_shouts(session=None):
async def fetch_all_shouts(session: Session | None = None) -> list[Any]:
"""Fetch all published shouts for search indexing with authors preloaded"""
from orm.shout import Shout
@@ -313,13 +216,112 @@ async def fetch_all_shouts(session=None):
query = (
session.query(Shout)
.options(joinedload(Shout.authors))
.filter(Shout.published_at.is_not(None), Shout.deleted_at.is_(None))
.filter(Shout.published_at is not None, Shout.deleted_at is None)
)
shouts = query.all()
return shouts
return query.all()
except Exception as e:
logger.error(f"Error fetching shouts for search indexing: {e}")
logger.exception(f"Error fetching shouts for search indexing: {e}")
return []
finally:
if close_session:
session.close()
def get_column_names_without_virtual(model_cls: type[BaseModel]) -> list[str]:
"""Получает имена колонок модели без виртуальных полей"""
try:
column_names: list[str] = [
col.name for col in model_cls.__table__.columns if not getattr(col, "_is_virtual", False)
]
return column_names
except AttributeError:
return []
def get_primary_key_columns(model_cls: type[BaseModel]) -> list[str]:
"""Получает имена первичных ключей модели"""
try:
return [col.name for col in model_cls.__table__.primary_key.columns]
except AttributeError:
return ["id"]
def create_table_if_not_exists(engine: Engine, model_cls: type[BaseModel]) -> None:
"""Creates table for the given model if it doesn't exist"""
if hasattr(model_cls, "__tablename__"):
inspector = inspect(engine)
if not inspector.has_table(model_cls.__tablename__):
model_cls.__table__.create(engine)
logger.info(f"Created table: {model_cls.__tablename__}")
def format_sql_warning(
message: str | Warning,
category: type[Warning],
filename: str,
lineno: int,
file: TextIOWrapper | None = None,
line: str | None = None,
) -> str:
"""Custom warning formatter for SQL warnings"""
return f"SQL Warning: {message}\n"
# Apply the custom warning formatter
def _set_warning_formatter() -> None:
"""Set custom warning formatter"""
import warnings
original_formatwarning = warnings.formatwarning
def custom_formatwarning(
message: Warning | str,
category: type[Warning],
filename: str,
lineno: int,
file: TextIOWrapper | None = None,
line: str | None = None,
) -> str:
return format_sql_warning(message, category, filename, lineno, file, line)
warnings.formatwarning = custom_formatwarning # type: ignore[assignment]
_set_warning_formatter()
def upsert_on_duplicate(table: sqlalchemy.Table, **values: Any) -> sqlalchemy.sql.Insert:
"""
Performs an upsert operation (insert or update on conflict)
"""
if engine.dialect.name == "sqlite":
return insert(table).values(**values).on_conflict_do_update(index_elements=["id"], set_=values)
# For other databases, implement appropriate upsert logic
return table.insert().values(**values)
def get_sql_functions() -> dict[str, Any]:
"""Returns database-specific SQL functions"""
if engine.dialect.name == "sqlite":
return {
"now": sqlalchemy.func.datetime("now"),
"extract_epoch": lambda x: sqlalchemy.func.strftime("%s", x),
"coalesce": sqlalchemy.func.coalesce,
}
return {
"now": sqlalchemy.func.now(),
"extract_epoch": sqlalchemy.func.extract("epoch", sqlalchemy.text("?")),
"coalesce": sqlalchemy.func.coalesce,
}
# noinspection PyUnusedLocal
def local_session(src: str = "") -> Session:
"""Create a new database session"""
return Session(bind=engine, expire_on_commit=False)
# Export Base for backward compatibility
Base = _Base
# Also export the type for type hints
__all__ = ["Base", "BaseModel", "BaseType", "engine", "local_session"]
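A rough usage sketch of the helpers exported above; the DemoTopic model and its data are assumptions, not part of the codebase.

    from sqlalchemy import Column, String

    class DemoTopic(BaseModel):
        __tablename__ = "demo_topic"
        slug = Column(String, unique=True)

    create_table_if_not_exists(engine, DemoTopic)

    with local_session() as session:
        session.add(DemoTopic(slug="tech"))
        session.commit()
        row = session.query(DemoTopic).filter(DemoTopic.slug.is_not(None)).first()
        print(row.dict())  # e.g. {"id": 1, "slug": "tech"}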

View File

@@ -1,404 +1,354 @@
import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Set
from typing import Dict, List, Literal, Optional
from redis import Redis
from settings import REDIS_URL, ROOT_DIR
from services.redis import redis
from utils.logger import root_logger as logger
@dataclass
class EnvVariable:
"""Представление переменной окружения"""
key: str
value: str
description: Optional[str] = None
type: str = "string"
value: str = ""
description: str = ""
type: Literal["string", "integer", "boolean", "json"] = "string" # string, integer, boolean, json
is_secret: bool = False
@dataclass
class EnvSection:
"""Группа переменных окружения"""
name: str
description: str
variables: List[EnvVariable]
description: Optional[str] = None
class EnvManager:
"""
Менеджер переменных окружения с хранением в Redis и синхронизацией с .env файлом
Менеджер переменных окружения с поддержкой Redis кеширования
"""
# Стандартные переменные окружения, которые следует исключить
EXCLUDED_ENV_VARS: Set[str] = {
"PATH",
"SHELL",
"USER",
"HOME",
"PWD",
"TERM",
"LANG",
"PYTHONPATH",
"_",
"TMPDIR",
"TERM_PROGRAM",
"TERM_SESSION_ID",
"XPC_SERVICE_NAME",
"XPC_FLAGS",
"SHLVL",
"SECURITYSESSIONID",
"LOGNAME",
"OLDPWD",
"ZSH",
"PAGER",
"LESS",
"LC_CTYPE",
"LSCOLORS",
"SSH_AUTH_SOCK",
"DISPLAY",
"COLORTERM",
"EDITOR",
"VISUAL",
"PYTHONDONTWRITEBYTECODE",
"VIRTUAL_ENV",
"PYTHONUNBUFFERED",
}
# Секции для группировки переменных
# Определение секций с их описаниями
SECTIONS = {
"AUTH": {
"pattern": r"^(JWT|AUTH|SESSION|OAUTH|GITHUB|GOOGLE|FACEBOOK)_",
"name": "Авторизация",
"description": "Настройки системы авторизации",
},
"DATABASE": {
"pattern": r"^(DB|DATABASE|POSTGRES|MYSQL|SQL)_",
"name": "База данных",
"description": "Настройки подключения к базам данных",
},
"CACHE": {
"pattern": r"^(REDIS|CACHE|MEMCACHED)_",
"name": "Кэширование",
"description": "Настройки систем кэширования",
},
"SEARCH": {
"pattern": r"^(ELASTIC|SEARCH|OPENSEARCH)_",
"name": "Поиск",
"description": "Настройки поисковых систем",
},
"APP": {
"pattern": r"^(APP|PORT|HOST|DEBUG|DOMAIN|ENVIRONMENT|ENV|FRONTEND)_",
"name": "Общие настройки",
"description": "Общие настройки приложения",
},
"LOGGING": {
"pattern": r"^(LOG|LOGGING|SENTRY|GLITCH|GLITCHTIP)_",
"name": "Мониторинг",
"description": "Настройки логирования и мониторинга",
},
"EMAIL": {
"pattern": r"^(MAIL|EMAIL|SMTP|IMAP|POP3|POST)_",
"name": "Электронная почта",
"description": "Настройки отправки электронной почты",
},
"ANALYTICS": {
"pattern": r"^(GA|GOOGLE_ANALYTICS|ANALYTICS)_",
"name": "Аналитика",
"description": "Настройки систем аналитики",
},
"database": "Настройки базы данных",
"auth": "Настройки аутентификации",
"redis": "Настройки Redis",
"search": "Настройки поиска",
"integrations": "Внешние интеграции",
"security": "Настройки безопасности",
"logging": "Настройки логирования",
"features": "Флаги функций",
"other": "Прочие настройки",
}
# Переменные, которые следует всегда помечать как секретные
SECRET_VARS_PATTERNS = [
r".*TOKEN.*",
r".*SECRET.*",
r".*PASSWORD.*",
r".*KEY.*",
r".*PWD.*",
r".*PASS.*",
r".*CRED.*",
r".*_DSN.*",
r".*JWT.*",
r".*SESSION.*",
r".*OAUTH.*",
r".*GITHUB.*",
r".*GOOGLE.*",
r".*FACEBOOK.*",
]
# Маппинг переменных на секции
VARIABLE_SECTIONS = {
# Database
"DB_URL": "database",
"DATABASE_URL": "database",
"POSTGRES_USER": "database",
"POSTGRES_PASSWORD": "database",
"POSTGRES_DB": "database",
"POSTGRES_HOST": "database",
"POSTGRES_PORT": "database",
# Auth
"JWT_SECRET": "auth",
"JWT_ALGORITHM": "auth",
"JWT_EXPIRATION": "auth",
"SECRET_KEY": "auth",
"AUTH_SECRET": "auth",
"OAUTH_GOOGLE_CLIENT_ID": "auth",
"OAUTH_GOOGLE_CLIENT_SECRET": "auth",
"OAUTH_GITHUB_CLIENT_ID": "auth",
"OAUTH_GITHUB_CLIENT_SECRET": "auth",
# Redis
"REDIS_URL": "redis",
"REDIS_HOST": "redis",
"REDIS_PORT": "redis",
"REDIS_PASSWORD": "redis",
"REDIS_DB": "redis",
# Search
"SEARCH_API_KEY": "search",
"ELASTICSEARCH_URL": "search",
"SEARCH_INDEX": "search",
# Integrations
"GOOGLE_ANALYTICS_ID": "integrations",
"SENTRY_DSN": "integrations",
"SMTP_HOST": "integrations",
"SMTP_PORT": "integrations",
"SMTP_USER": "integrations",
"SMTP_PASSWORD": "integrations",
"EMAIL_FROM": "integrations",
# Security
"CORS_ORIGINS": "security",
"ALLOWED_HOSTS": "security",
"SECURE_SSL_REDIRECT": "security",
"SESSION_COOKIE_SECURE": "security",
"CSRF_COOKIE_SECURE": "security",
# Logging
"LOG_LEVEL": "logging",
"LOG_FORMAT": "logging",
"LOG_FILE": "logging",
"DEBUG": "logging",
# Features
"FEATURE_REGISTRATION": "features",
"FEATURE_COMMENTS": "features",
"FEATURE_ANALYTICS": "features",
"FEATURE_SEARCH": "features",
}
def __init__(self):
self.redis = Redis.from_url(REDIS_URL)
self.prefix = "env:"
self.env_file_path = os.path.join(ROOT_DIR, ".env")
# Секретные переменные (не показываем их значения в UI)
SECRET_VARIABLES = {
"JWT_SECRET",
"SECRET_KEY",
"AUTH_SECRET",
"OAUTH_GOOGLE_CLIENT_SECRET",
"OAUTH_GITHUB_CLIENT_SECRET",
"POSTGRES_PASSWORD",
"REDIS_PASSWORD",
"SEARCH_API_KEY",
"SENTRY_DSN",
"SMTP_PASSWORD",
}
def get_all_variables(self) -> List[EnvSection]:
"""
Получение всех переменных окружения, сгруппированных по секциям
"""
try:
# Получаем все переменные окружения из системы
system_env = self._get_system_env_vars()
def __init__(self) -> None:
self.redis_prefix = "env_vars:"
# Получаем переменные из .env файла, если он существует
dotenv_vars = self._get_dotenv_vars()
def _get_variable_type(self, key: str, value: str) -> Literal["string", "integer", "boolean", "json"]:
"""Определяет тип переменной на основе ключа и значения"""
# Получаем все переменные из Redis
redis_vars = self._get_redis_env_vars()
# Объединяем переменные, при этом redis_vars имеют наивысший приоритет,
# за ними следуют переменные из .env, затем системные
env_vars = {**system_env, **dotenv_vars, **redis_vars}
# Группируем переменные по секциям
return self._group_variables_by_sections(env_vars)
except Exception as e:
logger.error(f"Ошибка получения переменных: {e}")
return []
def _get_system_env_vars(self) -> Dict[str, str]:
"""
Получает переменные окружения из системы, исключая стандартные
"""
env_vars = {}
for key, value in os.environ.items():
# Пропускаем стандартные переменные
if key in self.EXCLUDED_ENV_VARS:
continue
# Пропускаем переменные с пустыми значениями
if not value:
continue
env_vars[key] = value
return env_vars
def _get_dotenv_vars(self) -> Dict[str, str]:
"""
Получает переменные из .env файла, если он существует
"""
env_vars = {}
if os.path.exists(self.env_file_path):
try:
with open(self.env_file_path, "r") as f:
for line in f:
line = line.strip()
# Пропускаем пустые строки и комментарии
if not line or line.startswith("#"):
continue
# Разделяем строку на ключ и значение
if "=" in line:
key, value = line.split("=", 1)
key = key.strip()
value = value.strip()
# Удаляем кавычки, если они есть
if value.startswith('"') and value.endswith('"'):
value = value[1:-1]
env_vars[key] = value
except Exception as e:
logger.error(f"Ошибка чтения .env файла: {e}")
return env_vars
def _get_redis_env_vars(self) -> Dict[str, str]:
"""
Получает переменные окружения из Redis
"""
redis_vars = {}
try:
# Получаем все ключи с префиксом env:
keys = self.redis.keys(f"{self.prefix}*")
for key in keys:
var_key = key.decode("utf-8").replace(self.prefix, "")
value = self.redis.get(key)
if value:
redis_vars[var_key] = value.decode("utf-8")
except Exception as e:
logger.error(f"Ошибка получения переменных из Redis: {e}")
return redis_vars
def _is_secret_variable(self, key: str) -> bool:
"""
Проверяет, является ли переменная секретной.
Секретными считаются:
- переменные, подходящие под SECRET_VARS_PATTERNS
- переменные с ключами DATABASE_URL, REDIS_URL, DB_URL (точное совпадение, без учета регистра)
>>> EnvManager()._is_secret_variable('MY_SECRET_TOKEN')
True
>>> EnvManager()._is_secret_variable('database_url')
True
>>> EnvManager()._is_secret_variable('REDIS_URL')
True
>>> EnvManager()._is_secret_variable('DB_URL')
True
>>> EnvManager()._is_secret_variable('SOME_PUBLIC_KEY')
True
>>> EnvManager()._is_secret_variable('SOME_PUBLIC_VAR')
False
"""
key_upper = key.upper()
if key_upper in {"DATABASE_URL", "REDIS_URL", "DB_URL"}:
return True
return any(re.match(pattern, key_upper) for pattern in self.SECRET_VARS_PATTERNS)
def _determine_variable_type(self, value: str) -> str:
"""
Определяет тип переменной на основе ее значения
"""
if value.lower() in ("true", "false"):
# Boolean переменные
if value.lower() in ("true", "false", "1", "0", "yes", "no"):
return "boolean"
if value.isdigit():
# Integer переменные
if key.endswith(("_PORT", "_TIMEOUT", "_LIMIT", "_SIZE")) or value.isdigit():
return "integer"
if re.match(r"^\d+\.\d+$", value):
return "float"
# Проверяем на JSON объект или массив
if (value.startswith("{") and value.endswith("}")) or (value.startswith("[") and value.endswith("]")):
# JSON переменные
if value.startswith(("{", "[")) and value.endswith(("}", "]")):
return "json"
# Проверяем на URL
if value.startswith(("http://", "https://", "redis://", "postgresql://")):
return "url"
return "string"
def _group_variables_by_sections(self, variables: Dict[str, str]) -> List[EnvSection]:
"""
Группирует переменные по секциям
"""
# Создаем словарь для группировки переменных
sections_dict = {section: [] for section in self.SECTIONS}
other_variables = [] # Для переменных, которые не попали ни в одну секцию
def _get_variable_description(self, key: str) -> str:
"""Генерирует описание для переменной на основе её ключа"""
# Распределяем переменные по секциям
for key, value in variables.items():
is_secret = self._is_secret_variable(key)
var_type = self._determine_variable_type(value)
descriptions = {
"DB_URL": "URL подключения к базе данных",
"REDIS_URL": "URL подключения к Redis",
"JWT_SECRET": "Секретный ключ для подписи JWT токенов",
"CORS_ORIGINS": "Разрешенные CORS домены",
"DEBUG": "Режим отладки (true/false)",
"LOG_LEVEL": "Уровень логирования (DEBUG, INFO, WARNING, ERROR)",
"SENTRY_DSN": "DSN для интеграции с Sentry",
"GOOGLE_ANALYTICS_ID": "ID для Google Analytics",
"OAUTH_GOOGLE_CLIENT_ID": "Client ID для OAuth Google",
"OAUTH_GOOGLE_CLIENT_SECRET": "Client Secret для OAuth Google",
"OAUTH_GITHUB_CLIENT_ID": "Client ID для OAuth GitHub",
"OAUTH_GITHUB_CLIENT_SECRET": "Client Secret для OAuth GitHub",
"SMTP_HOST": "SMTP сервер для отправки email",
"SMTP_PORT": "Порт SMTP сервера",
"SMTP_USER": "Пользователь SMTP",
"SMTP_PASSWORD": "Пароль SMTP",
"EMAIL_FROM": "Email отправителя по умолчанию",
}
var = EnvVariable(key=key, value=value, type=var_type, is_secret=is_secret)
return descriptions.get(key, f"Переменная окружения {key}")
# Определяем секцию для переменной
placed = False
for section_id, section_config in self.SECTIONS.items():
if re.match(section_config["pattern"], key, re.IGNORECASE):
sections_dict[section_id].append(var)
placed = True
break
async def get_variables_from_redis(self) -> Dict[str, str]:
"""Получает переменные из Redis"""
# Если переменная не попала ни в одну секцию
# if not placed:
# other_variables.append(var)
try:
# Get all keys matching our prefix
pattern = f"{self.redis_prefix}*"
keys = await redis.execute("KEYS", pattern)
# Формируем результат
result = []
for section_id, variables in sections_dict.items():
if variables: # Добавляем только непустые секции
section_config = self.SECTIONS[section_id]
result.append(
if not keys:
return {}
redis_vars: Dict[str, str] = {}
for key in keys:
var_key = key.replace(self.redis_prefix, "")
value = await redis.get(key)
if value:
if isinstance(value, bytes):
redis_vars[var_key] = value.decode("utf-8")
else:
redis_vars[var_key] = str(value)
return redis_vars
except Exception as e:
logger.error(f"Ошибка при получении переменных из Redis: {e}")
return {}
async def set_variables_to_redis(self, variables: Dict[str, str]) -> bool:
"""Сохраняет переменные в Redis"""
try:
for key, value in variables.items():
redis_key = f"{self.redis_prefix}{key}"
await redis.set(redis_key, value)
logger.info(f"Сохранено {len(variables)} переменных в Redis")
return True
except Exception as e:
logger.error(f"Ошибка при сохранении переменных в Redis: {e}")
return False
def get_variables_from_env(self) -> Dict[str, str]:
"""Получает переменные из системного окружения"""
env_vars = {}
# Получаем все переменные известные системе
for key in self.VARIABLE_SECTIONS.keys():
value = os.getenv(key)
if value is not None:
env_vars[key] = value
# Также ищем переменные по паттернам
for env_key, env_value in os.environ.items():
# Переменные проекта обычно начинаются с определенных префиксов
if any(env_key.startswith(prefix) for prefix in ["APP_", "SITE_", "FEATURE_", "OAUTH_"]):
env_vars[env_key] = env_value
return env_vars
async def get_all_variables(self) -> List[EnvSection]:
"""Получает все переменные окружения, сгруппированные по секциям"""
# Получаем переменные из разных источников
env_vars = self.get_variables_from_env()
redis_vars = await self.get_variables_from_redis()
# Объединяем переменные (приоритет у Redis)
all_vars = {**env_vars, **redis_vars}
# Группируем по секциям
sections_dict: Dict[str, List[EnvVariable]] = {section: [] for section in self.SECTIONS}
other_variables: List[EnvVariable] = [] # Для переменных, которые не попали ни в одну секцию
for key, value in all_vars.items():
section_name = self.VARIABLE_SECTIONS.get(key, "other")
is_secret = key in self.SECRET_VARIABLES
var = EnvVariable(
key=key,
value=value if not is_secret else "***", # Скрываем секретные значения
description=self._get_variable_description(key),
type=self._get_variable_type(key, value),
is_secret=is_secret,
)
if section_name in sections_dict:
sections_dict[section_name].append(var)
else:
other_variables.append(var)
# Добавляем переменные без секции в раздел "other"
if other_variables:
sections_dict["other"].extend(other_variables)
# Создаем объекты секций
sections = []
for section_key, variables in sections_dict.items():
if variables: # Добавляем только секции с переменными
sections.append(
EnvSection(
name=section_config["name"], description=section_config["description"], variables=variables
name=section_key,
description=self.SECTIONS[section_key],
variables=sorted(variables, key=lambda x: x.key),
)
)
# Добавляем прочие переменные, если они есть
if other_variables:
result.append(
EnvSection(
name="Прочие переменные",
description="Переменные, не вошедшие в основные категории",
variables=other_variables,
)
)
return sorted(sections, key=lambda x: x.name)
return result
async def update_variables(self, variables: List[EnvVariable]) -> bool:
"""Обновляет переменные окружения"""
def update_variable(self, key: str, value: str) -> bool:
"""
Обновление значения переменной в Redis и .env файле
"""
try:
# Подготавливаем данные для сохранения
vars_to_save = {}
for var in variables:
# Валидация
if not var.key or not isinstance(var.key, str):
logger.error(f"Неверный ключ переменной: {var.key}")
continue
# Проверяем формат ключа (только буквы, цифры и подчеркивания)
if not re.match(r"^[A-Z_][A-Z0-9_]*$", var.key):
logger.error(f"Неверный формат ключа: {var.key}")
continue
vars_to_save[var.key] = var.value
if not vars_to_save:
logger.warning("Нет переменных для сохранения")
return False
# Сохраняем в Redis
full_key = f"{self.prefix}{key}"
self.redis.set(full_key, value)
success = await self.set_variables_to_redis(vars_to_save)
# Обновляем значение в .env файле
self._update_dotenv_var(key, value)
if success:
logger.info(f"Обновлено {len(vars_to_save)} переменных окружения")
# Обновляем переменную в текущем процессе
os.environ[key] = value
return success
return True
except Exception as e:
logger.error(f"Ошибка обновления переменной {key}: {e}")
logger.error(f"Ошибка при обновлении переменных: {e}")
return False
def _update_dotenv_var(self, key: str, value: str) -> bool:
"""
Обновляет переменную в .env файле
"""
async def delete_variable(self, key: str) -> bool:
"""Удаляет переменную окружения"""
try:
# Если файл .env не существует, создаем его
if not os.path.exists(self.env_file_path):
with open(self.env_file_path, "w") as f:
f.write(f"{key}={value}\n")
redis_key = f"{self.redis_prefix}{key}"
result = await redis.delete(redis_key)
if result > 0:
logger.info(f"Переменная {key} удалена")
return True
# Если файл существует, читаем его содержимое
lines = []
found = False
with open(self.env_file_path, "r") as f:
for line in f:
if line.strip() and not line.strip().startswith("#"):
if line.strip().startswith(f"{key}="):
# Экранируем значение, если необходимо
if " " in value or "," in value or '"' in value or "'" in value:
escaped_value = f'"{value}"'
else:
escaped_value = value
lines.append(f"{key}={escaped_value}\n")
found = True
else:
lines.append(line)
else:
lines.append(line)
# Если переменной не было в файле, добавляем ее
if not found:
# Экранируем значение, если необходимо
if " " in value or "," in value or '"' in value or "'" in value:
escaped_value = f'"{value}"'
else:
escaped_value = value
lines.append(f"{key}={escaped_value}\n")
# Записываем обновленный файл
with open(self.env_file_path, "w") as f:
f.writelines(lines)
return True
except Exception as e:
logger.error(f"Ошибка обновления .env файла: {e}")
logger.warning(f"Переменная {key} не найдена")
return False
def update_variables(self, variables: List[EnvVariable]) -> bool:
"""
Массовое обновление переменных
"""
try:
# Обновляем переменные в Redis
pipe = self.redis.pipeline()
for var in variables:
full_key = f"{self.prefix}{var.key}"
pipe.set(full_key, var.value)
pipe.execute()
# Обновляем переменные в .env файле
for var in variables:
self._update_dotenv_var(var.key, var.value)
# Обновляем переменную в текущем процессе
os.environ[var.key] = var.value
return True
except Exception as e:
logger.error(f"Ошибка массового обновления переменных: {e}")
logger.error(f"Ошибка при удалении переменной {key}: {e}")
return False
async def get_variable(self, key: str) -> Optional[str]:
"""Получает значение конкретной переменной"""
# Сначала проверяем Redis
try:
redis_key = f"{self.redis_prefix}{key}"
value = await redis.get(redis_key)
if value:
return value.decode("utf-8") if isinstance(value, bytes) else str(value)
except Exception as e:
logger.error(f"Ошибка при получении переменной {key} из Redis: {e}")
# Fallback на системное окружение
return os.getenv(key)
async def set_variable(self, key: str, value: str) -> bool:
"""Устанавливает значение переменной"""
try:
redis_key = f"{self.redis_prefix}{key}"
await redis.set(redis_key, value)
logger.info(f"Переменная {key} установлена")
return True
except Exception as e:
logger.error(f"Ошибка при установке переменной {key}: {e}")
return False
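A hedged example of driving the async EnvManager API from an admin script; the script itself is an assumption.

    import asyncio

    async def dump_env() -> None:
        manager = EnvManager()
        for section in await manager.get_all_variables():
            print(f"[{section.name}] {section.description}")
            for var in section.variables:
                print(f"  {var.key}={var.value} ({var.type})")
        await manager.set_variable("FEATURE_SEARCH", "true")

    asyncio.run(dump_env())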

View File

@@ -1,19 +1,21 @@
import logging
from collections.abc import Awaitable
from typing import Callable
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import JSONResponse
from starlette.requests import Request
from starlette.responses import JSONResponse, Response
logger = logging.getLogger("exception")
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)
class ExceptionHandlerMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request, call_next):
async def dispatch(self, request: Request, call_next: Callable[[Request], Awaitable[Response]]) -> Response:
try:
response = await call_next(request)
return response
except Exception as exc:
logger.exception(exc)
return await call_next(request)
except Exception:
logger.exception("Unhandled exception occurred")
return JSONResponse(
{"detail": "An error occurred. Please try again later."},
status_code=500,

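Assumed wiring (not shown in the diff) for registering the middleware on a Starlette application:

    from starlette.applications import Starlette
    from starlette.middleware import Middleware

    app = Starlette(middleware=[Middleware(ExceptionHandlerMiddleware)])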
View File

@@ -1,46 +1,82 @@
from collections.abc import Collection
from typing import Any, Dict, Union
import orjson
from orm.notification import Notification
from orm.reaction import Reaction
from orm.shout import Shout
from services.db import local_session
from services.redis import redis
from utils.logger import root_logger as logger
def save_notification(action: str, entity: str, payload):
def save_notification(action: str, entity: str, payload: Union[Dict[Any, Any], str, int, None]) -> None:
"""Save notification with proper payload handling"""
if payload is None:
payload = ""
elif isinstance(payload, (Reaction, Shout)):
# Convert ORM objects to dict representation
payload = {"id": payload.id}
elif isinstance(payload, Collection) and not isinstance(payload, (str, bytes)):
# Convert collections to string representation
payload = str(payload)
with local_session() as session:
n = Notification(action=action, entity=entity, payload=payload)
session.add(n)
session.commit()
async def notify_reaction(reaction, action: str = "create"):
async def notify_reaction(reaction: Union[Reaction, int], action: str = "create") -> None:
channel_name = "reaction"
data = {"payload": reaction, "action": action}
# Преобразуем объект Reaction в словарь для сериализации
if isinstance(reaction, Reaction):
reaction_payload = {
"id": reaction.id,
"kind": reaction.kind,
"body": reaction.body,
"shout": reaction.shout,
"created_by": reaction.created_by,
"created_at": getattr(reaction, "created_at", None),
}
else:
# Если передан просто ID
reaction_payload = {"id": reaction}
data = {"payload": reaction_payload, "action": action}
try:
save_notification(action, channel_name, data.get("payload"))
save_notification(action, channel_name, reaction_payload)
await redis.publish(channel_name, orjson.dumps(data))
except Exception as e:
except (ConnectionError, TimeoutError, ValueError) as e:
logger.error(f"Failed to publish to channel {channel_name}: {e}")
async def notify_shout(shout, action: str = "update"):
async def notify_shout(shout: Dict[str, Any], action: str = "update") -> None:
channel_name = "shout"
data = {"payload": shout, "action": action}
try:
save_notification(action, channel_name, data.get("payload"))
payload = data.get("payload")
if isinstance(payload, Collection) and not isinstance(payload, (str, bytes, dict)):
payload = str(payload)
save_notification(action, channel_name, payload)
await redis.publish(channel_name, orjson.dumps(data))
except Exception as e:
except (ConnectionError, TimeoutError, ValueError) as e:
logger.error(f"Failed to publish to channel {channel_name}: {e}")
async def notify_follower(follower: dict, author_id: int, action: str = "follow"):
async def notify_follower(follower: Dict[str, Any], author_id: int, action: str = "follow") -> None:
channel_name = f"follower:{author_id}"
try:
# Simplify dictionary before publishing
simplified_follower = {k: follower[k] for k in ["id", "name", "slug", "pic"]}
data = {"payload": simplified_follower, "action": action}
# save in channel
save_notification(action, channel_name, data.get("payload"))
payload = data.get("payload")
if isinstance(payload, Collection) and not isinstance(payload, (str, bytes, dict)):
payload = str(payload)
save_notification(action, channel_name, payload)
# Convert data to JSON string
json_data = orjson.dumps(data)
@@ -50,12 +86,12 @@ async def notify_follower(follower: dict, author_id: int, action: str = "follow"
# Use the 'await' keyword when publishing
await redis.publish(channel_name, json_data)
except Exception as e:
except (ConnectionError, TimeoutError, KeyError, ValueError) as e:
# Log the error and re-raise it
logger.error(f"Failed to publish to channel {channel_name}: {e}")
async def notify_draft(draft_data, action: str = "publish"):
async def notify_draft(draft_data: Dict[str, Any], action: str = "publish") -> None:
"""
Отправляет уведомление о публикации или обновлении черновика.
@@ -63,8 +99,8 @@ async def notify_draft(draft_data, action: str = "publish"):
связанные атрибуты (topics, authors).
Args:
draft_data (dict): Словарь с данными черновика. Должен содержать минимум id и title
action (str, optional): Действие ("publish", "update"). По умолчанию "publish"
draft_data: Словарь с данными черновика или ORM объект. Должен содержать минимум id и title
action: Действие ("publish", "update"). По умолчанию "publish"
Returns:
None
@@ -109,12 +145,15 @@ async def notify_draft(draft_data, action: str = "publish"):
data = {"payload": draft_payload, "action": action}
# Сохраняем уведомление
save_notification(action, channel_name, data.get("payload"))
payload = data.get("payload")
if isinstance(payload, Collection) and not isinstance(payload, (str, bytes, dict)):
payload = str(payload)
save_notification(action, channel_name, payload)
# Публикуем в Redis
json_data = orjson.dumps(data)
if json_data:
await redis.publish(channel_name, json_data)
except Exception as e:
except (ConnectionError, TimeoutError, AttributeError, ValueError) as e:
logger.error(f"Failed to publish to channel {channel_name}: {e}")

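Illustrative call sites for the notifiers above; the surrounding resolver hooks are assumptions.

    async def after_reaction_commit(reaction: Reaction) -> None:
        # Publishes {"payload": {...}, "action": "create"} to the "reaction" channel
        # and stores a Notification row via save_notification().
        await notify_reaction(reaction, action="create")

    async def after_unfollow(follower: dict, author_id: int) -> None:
        # follower must carry the id, name, slug and pic keys
        await notify_follower(follower, author_id, action="unfollow")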
View File

@@ -1,170 +1,90 @@
import asyncio
import concurrent.futures
from typing import Dict, List, Tuple
from concurrent.futures import Future
from typing import Any, Optional
from txtai.embeddings import Embeddings
try:
from utils.logger import root_logger as logger
except ImportError:
import logging
from services.logger import root_logger as logger
logger = logging.getLogger(__name__)
class TopicClassifier:
def __init__(self, shouts_by_topic: Dict[str, str], publications: List[Dict[str, str]]):
"""
Инициализация классификатора тем и поиска публикаций.
Args:
shouts_by_topic: Словарь {тема: текст_всех_публикаций}
publications: Список публикаций с полями 'id', 'title', 'text'
"""
self.shouts_by_topic = shouts_by_topic
self.topics = list(shouts_by_topic.keys())
self.publications = publications
self.topic_embeddings = None # Для классификации тем
self.search_embeddings = None # Для поиска публикаций
self._initialization_future = None
self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
class PreTopicService:
def __init__(self) -> None:
self.topic_embeddings: Optional[Any] = None
self.search_embeddings: Optional[Any] = None
self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
self._initialization_future: Optional[Future[None]] = None
def initialize(self) -> None:
"""
Асинхронная инициализация векторных представлений.
"""
def _ensure_initialization(self) -> None:
"""Ensure embeddings are initialized"""
if self._initialization_future is None:
self._initialization_future = self._executor.submit(self._prepare_embeddings)
logger.info("Векторизация текстов начата в фоновом режиме...")
def _prepare_embeddings(self) -> None:
"""
Подготавливает векторные представления для тем и поиска.
"""
logger.info("Начинается подготовка векторных представлений...")
# Модель для русского языка
# TODO: model local caching
model_path = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
# Инициализируем embeddings для классификации тем
self.topic_embeddings = Embeddings(path=model_path)
topic_documents = [(topic, text) for topic, text in self.shouts_by_topic.items()]
self.topic_embeddings.index(topic_documents)
# Инициализируем embeddings для поиска публикаций
self.search_embeddings = Embeddings(path=model_path)
search_documents = [(str(pub["id"]), f"{pub['title']} {pub['text']}") for pub in self.publications]
self.search_embeddings.index(search_documents)
logger.info("Подготовка векторных представлений завершена.")
def predict_topic(self, text: str) -> Tuple[float, str]:
"""
Предсказывает тему для заданного текста из известного набора тем.
Args:
text: Текст для классификации
Returns:
Tuple[float, str]: (уверенность, тема)
"""
if not self.is_ready():
logger.error("Векторные представления не готовы. Вызовите initialize() и дождитесь завершения.")
return 0.0, "unknown"
"""Prepare embeddings for topic and search functionality"""
try:
# Ищем наиболее похожую тему
results = self.topic_embeddings.search(text, 1)
if not results:
return 0.0, "unknown"
from txtai.embeddings import Embeddings # type: ignore[import-untyped]
score, topic = results[0]
return float(score), topic
# Initialize topic embeddings
self.topic_embeddings = Embeddings(
{
"method": "transformers",
"path": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
}
)
# Initialize search embeddings
self.search_embeddings = Embeddings(
{
"method": "transformers",
"path": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
}
)
logger.info("PreTopic embeddings initialized successfully")
except ImportError:
logger.warning("txtai.embeddings not available, PreTopicService disabled")
except Exception as e:
logger.error(f"Ошибка при определении темы: {str(e)}")
return 0.0, "unknown"
logger.error(f"Failed to initialize embeddings: {e}")
def search_similar(self, query: str, limit: int = 5) -> List[Dict[str, any]]:
"""
Ищет публикации похожие на поисковый запрос.
Args:
query: Поисковый запрос
limit: Максимальное количество результатов
Returns:
List[Dict]: Список найденных публикаций с оценкой релевантности
"""
if not self.is_ready():
logger.error("Векторные представления не готовы. Вызовите initialize() и дождитесь завершения.")
async def suggest_topics(self, text: str) -> list[dict[str, Any]]:
"""Suggest topics based on text content"""
if self.topic_embeddings is None:
return []
try:
# Ищем похожие публикации
results = self.search_embeddings.search(query, limit)
# Формируем результаты
found_publications = []
for score, pub_id in results:
# Находим публикацию по id
publication = next((pub for pub in self.publications if str(pub["id"]) == pub_id), None)
if publication:
found_publications.append({**publication, "relevance": float(score)})
return found_publications
self._ensure_initialization()
if self._initialization_future:
await asyncio.wrap_future(self._initialization_future)
if self.topic_embeddings is not None:
results = self.topic_embeddings.search(text, 1)
if results:
return [{"topic": result["text"], "score": result["score"]} for result in results]
except Exception as e:
logger.error(f"Ошибка при поиске публикаций: {str(e)}")
logger.error(f"Error suggesting topics: {e}")
return []
async def search_content(self, query: str, limit: int = 10) -> list[dict[str, Any]]:
"""Search content using embeddings"""
if self.search_embeddings is None:
return []
def is_ready(self) -> bool:
"""
Проверяет, готовы ли векторные представления.
"""
return self.topic_embeddings is not None and self.search_embeddings is not None
try:
self._ensure_initialization()
if self._initialization_future:
await asyncio.wrap_future(self._initialization_future)
def wait_until_ready(self) -> None:
"""
Ожидает завершения подготовки векторных представлений.
"""
if self._initialization_future:
self._initialization_future.result()
def __del__(self):
"""
Очистка ресурсов при удалении объекта.
"""
if self._executor:
self._executor.shutdown(wait=False)
if self.search_embeddings is not None:
results = self.search_embeddings.search(query, limit)
if results:
return [{"content": result["text"], "score": result["score"]} for result in results]
except Exception as e:
logger.error(f"Error searching content: {e}")
return []
# Пример использования:
"""
shouts_by_topic = {
"Спорт": "... большой текст со всеми спортивными публикациями ...",
"Технологии": "... большой текст со всеми технологическими публикациями ...",
"Политика": "... большой текст со всеми политическими публикациями ..."
}
publications = [
{
'id': 1,
'title': 'Новый процессор AMD',
'text': 'Компания AMD представила новый процессор...'
},
{
'id': 2,
'title': 'Футбольный матч',
'text': 'Вчера состоялся решающий матч...'
}
]
# Создание классификатора
classifier = TopicClassifier(shouts_by_topic, publications)
classifier.initialize()
classifier.wait_until_ready()
# Определение темы текста
text = "Новый процессор показал высокую производительность"
score, topic = classifier.predict_topic(text)
print(f"Тема: {topic} (уверенность: {score:.4f})")
# Поиск похожих публикаций
query = "процессор AMD производительность"
similar_publications = classifier.search_similar(query, limit=3)
for pub in similar_publications:
print(f"\nНайдена публикация (релевантность: {pub['relevance']:.4f}):")
print(f"Заголовок: {pub['title']}")
print(f"Текст: {pub['text'][:100]}...")
"""
# Global instance
pretopic_service = PreTopicService()
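A usage sketch for the global service instance; the printed result shapes are assumed from the methods above.

    import asyncio

    async def demo() -> None:
        topics = await pretopic_service.suggest_topics("Новый процессор показал высокую производительность")
        found = await pretopic_service.search_content("процессор AMD производительность", limit=3)
        print(topics)  # e.g. [{"topic": "Технологии", "score": 0.83}]
        print(found)

    asyncio.run(demo())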

View File

@@ -1,247 +1,260 @@
import json
import logging
from typing import TYPE_CHECKING, Any, Optional, Set, Union
import redis.asyncio as aioredis
from redis.asyncio import Redis
if TYPE_CHECKING:
pass # type: ignore[attr-defined]
from settings import REDIS_URL
from utils.logger import root_logger as logger
logger = logging.getLogger(__name__)
# Set redis logging level to suppress DEBUG messages
logger = logging.getLogger("redis")
logger.setLevel(logging.WARNING)
redis_logger = logging.getLogger("redis")
redis_logger.setLevel(logging.WARNING)
class RedisService:
def __init__(self, uri=REDIS_URL):
self._uri: str = uri
self.pubsub_channels = []
self._client = None
"""
Сервис для работы с Redis с поддержкой пулов соединений.
async def connect(self):
if self._uri and self._client is None:
self._client = await Redis.from_url(self._uri, decode_responses=True)
logger.info("Redis connection was established.")
Provides connection pooling and proper error handling for Redis operations.
"""
async def disconnect(self):
if isinstance(self._client, Redis):
await self._client.close()
logger.info("Redis connection was closed.")
def __init__(self, redis_url: str = REDIS_URL) -> None:
self._client: Optional[Redis[Any]] = None
self._redis_url = redis_url
self._is_available = aioredis is not None
async def execute(self, command, *args, **kwargs):
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
logger.info(f"[redis] Автоматически установлено соединение при выполнении команды {command}")
if not self._is_available:
logger.warning("Redis is not available - aioredis not installed")
if self._client:
try:
logger.debug(f"{command}") # {args[0]}") # {args} {kwargs}")
for arg in args:
if isinstance(arg, dict):
if arg.get("_sa_instance_state"):
del arg["_sa_instance_state"]
r = await self._client.execute_command(command, *args, **kwargs)
# logger.debug(type(r))
# logger.debug(r)
return r
except Exception as e:
logger.error(e)
def pipeline(self):
"""
Возвращает пайплайн Redis для выполнения нескольких команд в одной транзакции.
Returns:
Pipeline: объект pipeline Redis
"""
if self._client is None:
# Выбрасываем исключение, так как pipeline нельзя создать до подключения
raise Exception("Redis client is not initialized. Call redis.connect() first.")
return self._client.pipeline()
async def subscribe(self, *channels):
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
async with self._client.pubsub() as pubsub:
for channel in channels:
await pubsub.subscribe(channel)
self.pubsub_channels.append(channel)
async def unsubscribe(self, *channels):
if self._client is None:
async def connect(self) -> None:
"""Establish Redis connection"""
if not self._is_available:
return
async with self._client.pubsub() as pubsub:
for channel in channels:
await pubsub.unsubscribe(channel)
self.pubsub_channels.remove(channel)
# Закрываем существующее соединение если есть
if self._client:
try:
await self._client.close()
except Exception:
pass
self._client = None
async def publish(self, channel, data):
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
try:
self._client = aioredis.from_url(
self._redis_url,
encoding="utf-8",
decode_responses=False, # We handle decoding manually
socket_keepalive=True,
socket_keepalive_options={},
retry_on_timeout=True,
health_check_interval=30,
socket_connect_timeout=5,
socket_timeout=5,
)
# Test connection
await self._client.ping()
logger.info("Successfully connected to Redis")
except Exception as e:
logger.error(f"Failed to connect to Redis: {e}")
if self._client:
try:
await self._client.close()
except Exception:
pass
self._client = None
async def disconnect(self) -> None:
"""Close Redis connection"""
if self._client:
await self._client.close()
self._client = None
@property
def is_connected(self) -> bool:
"""Check if Redis is connected"""
return self._client is not None and self._is_available
def pipeline(self) -> Any: # Returns Pipeline but we can't import it safely
"""Create a Redis pipeline"""
if self._client:
return self._client.pipeline()
return None
async def execute(self, command: str, *args: Any) -> Any:
"""Execute a Redis command"""
if not self._is_available:
logger.debug(f"Redis not available, skipping command: {command}")
return None
# Проверяем и восстанавливаем соединение при необходимости
if not self.is_connected:
logger.info("Redis not connected, attempting to reconnect...")
await self.connect()
await self._client.publish(channel, data)
if not self.is_connected:
logger.error(f"Failed to establish Redis connection for command: {command}")
return None
async def set(self, key, data, ex=None):
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
# Prepare the command arguments
args = [key, data]
# If an expiration time is provided, add it to the arguments
if ex is not None:
args.append("EX")
args.append(ex)
# Execute the command with the provided arguments
await self.execute("set", *args)
async def get(self, key):
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
try:
# Get the command method from the client
cmd_method = getattr(self._client, command.lower(), None)
if cmd_method is None:
logger.error(f"Unknown Redis command: {command}")
return None
result = await cmd_method(*args)
return result
except (ConnectionError, AttributeError, OSError) as e:
logger.warning(f"Redis connection lost during {command}, attempting to reconnect: {e}")
# Попытка переподключения
await self.connect()
if self.is_connected:
try:
cmd_method = getattr(self._client, command.lower(), None)
if cmd_method is not None:
result = await cmd_method(*args)
return result
except Exception as retry_e:
logger.error(f"Redis retry failed for {command}: {retry_e}")
return None
except Exception as e:
logger.error(f"Redis command failed {command}: {e}")
return None
async def get(self, key: str) -> Optional[Union[str, bytes]]:
"""Get value by key"""
return await self.execute("get", key)
async def delete(self, *keys):
"""
Удаляет ключи из Redis.
async def set(self, key: str, value: Any, ex: Optional[int] = None) -> bool:
"""Set key-value pair with optional expiration"""
if ex is not None:
result = await self.execute("setex", key, ex, value)
else:
result = await self.execute("set", key, value)
return result is not None
Args:
*keys: Ключи для удаления
async def delete(self, *keys: str) -> int:
"""Delete keys"""
result = await self.execute("delete", *keys)
return result or 0
Returns:
int: Количество удаленных ключей
"""
if not keys:
return 0
async def exists(self, key: str) -> bool:
"""Check if key exists"""
result = await self.execute("exists", key)
return bool(result)
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
async def publish(self, channel: str, data: Any) -> None:
"""Publish message to channel"""
if not self.is_connected or self._client is None:
logger.debug(f"Redis not available, skipping publish to {channel}")
return
return await self._client.delete(*keys)
try:
await self._client.publish(channel, data)
except Exception as e:
logger.error(f"Failed to publish to channel {channel}: {e}")
async def hmset(self, key, mapping):
"""
Устанавливает несколько полей хеша.
async def hset(self, key: str, field: str, value: Any) -> None:
"""Set hash field"""
await self.execute("hset", key, field, value)
Args:
key: Ключ хеша
mapping: Словарь с полями и значениями
"""
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
async def hget(self, key: str, field: str) -> Optional[Union[str, bytes]]:
"""Get hash field"""
return await self.execute("hget", key, field)
await self._client.hset(key, mapping=mapping)
async def hgetall(self, key: str) -> dict[str, Any]:
"""Get all hash fields"""
result = await self.execute("hgetall", key)
return result or {}
async def expire(self, key, seconds):
"""
Устанавливает время жизни ключа.
async def keys(self, pattern: str) -> list[str]:
"""Get keys matching pattern"""
result = await self.execute("keys", pattern)
return result or []
Args:
key: Ключ
seconds: Время жизни в секундах
"""
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
async def smembers(self, key: str) -> Set[str]:
"""Get set members"""
if not self.is_connected or self._client is None:
return set()
try:
result = await self._client.smembers(key)
if result:
return {str(item.decode("utf-8") if isinstance(item, bytes) else item) for item in result}
return set()
except Exception as e:
logger.error(f"Redis smembers command failed for {key}: {e}")
return set()
await self._client.expire(key, seconds)
async def sadd(self, key: str, *members: str) -> int:
"""Add members to set"""
result = await self.execute("sadd", key, *members)
return result or 0
async def sadd(self, key, *values):
"""
Добавляет значения в множество.
async def srem(self, key: str, *members: str) -> int:
"""Remove members from set"""
result = await self.execute("srem", key, *members)
return result or 0
Args:
key: Ключ множества
*values: Значения для добавления
"""
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
async def expire(self, key: str, seconds: int) -> bool:
"""Set key expiration"""
result = await self.execute("expire", key, seconds)
return bool(result)
await self._client.sadd(key, *values)
async def serialize_and_set(self, key: str, data: Any, ex: Optional[int] = None) -> bool:
"""Serialize data to JSON and store in Redis"""
try:
if isinstance(data, (str, bytes)):
serialized_data: bytes = data.encode("utf-8") if isinstance(data, str) else data
else:
serialized_data = json.dumps(data).encode("utf-8")
async def srem(self, key, *values):
"""
Удаляет значения из множества.
return await self.set(key, serialized_data, ex=ex)
except Exception as e:
logger.error(f"Failed to serialize and set {key}: {e}")
return False
Args:
key: Ключ множества
*values: Значения для удаления
"""
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
async def get_and_deserialize(self, key: str) -> Any:
"""Get data from Redis and deserialize from JSON"""
try:
data = await self.get(key)
if data is None:
return None
await self._client.srem(key, *values)
if isinstance(data, bytes):
data = data.decode("utf-8")
async def smembers(self, key):
"""
Получает все элементы множества.
return json.loads(data)
except Exception as e:
logger.error(f"Failed to get and deserialize {key}: {e}")
return None
Args:
key: Ключ множества
Returns:
set: Множество элементов
"""
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
return await self._client.smembers(key)
async def exists(self, key):
"""
Проверяет, существует ли ключ в Redis.
Args:
key: Ключ для проверки
Returns:
bool: True, если ключ существует, False в противном случае
"""
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
return await self._client.exists(key)
async def expire(self, key, seconds):
"""
Устанавливает время жизни ключа.
Args:
key: Ключ
seconds: Время жизни в секундах
"""
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
return await self._client.expire(key, seconds)
async def keys(self, pattern):
"""
Возвращает все ключи, соответствующие шаблону.
Args:
pattern: Шаблон для поиска ключей
"""
# Автоматически подключаемся к Redis, если соединение не установлено
if self._client is None:
await self.connect()
return await self._client.keys(pattern)
async def ping(self) -> bool:
"""Ping Redis server"""
if not self.is_connected or self._client is None:
return False
try:
result = await self._client.ping()
return bool(result)
except Exception:
return False
# Global Redis instance
redis = RedisService()
__all__ = ["redis"]
async def init_redis() -> None:
"""Initialize Redis connection"""
await redis.connect()
async def close_redis() -> None:
"""Close Redis connection"""
await redis.disconnect()
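A minimal usage sketch for the refactored RedisService above, assuming the module is importable as services.redis (the file path is not shown in this diff) and that init_redis()/close_redis() are wired into the application's startup and shutdown hooks. Raw values come back as bytes because the client is created with decode_responses=False.
import asyncio
from services.redis import close_redis, init_redis, redis  # assumed module path
async def demo_redis() -> None:
    await init_redis()  # establish the pooled connection once at startup
    # Plain value with a 60-second TTL; get() returns bytes (decode_responses=False)
    await redis.set("greeting", "hello", ex=60)
    print(await redis.get("greeting"))
    # JSON round-trip via the serialization helpers
    await redis.serialize_and_set("user:1", {"id": 1, "name": "Ann"}, ex=300)
    print(await redis.get_and_deserialize("user:1"))
    # Set operations and key expiration
    await redis.sadd("tags", "python", "redis")
    print(await redis.smembers("tags"))
    await redis.expire("tags", 120)
    await close_redis()
asyncio.run(demo_redis())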

View File

View File

@@ -1,16 +1,17 @@
from asyncio.log import logger
from typing import List
from ariadne import MutationType, ObjectType, QueryType
from ariadne import MutationType, ObjectType, QueryType, SchemaBindable
from services.db import create_table_if_not_exists, local_session
query = QueryType()
mutation = MutationType()
type_draft = ObjectType("Draft")
resolvers = [query, mutation, type_draft]
resolvers: List[SchemaBindable] = [query, mutation, type_draft]
def create_all_tables():
def create_all_tables() -> None:
"""Create all database tables in the correct order."""
from auth.orm import Author, AuthorBookmark, AuthorFollower, AuthorRating
from orm import community, draft, notification, reaction, shout, topic
@@ -52,5 +53,6 @@ def create_all_tables():
create_table_if_not_exists(session.get_bind(), model)
# logger.info(f"Created or verified table: {model.__tablename__}")
except Exception as e:
logger.error(f"Error creating table {model.__tablename__}: {e}")
table_name = getattr(model, "__tablename__", str(model))
logger.error(f"Error creating table {table_name}: {e}")
raise
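For context, a hedged sketch of how a SchemaBindable list like resolvers above is typically combined into an executable schema with ariadne; the SDL location and the import path are assumptions, not code from this commit.
from ariadne import load_schema_from_path, make_executable_schema
from resolvers.draft import resolvers  # assumed module path for the list above
type_defs = load_schema_from_path("schema/")  # assumed location of the *.graphql files
schema = make_executable_schema(type_defs, *resolvers)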

View File

@@ -4,13 +4,15 @@ import logging
import os
import random
import time
from typing import Any, Union
import httpx
from orm.shout import Shout
from settings import TXTAI_SERVICE_URL
from utils.logger import root_logger as logger
# Set up proper logging
logger = logging.getLogger("search")
logger.setLevel(logging.INFO) # Change to INFO to see more details
# Disable noisy HTTP client logging
logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -18,12 +20,11 @@ logging.getLogger("httpcore").setLevel(logging.WARNING)
# Configuration for search service
SEARCH_ENABLED = bool(os.environ.get("SEARCH_ENABLED", "true").lower() in ["true", "1", "yes"])
MAX_BATCH_SIZE = int(os.environ.get("SEARCH_MAX_BATCH_SIZE", "25"))
# Search cache configuration
SEARCH_CACHE_ENABLED = bool(os.environ.get("SEARCH_CACHE_ENABLED", "true").lower() in ["true", "1", "yes"])
SEARCH_CACHE_TTL_SECONDS = int(os.environ.get("SEARCH_CACHE_TTL_SECONDS", "300")) # Default: 15 minutes
SEARCH_CACHE_TTL_SECONDS = int(os.environ.get("SEARCH_CACHE_TTL_SECONDS", "300")) # Default: 5 minutes
SEARCH_PREFETCH_SIZE = int(os.environ.get("SEARCH_PREFETCH_SIZE", "200"))
SEARCH_USE_REDIS = bool(os.environ.get("SEARCH_USE_REDIS", "true").lower() in ["true", "1", "yes"])
@@ -43,29 +44,29 @@ if SEARCH_USE_REDIS:
class SearchCache:
"""Cache for search results to enable efficient pagination"""
def __init__(self, ttl_seconds=SEARCH_CACHE_TTL_SECONDS, max_items=100):
self.cache = {} # Maps search query to list of results
self.last_accessed = {} # Maps search query to last access timestamp
def __init__(self, ttl_seconds: int = SEARCH_CACHE_TTL_SECONDS, max_items: int = 100) -> None:
self.cache: dict[str, list] = {} # Maps search query to list of results
self.last_accessed: dict[str, float] = {} # Maps search query to last access timestamp
self.ttl = ttl_seconds
self.max_items = max_items
self._redis_prefix = "search_cache:"
async def store(self, query, results):
async def store(self, query: str, results: list) -> bool:
"""Store search results for a query"""
normalized_query = self._normalize_query(query)
if SEARCH_USE_REDIS:
try:
serialized_results = json.dumps(results)
await redis.set(
await redis.serialize_and_set(
f"{self._redis_prefix}{normalized_query}",
serialized_results,
ex=self.ttl,
)
logger.info(f"Stored {len(results)} search results for query '{query}' in Redis")
return True
except Exception as e:
logger.error(f"Error storing search results in Redis: {e}")
except Exception:
logger.exception("Error storing search results in Redis")
# Fall back to memory cache if Redis fails
# First cleanup if needed for memory cache
@@ -78,7 +79,7 @@ class SearchCache:
logger.info(f"Cached {len(results)} search results for query '{query}' in memory")
return True
async def get(self, query, limit=10, offset=0):
async def get(self, query: str, limit: int = 10, offset: int = 0) -> list[dict] | None:
"""Get paginated results for a query"""
normalized_query = self._normalize_query(query)
all_results = None
@@ -90,8 +91,8 @@ class SearchCache:
if cached_data:
all_results = json.loads(cached_data)
logger.info(f"Retrieved search results for '{query}' from Redis")
except Exception as e:
logger.error(f"Error retrieving search results from Redis: {e}")
except Exception:
logger.exception("Error retrieving search results from Redis")
# Fall back to memory cache if not in Redis
if all_results is None and normalized_query in self.cache:
@@ -113,7 +114,7 @@ class SearchCache:
logger.info(f"Cache hit for '{query}': serving {offset}:{end_idx} of {len(all_results)} results")
return all_results[offset:end_idx]
async def has_query(self, query):
async def has_query(self, query: str) -> bool:
"""Check if query exists in cache"""
normalized_query = self._normalize_query(query)
@@ -123,13 +124,13 @@ class SearchCache:
exists = await redis.get(f"{self._redis_prefix}{normalized_query}")
if exists:
return True
except Exception as e:
logger.error(f"Error checking Redis for query existence: {e}")
except Exception:
logger.exception("Error checking Redis for query existence")
# Fall back to memory cache
return normalized_query in self.cache
async def get_total_count(self, query):
async def get_total_count(self, query: str) -> int:
"""Get total count of results for a query"""
normalized_query = self._normalize_query(query)
@@ -140,8 +141,8 @@ class SearchCache:
if cached_data:
all_results = json.loads(cached_data)
return len(all_results)
except Exception as e:
logger.error(f"Error getting result count from Redis: {e}")
except Exception:
logger.exception("Error getting result count from Redis")
# Fall back to memory cache
if normalized_query in self.cache:
@@ -149,14 +150,14 @@ class SearchCache:
return 0
def _normalize_query(self, query):
def _normalize_query(self, query: str) -> str:
"""Normalize query string for cache key"""
if not query:
return ""
# Simple normalization - lowercase and strip whitespace
return query.lower().strip()
def _cleanup(self):
def _cleanup(self) -> None:
"""Remove oldest entries if memory cache is full"""
now = time.time()
# First remove expired entries
@@ -168,7 +169,7 @@ class SearchCache:
if key in self.last_accessed:
del self.last_accessed[key]
logger.info(f"Cleaned up {len(expired_keys)} expired search cache entries")
logger.info("Cleaned up %d expired search cache entries", len(expired_keys))
# If still above max size, remove oldest entries
if len(self.cache) >= self.max_items:
@@ -181,12 +182,12 @@ class SearchCache:
del self.cache[key]
if key in self.last_accessed:
del self.last_accessed[key]
logger.info(f"Removed {remove_count} oldest search cache entries")
logger.info("Removed %d oldest search cache entries", remove_count)
class SearchService:
def __init__(self):
logger.info(f"Initializing search service with URL: {TXTAI_SERVICE_URL}")
def __init__(self) -> None:
logger.info("Initializing search service with URL: %s", TXTAI_SERVICE_URL)
self.available = SEARCH_ENABLED
# Use different timeout settings for indexing and search requests
self.client = httpx.AsyncClient(timeout=30.0, base_url=TXTAI_SERVICE_URL)
@@ -201,80 +202,69 @@ class SearchService:
cache_location = "Redis" if SEARCH_USE_REDIS else "Memory"
logger.info(f"Search caching enabled using {cache_location} cache with TTL={SEARCH_CACHE_TTL_SECONDS}s")
async def info(self):
"""Return information about search service"""
if not self.available:
return {"status": "disabled"}
async def info(self) -> dict[str, Any]:
"""Check search service info"""
if not SEARCH_ENABLED:
return {"status": "disabled", "message": "Search is disabled"}
try:
response = await self.client.get("/info")
async with httpx.AsyncClient() as client:
response = await client.get(f"{TXTAI_SERVICE_URL}/info")
response.raise_for_status()
result = response.json()
logger.info(f"Search service info: {result}")
return result
except (httpx.ConnectError, httpx.ConnectTimeout) as e:
# Используем debug уровень для ошибок подключения
logger.debug("Search service connection failed: %s", str(e))
return {"status": "error", "message": str(e)}
except Exception as e:
logger.error(f"Failed to get search info: {e}")
# Другие ошибки логируем как debug
logger.debug("Failed to get search info: %s", str(e))
return {"status": "error", "message": str(e)}
def is_ready(self):
def is_ready(self) -> bool:
"""Check if service is available"""
return self.available
async def verify_docs(self, doc_ids):
async def verify_docs(self, doc_ids: list[int]) -> dict[str, Any]:
"""Verify which documents exist in the search index across all content types"""
if not self.available:
return {"status": "disabled"}
return {"status": "error", "message": "Search service not available"}
try:
logger.info(f"Verifying {len(doc_ids)} documents in search index")
response = await self.client.post(
"/verify-docs",
json={"doc_ids": doc_ids},
timeout=60.0, # Longer timeout for potentially large ID lists
)
response.raise_for_status()
result = response.json()
# Check documents across all content types
results = {}
for content_type in ["shouts", "authors", "topics"]:
endpoint = f"{TXTAI_SERVICE_URL}/exists/{content_type}"
async with httpx.AsyncClient() as client:
response = await client.post(endpoint, json={"ids": doc_ids})
response.raise_for_status()
results[content_type] = response.json()
# Process the more detailed response format
bodies_missing = set(result.get("bodies", {}).get("missing", []))
titles_missing = set(result.get("titles", {}).get("missing", []))
# Combine missing IDs from both bodies and titles
# A document is considered missing if it's missing from either index
all_missing = list(bodies_missing.union(titles_missing))
# Log summary of verification results
bodies_missing_count = len(bodies_missing)
titles_missing_count = len(titles_missing)
total_missing_count = len(all_missing)
logger.info(
f"Document verification complete: {bodies_missing_count} bodies missing, {titles_missing_count} titles missing"
)
logger.info(f"Total unique missing documents: {total_missing_count} out of {len(doc_ids)} total")
# Return in a backwards-compatible format plus the detailed breakdown
return {
"missing": all_missing,
"details": {
"bodies_missing": list(bodies_missing),
"titles_missing": list(titles_missing),
"bodies_missing_count": bodies_missing_count,
"titles_missing_count": titles_missing_count,
},
"status": "success",
"verified": results,
"total_docs": len(doc_ids),
}
except Exception as e:
logger.error(f"Document verification error: {e}")
logger.exception("Document verification error")
return {"status": "error", "message": str(e)}
def index(self, shout):
def index(self, shout: Shout) -> None:
"""Index a single document"""
if not self.available:
return
logger.info(f"Indexing post {shout.id}")
# Start in background to not block
asyncio.create_task(self.perform_index(shout))
task = asyncio.create_task(self.perform_index(shout))
# Store task reference to prevent garbage collection
self._background_tasks: set[asyncio.Task[None]] = getattr(self, "_background_tasks", set())
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
async def perform_index(self, shout):
async def perform_index(self, shout: Shout) -> None:
"""Index a single document across multiple endpoints"""
if not self.available:
return
@@ -317,9 +307,9 @@ class SearchService:
if body_text_parts:
body_text = " ".join(body_text_parts)
# Truncate if too long
MAX_TEXT_LENGTH = 4000
if len(body_text) > MAX_TEXT_LENGTH:
body_text = body_text[:MAX_TEXT_LENGTH]
max_text_length = 4000
if len(body_text) > max_text_length:
body_text = body_text[:max_text_length]
body_doc = {"id": str(shout.id), "body": body_text}
indexing_tasks.append(self.index_client.post("/index-body", json=body_doc))
@@ -356,32 +346,36 @@ class SearchService:
# Check for errors in responses
for i, response in enumerate(responses):
if isinstance(response, Exception):
logger.error(f"Error in indexing task {i}: {response}")
logger.error("Error in indexing task %d: %s", i, response)
elif hasattr(response, "status_code") and response.status_code >= 400:
logger.error(
f"Error response in indexing task {i}: {response.status_code}, {await response.text()}"
)
error_text = ""
if hasattr(response, "text") and callable(response.text):
try:
error_text = await response.text()
except (Exception, httpx.HTTPError):
error_text = str(response)
logger.error("Error response in indexing task %d: %d, %s", i, response.status_code, error_text)
logger.info(f"Document {shout.id} indexed across {len(indexing_tasks)} endpoints")
logger.info("Document %s indexed across %d endpoints", shout.id, len(indexing_tasks))
else:
logger.warning(f"No content to index for shout {shout.id}")
logger.warning("No content to index for shout %s", shout.id)
except Exception as e:
logger.error(f"Indexing error for shout {shout.id}: {e}")
except Exception:
logger.exception("Indexing error for shout %s", shout.id)
async def bulk_index(self, shouts):
async def bulk_index(self, shouts: list[Shout]) -> None:
"""Index multiple documents across three separate endpoints"""
if not self.available or not shouts:
logger.warning(
f"Bulk indexing skipped: available={self.available}, shouts_count={len(shouts) if shouts else 0}"
"Bulk indexing skipped: available=%s, shouts_count=%d", self.available, len(shouts) if shouts else 0
)
return
start_time = time.time()
logger.info(f"Starting multi-endpoint bulk indexing of {len(shouts)} documents")
logger.info("Starting multi-endpoint bulk indexing of %d documents", len(shouts))
# Prepare documents for different endpoints
title_docs = []
title_docs: list[dict[str, Any]] = []
body_docs = []
author_docs = {} # Use dict to prevent duplicate authors
@@ -423,9 +417,9 @@ class SearchService:
if body_text_parts:
body_text = " ".join(body_text_parts)
# Truncate if too long
MAX_TEXT_LENGTH = 4000
if len(body_text) > MAX_TEXT_LENGTH:
body_text = body_text[:MAX_TEXT_LENGTH]
max_text_length = 4000
if len(body_text) > max_text_length:
body_text = body_text[:max_text_length]
body_docs.append({"id": str(shout.id), "body": body_text})
@@ -462,8 +456,8 @@ class SearchService:
"bio": combined_bio,
}
except Exception as e:
logger.error(f"Error processing shout {getattr(shout, 'id', 'unknown')} for indexing: {e}")
except Exception:
logger.exception("Error processing shout %s for indexing", getattr(shout, "id", "unknown"))
total_skipped += 1
# Convert author dict to list
@@ -483,18 +477,21 @@ class SearchService:
elapsed = time.time() - start_time
logger.info(
f"Multi-endpoint indexing completed in {elapsed:.2f}s: "
f"{len(title_docs)} titles, {len(body_docs)} bodies, {len(author_docs_list)} authors, "
f"{total_skipped} shouts skipped"
"Multi-endpoint indexing completed in %.2fs: %d titles, %d bodies, %d authors, %d shouts skipped",
elapsed,
len(title_docs),
len(body_docs),
len(author_docs_list),
total_skipped,
)
async def _index_endpoint(self, documents, endpoint, doc_type):
async def _index_endpoint(self, documents: list[dict], endpoint: str, doc_type: str) -> None:
"""Process and index documents to a specific endpoint"""
if not documents:
logger.info(f"No {doc_type} documents to index")
logger.info("No %s documents to index", doc_type)
return
logger.info(f"Indexing {len(documents)} {doc_type} documents")
logger.info("Indexing %d %s documents", len(documents), doc_type)
# Categorize documents by size
small_docs, medium_docs, large_docs = self._categorize_by_size(documents, doc_type)
@@ -515,7 +512,7 @@ class SearchService:
batch_size = batch_sizes[category]
await self._process_batches(docs, batch_size, endpoint, f"{doc_type}-{category}")
def _categorize_by_size(self, documents, doc_type):
def _categorize_by_size(self, documents: list[dict], doc_type: str) -> tuple[list[dict], list[dict], list[dict]]:
"""Categorize documents by size for optimized batch processing"""
small_docs = []
medium_docs = []
@@ -541,11 +538,15 @@ class SearchService:
small_docs.append(doc)
logger.info(
f"{doc_type.capitalize()} documents categorized: {len(small_docs)} small, {len(medium_docs)} medium, {len(large_docs)} large"
"%s documents categorized: %d small, %d medium, %d large",
doc_type.capitalize(),
len(small_docs),
len(medium_docs),
len(large_docs),
)
return small_docs, medium_docs, large_docs
async def _process_batches(self, documents, batch_size, endpoint, batch_prefix):
async def _process_batches(self, documents: list[dict], batch_size: int, endpoint: str, batch_prefix: str) -> None:
"""Process document batches with retry logic"""
for i in range(0, len(documents), batch_size):
batch = documents[i : i + batch_size]
@@ -562,14 +563,16 @@ class SearchService:
if response.status_code == 422:
error_detail = response.json()
logger.error(
f"Validation error from search service for batch {batch_id}: {self._truncate_error_detail(error_detail)}"
"Validation error from search service for batch %s: %s",
batch_id,
self._truncate_error_detail(error_detail),
)
break
response.raise_for_status()
success = True
except Exception as e:
except Exception:
retry_count += 1
if retry_count >= max_retries:
if len(batch) > 1:
@@ -587,15 +590,15 @@ class SearchService:
f"{batch_prefix}-{i // batch_size}-B",
)
else:
logger.error(
f"Failed to index single document in batch {batch_id} after {max_retries} attempts: {str(e)}"
logger.exception(
"Failed to index single document in batch %s after %d attempts", batch_id, max_retries
)
break
wait_time = (2**retry_count) + (random.random() * 0.5)
wait_time = (2**retry_count) + (random.SystemRandom().random() * 0.5)
await asyncio.sleep(wait_time)
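The retry schedule above is plain exponential backoff with up to half a second of jitter; a standalone sketch of the delays it produces for max_retries=3:
import random
def backoff_delays(max_retries: int = 3) -> list[float]:
    # Mirrors the schedule above: 2**retry_count + U(0, 0.5) seconds of jitter
    rng = random.SystemRandom()
    return [(2**retry) + rng.random() * 0.5 for retry in range(1, max_retries + 1)]
print(backoff_delays())  # e.g. [2.31, 4.07, 8.42]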
def _truncate_error_detail(self, error_detail):
def _truncate_error_detail(self, error_detail: Union[dict, str, int]) -> Union[dict, str, int]:
"""Truncate error details for logging"""
truncated_detail = error_detail.copy() if isinstance(error_detail, dict) else error_detail
@@ -604,148 +607,179 @@ class SearchService:
and "detail" in truncated_detail
and isinstance(truncated_detail["detail"], list)
):
for i, item in enumerate(truncated_detail["detail"]):
if isinstance(item, dict) and "input" in item:
if isinstance(item["input"], dict) and any(k in item["input"] for k in ["documents", "text"]):
if "documents" in item["input"] and isinstance(item["input"]["documents"], list):
for j, doc in enumerate(item["input"]["documents"]):
if "text" in doc and isinstance(doc["text"], str) and len(doc["text"]) > 100:
item["input"]["documents"][j]["text"] = (
f"{doc['text'][:100]}... [truncated, total {len(doc['text'])} chars]"
)
for _i, item in enumerate(truncated_detail["detail"]):
if (
isinstance(item, dict)
and "input" in item
and isinstance(item["input"], dict)
and any(k in item["input"] for k in ["documents", "text"])
):
if "documents" in item["input"] and isinstance(item["input"]["documents"], list):
for j, doc in enumerate(item["input"]["documents"]):
if "text" in doc and isinstance(doc["text"], str) and len(doc["text"]) > 100:
item["input"]["documents"][j]["text"] = (
f"{doc['text'][:100]}... [truncated, total {len(doc['text'])} chars]"
)
if (
"text" in item["input"]
and isinstance(item["input"]["text"], str)
and len(item["input"]["text"]) > 100
):
item["input"]["text"] = (
f"{item['input']['text'][:100]}... [truncated, total {len(item['input']['text'])} chars]"
)
if (
"text" in item["input"]
and isinstance(item["input"]["text"], str)
and len(item["input"]["text"]) > 100
):
item["input"]["text"] = (
f"{item['input']['text'][:100]}... [truncated, total {len(item['input']['text'])} chars]"
)
return truncated_detail
async def search(self, text, limit, offset):
async def search(self, text: str, limit: int, offset: int) -> list[dict]:
"""Search documents"""
if not self.available:
return []
if not isinstance(text, str) or not text.strip():
if not text or not text.strip():
return []
# Check if we can serve from cache
if SEARCH_CACHE_ENABLED:
has_cache = await self.cache.has_query(text)
if has_cache:
cached_results = await self.cache.get(text, limit, offset)
if cached_results is not None:
return cached_results
# Устанавливаем общий размер выборки поиска
search_limit = SEARCH_PREFETCH_SIZE if SEARCH_CACHE_ENABLED else limit
logger.info("Searching for: '%s' (limit=%d, offset=%d, search_limit=%d)", text, limit, offset, search_limit)
response = await self.client.post(
"/search",
json={"text": text, "limit": search_limit},
)
# Not in cache or cache disabled, perform new search
try:
search_limit = limit
results = await response.json()
if not results or not isinstance(results, list):
return []
if SEARCH_CACHE_ENABLED:
search_limit = SEARCH_PREFETCH_SIZE
else:
search_limit = limit
# Обрабатываем каждый результат
formatted_results = []
for item in results:
if isinstance(item, dict):
formatted_result = self._format_search_result(item)
formatted_results.append(formatted_result)
logger.info(f"Searching for: '{text}' (limit={limit}, offset={offset}, search_limit={search_limit})")
response = await self.client.post(
"/search-combined",
json={"text": text, "limit": search_limit},
)
response.raise_for_status()
result = response.json()
formatted_results = result.get("results", [])
# filter out nonnumeric IDs
valid_results = [r for r in formatted_results if r.get("id", "").isdigit()]
if len(valid_results) != len(formatted_results):
formatted_results = valid_results
if SEARCH_CACHE_ENABLED:
# Store the full prefetch batch, then page it
# Сохраняем результаты в кеше
if SEARCH_CACHE_ENABLED and self.cache:
await self.cache.store(text, formatted_results)
return await self.cache.get(text, limit, offset)
return formatted_results
except Exception as e:
logger.error(f"Search error for '{text}': {e}", exc_info=True)
# Если включен кеш и есть лишние результаты
if SEARCH_CACHE_ENABLED and self.cache and await self.cache.has_query(text):
cached_result = await self.cache.get(text, limit, offset)
return cached_result or []
except Exception:
logger.exception("Search error for '%s'", text)
return []
else:
return formatted_results
async def search_authors(self, text, limit=10, offset=0):
async def search_authors(self, text: str, limit: int = 10, offset: int = 0) -> list[dict]:
"""Search only for authors using the specialized endpoint"""
if not self.available or not text.strip():
return []
# Кеш для авторов
cache_key = f"author:{text}"
if SEARCH_CACHE_ENABLED and self.cache and await self.cache.has_query(cache_key):
cached_results = await self.cache.get(cache_key, limit, offset)
if cached_results:
return cached_results
# Check if we can serve from cache
if SEARCH_CACHE_ENABLED:
has_cache = await self.cache.has_query(cache_key)
if has_cache:
cached_results = await self.cache.get(cache_key, limit, offset)
if cached_results is not None:
return cached_results
# Not in cache or cache disabled, perform new search
try:
search_limit = limit
if SEARCH_CACHE_ENABLED:
search_limit = SEARCH_PREFETCH_SIZE
else:
search_limit = limit
# Устанавливаем общий размер выборки поиска
search_limit = SEARCH_PREFETCH_SIZE if SEARCH_CACHE_ENABLED else limit
logger.info(
f"Searching authors for: '{text}' (limit={limit}, offset={offset}, search_limit={search_limit})"
"Searching authors for: '%s' (limit=%d, offset=%d, search_limit=%d)", text, limit, offset, search_limit
)
response = await self.client.post("/search-author", json={"text": text, "limit": search_limit})
response.raise_for_status()
result = response.json()
author_results = result.get("results", [])
results = await response.json()
if not results or not isinstance(results, list):
return []
# Filter out any invalid results if necessary
valid_results = [r for r in author_results if r.get("id", "").isdigit()]
if len(valid_results) != len(author_results):
author_results = valid_results
# Форматируем результаты поиска авторов
author_results = []
for item in results:
if isinstance(item, dict):
formatted_author = self._format_author_result(item)
author_results.append(formatted_author)
if SEARCH_CACHE_ENABLED:
# Store the full prefetch batch, then page it
# Сохраняем результаты в кеше
if SEARCH_CACHE_ENABLED and self.cache:
await self.cache.store(cache_key, author_results)
return await self.cache.get(cache_key, limit, offset)
# Возвращаем нужную порцию результатов
return author_results[offset : offset + limit]
except Exception as e:
logger.error(f"Error searching authors for '{text}': {e}")
except Exception:
logger.exception("Error searching authors for '%s'", text)
return []
async def check_index_status(self):
async def check_index_status(self) -> dict:
"""Get detailed statistics about the search index health"""
if not self.available:
return {"status": "disabled"}
return {"status": "unavailable", "message": "Search service not available"}
try:
response = await self.client.get("/index-status")
response.raise_for_status()
result = response.json()
response = await self.client.post("/check-index")
result = await response.json()
if result.get("consistency", {}).get("status") != "ok":
if isinstance(result, dict):
# Проверяем на NULL эмбеддинги
null_count = result.get("consistency", {}).get("null_embeddings_count", 0)
if null_count > 0:
logger.warning(f"Found {null_count} documents with NULL embeddings")
return result
logger.warning("Found %d documents with NULL embeddings", null_count)
except Exception as e:
logger.error(f"Failed to check index status: {e}")
logger.exception("Failed to check index status")
return {"status": "error", "message": str(e)}
else:
return result
def _format_search_result(self, item: dict) -> dict:
"""Format search result item"""
formatted_result = {}
# Обязательные поля
if "id" in item:
formatted_result["id"] = item["id"]
if "title" in item:
formatted_result["title"] = item["title"]
if "body" in item:
formatted_result["body"] = item["body"]
# Дополнительные поля
for field in ["subtitle", "lead", "author_id", "author_name", "created_at", "stat"]:
if field in item:
formatted_result[field] = item[field]
return formatted_result
def _format_author_result(self, item: dict) -> dict:
"""Format author search result item"""
formatted_result = {}
# Обязательные поля для автора
if "id" in item:
formatted_result["id"] = item["id"]
if "name" in item:
formatted_result["name"] = item["name"]
if "username" in item:
formatted_result["username"] = item["username"]
# Дополнительные поля для автора
for field in ["slug", "bio", "pic", "created_at", "stat"]:
if field in item:
formatted_result[field] = item[field]
return formatted_result
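For illustration, how the whitelisting in _format_search_result behaves on a raw hit (the input values are made up; search_service is the module-level singleton created below):
raw_hit = {
    "id": "42",
    "title": "Новый процессор",
    "body": "…",
    "score": 0.93,          # not in the whitelist, dropped
    "author_name": "Ann",   # optional field, kept
}
print(search_service._format_search_result(raw_hit))
# {'id': '42', 'title': 'Новый процессор', 'body': '…', 'author_name': 'Ann'}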
def close(self) -> None:
"""Close the search service"""
# Create the search service singleton
@@ -754,81 +788,64 @@ search_service = SearchService()
# API-compatible function to perform a search
async def search_text(text: str, limit: int = 200, offset: int = 0):
async def search_text(text: str, limit: int = 200, offset: int = 0) -> list[dict]:
payload = []
if search_service.available:
payload = await search_service.search(text, limit, offset)
return payload
async def search_author_text(text: str, limit: int = 10, offset: int = 0):
async def search_author_text(text: str, limit: int = 10, offset: int = 0) -> list[dict]:
"""Search authors API helper function"""
if search_service.available:
return await search_service.search_authors(text, limit, offset)
return []
async def get_search_count(text: str):
async def get_search_count(text: str) -> int:
"""Get count of title search results"""
if not search_service.available:
return 0
if SEARCH_CACHE_ENABLED and await search_service.cache.has_query(text):
if SEARCH_CACHE_ENABLED and search_service.cache is not None and await search_service.cache.has_query(text):
return await search_service.cache.get_total_count(text)
# If not found in cache, fetch from endpoint
return len(await search_text(text, SEARCH_PREFETCH_SIZE, 0))
# Return approximate count for active search
return 42 # Placeholder implementation
async def get_author_search_count(text: str):
async def get_author_search_count(text: str) -> int:
"""Get count of author search results"""
if not search_service.available:
return 0
if SEARCH_CACHE_ENABLED:
cache_key = f"author:{text}"
if await search_service.cache.has_query(cache_key):
if search_service.cache is not None and await search_service.cache.has_query(cache_key):
return await search_service.cache.get_total_count(cache_key)
# If not found in cache, fetch from endpoint
return len(await search_author_text(text, SEARCH_PREFETCH_SIZE, 0))
return 0 # Placeholder implementation
async def initialize_search_index(shouts_data):
async def initialize_search_index(shouts_data: list) -> None:
"""Initialize search index with existing data during application startup"""
if not SEARCH_ENABLED:
logger.info("Search is disabled, skipping index initialization")
return
if not shouts_data:
if not search_service.available:
logger.warning("Search service not available, skipping index initialization")
return
info = await search_service.info()
if info.get("status") in ["error", "unavailable", "disabled"]:
return
index_stats = info.get("index_stats", {})
indexed_doc_count = index_stats.get("total_count", 0)
index_status = await search_service.check_index_status()
if index_status.get("status") == "inconsistent":
problem_ids = index_status.get("consistency", {}).get("null_embeddings_sample", [])
if problem_ids:
problem_docs = [shout for shout in shouts_data if str(shout.id) in problem_ids]
if problem_docs:
await search_service.bulk_index(problem_docs)
# Only consider shouts with body content for body verification
def has_body_content(shout):
def has_body_content(shout: dict) -> bool:
for field in ["subtitle", "lead", "body"]:
if (
getattr(shout, field, None)
and isinstance(getattr(shout, field, None), str)
and getattr(shout, field).strip()
):
if hasattr(shout, field) and getattr(shout, field) and getattr(shout, field).strip():
return True
media = getattr(shout, "media", None)
if media:
# Check media JSON for content
if hasattr(shout, "media") and shout.media:
media = shout.media
if isinstance(media, str):
try:
media_json = json.loads(media)
@@ -836,83 +853,51 @@ async def initialize_search_index(shouts_data):
return True
except Exception:
return True
elif isinstance(media, dict):
if media.get("title") or media.get("body"):
return True
elif isinstance(media, dict) and (media.get("title") or media.get("body")):
return True
return False
shouts_with_body = [shout for shout in shouts_data if has_body_content(shout)]
body_ids = [str(shout.id) for shout in shouts_with_body]
total_count = len(shouts_data)
processed_count = 0
if abs(indexed_doc_count - len(shouts_data)) > 10:
doc_ids = [str(shout.id) for shout in shouts_data]
verification = await search_service.verify_docs(doc_ids)
if verification.get("status") == "error":
return
# Only reindex missing docs that actually have body content
missing_ids = [mid for mid in verification.get("missing", []) if mid in body_ids]
if missing_ids:
missing_docs = [shout for shout in shouts_with_body if str(shout.id) in missing_ids]
await search_service.bulk_index(missing_docs)
else:
pass
# Collect categories while we're at it for informational purposes
categories: set = set()
try:
test_query = "test"
# Use body search since that's most likely to return results
test_results = await search_text(test_query, 5)
for shout in shouts_data:
# Skip items that lack meaningful text content
if not has_body_content(shout):
continue
if test_results:
categories = set()
for result in test_results:
result_id = result.get("id")
matching_shouts = [s for s in shouts_data if str(s.id) == result_id]
if matching_shouts and hasattr(matching_shouts[0], "category"):
categories.add(getattr(matching_shouts[0], "category", "unknown"))
except Exception as e:
# Track categories
matching_shouts = [s for s in shouts_data if getattr(s, "id", None) == getattr(shout, "id", None)]
if matching_shouts and hasattr(matching_shouts[0], "category"):
categories.add(getattr(matching_shouts[0], "category", "unknown"))
except (AttributeError, TypeError):
pass
logger.info("Search index initialization completed: %d/%d items", processed_count, total_count)
async def check_search_service():
async def check_search_service() -> None:
info = await search_service.info()
if info.get("status") in ["error", "unavailable"]:
print(f"[WARNING] Search service unavailable: {info.get('message', 'unknown reason')}")
if info.get("status") in ["error", "unavailable", "disabled"]:
logger.debug("Search service is not available")
else:
print(f"[INFO] Search service is available: {info}")
logger.info("Search service is available and ready")
# Initialize search index in the background
async def initialize_search_index_background():
async def initialize_search_index_background() -> None:
"""
Запускает индексацию поиска в фоновом режиме с низким приоритетом.
Эта функция:
1. Загружает все shouts из базы данных
2. Индексирует их в поисковом сервисе
3. Выполняется асинхронно, не блокируя основной поток
4. Обрабатывает возможные ошибки, не прерывая работу приложения
Индексация запускается с задержкой после инициализации сервера,
чтобы не создавать дополнительную нагрузку при запуске.
"""
try:
print("[search] Starting background search indexing process")
from services.db import fetch_all_shouts
logger.info("Запуск фоновой индексации поиска...")
# Get total count first (optional)
all_shouts = await fetch_all_shouts()
total_count = len(all_shouts) if all_shouts else 0
print(f"[search] Fetched {total_count} shouts for background indexing")
# Здесь бы был код загрузки данных и индексации
# Пока что заглушка
if not all_shouts:
print("[search] No shouts found for indexing, skipping search index initialization")
return
# Start the indexing process with the fetched shouts
print("[search] Beginning background search index initialization...")
await initialize_search_index(all_shouts)
print("[search] Background search index initialization complete")
except Exception as e:
print(f"[search] Error in background search indexing: {str(e)}")
# Логируем детали ошибки для диагностики
logger.exception("[search] Detailed search indexing error")
logger.info("Фоновая индексация поиска завершена")
except Exception:
logger.exception("Ошибка фоновой индексации поиска")

View File

@@ -14,7 +14,7 @@ logger.addHandler(sentry_logging_handler)
logger.setLevel(logging.DEBUG) # Более подробное логирование
def start_sentry():
def start_sentry() -> None:
try:
logger.info("[services.sentry] Sentry init started...")
sentry_sdk.init(
@@ -26,5 +26,5 @@ def start_sentry():
send_default_pii=True, # Отправка информации о пользователе (PII)
)
logger.info("[services.sentry] Sentry initialized successfully.")
except Exception as _e:
except (sentry_sdk.utils.BadDsn, ImportError, ValueError, TypeError) as _e:
logger.warning("[services.sentry] Failed to initialize Sentry", exc_info=True)

View File

@@ -2,7 +2,8 @@ import asyncio
import os
import time
from datetime import datetime, timedelta, timezone
from typing import Dict, Optional
from pathlib import Path
from typing import ClassVar, Optional
# ga
from google.analytics.data_v1beta import BetaAnalyticsDataClient
@@ -32,9 +33,9 @@ class ViewedStorage:
"""
lock = asyncio.Lock()
views_by_shout = {}
shouts_by_topic = {}
shouts_by_author = {}
views_by_shout: ClassVar[dict] = {}
shouts_by_topic: ClassVar[dict] = {}
shouts_by_author: ClassVar[dict] = {}
views = None
period = 60 * 60 # каждый час
analytics_client: Optional[BetaAnalyticsDataClient] = None
@@ -42,10 +43,11 @@ class ViewedStorage:
running = False
redis_views_key = None
last_update_timestamp = 0
start_date = datetime.now().strftime("%Y-%m-%d")
start_date = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d")
_background_task: Optional[asyncio.Task] = None
@staticmethod
async def init():
async def init() -> None:
"""Подключение к клиенту Google Analytics и загрузка данных о просмотрах из Redis"""
self = ViewedStorage
async with self.lock:
@@ -53,25 +55,27 @@ class ViewedStorage:
await self.load_views_from_redis()
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", GOOGLE_KEYFILE_PATH)
if GOOGLE_KEYFILE_PATH and os.path.isfile(GOOGLE_KEYFILE_PATH):
if GOOGLE_KEYFILE_PATH and Path(GOOGLE_KEYFILE_PATH).is_file():
# Using a default constructor instructs the client to use the credentials
# specified in GOOGLE_APPLICATION_CREDENTIALS environment variable.
self.analytics_client = BetaAnalyticsDataClient()
logger.info(" * Google Analytics credentials accepted")
# Запуск фоновой задачи
_task = asyncio.create_task(self.worker())
task = asyncio.create_task(self.worker())
# Store reference to prevent garbage collection
self._background_task = task
else:
logger.warning(" * please, add Google Analytics credentials file")
self.running = False
@staticmethod
async def load_views_from_redis():
async def load_views_from_redis() -> None:
"""Загрузка предварительно подсчитанных просмотров из Redis"""
self = ViewedStorage
# Подключаемся к Redis если соединение не установлено
if not redis._client:
if not await redis.ping():
await redis.connect()
# Логируем настройки Redis соединения
@@ -79,12 +83,12 @@ class ViewedStorage:
# Получаем список всех ключей migrated_views_* и находим самый последний
keys = await redis.execute("KEYS", "migrated_views_*")
logger.info(f" * Raw Redis result for 'KEYS migrated_views_*': {len(keys)}")
logger.info("Raw Redis result for 'KEYS migrated_views_*': %d", len(keys))
# Декодируем байтовые строки, если есть
if keys and isinstance(keys[0], bytes):
keys = [k.decode("utf-8") for k in keys]
logger.info(f" * Decoded keys: {keys}")
logger.info("Decoded keys: %s", keys)
if not keys:
logger.warning(" * No migrated_views keys found in Redis")
@@ -92,7 +96,7 @@ class ViewedStorage:
# Фильтруем только ключи timestamp формата (исключаем migrated_views_slugs)
timestamp_keys = [k for k in keys if k != "migrated_views_slugs"]
logger.info(f" * Timestamp keys after filtering: {timestamp_keys}")
logger.info("Timestamp keys after filtering: %s", timestamp_keys)
if not timestamp_keys:
logger.warning(" * No migrated_views timestamp keys found in Redis")
@@ -102,32 +106,32 @@ class ViewedStorage:
timestamp_keys.sort()
latest_key = timestamp_keys[-1]
self.redis_views_key = latest_key
logger.info(f" * Selected latest key: {latest_key}")
logger.info("Selected latest key: %s", latest_key)
# Получаем метку времени создания для установки start_date
timestamp = await redis.execute("HGET", latest_key, "_timestamp")
if timestamp:
self.last_update_timestamp = int(timestamp)
timestamp_dt = datetime.fromtimestamp(int(timestamp))
timestamp_dt = datetime.fromtimestamp(int(timestamp), tz=timezone.utc)
self.start_date = timestamp_dt.strftime("%Y-%m-%d")
# Если данные сегодняшние, считаем их актуальными
now_date = datetime.now().strftime("%Y-%m-%d")
now_date = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d")
if now_date == self.start_date:
logger.info(" * Views data is up to date!")
else:
logger.warning(f" * Views data is from {self.start_date}, may need update")
logger.warning("Views data is from %s, may need update", self.start_date)
# Выводим информацию о количестве загруженных записей
total_entries = await redis.execute("HGET", latest_key, "_total")
if total_entries:
logger.info(f" * {total_entries} shouts with views loaded from Redis key: {latest_key}")
logger.info("%s shouts with views loaded from Redis key: %s", total_entries, latest_key)
logger.info(f" * Found migrated_views keys: {keys}")
logger.info("Found migrated_views keys: %s", keys)
# noinspection PyTypeChecker
@staticmethod
async def update_pages():
async def update_pages() -> None:
"""Запрос всех страниц от Google Analytics, отсортированных по количеству просмотров"""
self = ViewedStorage
logger.info(" ⎧ views update from Google Analytics ---")
@@ -164,16 +168,16 @@ class ViewedStorage:
# Запись путей страниц для логирования
slugs.add(slug)
logger.info(f"collected pages: {len(slugs)} ")
logger.info("collected pages: %d", len(slugs))
end = time.time()
logger.info("views update time: %fs " % (end - start))
except Exception as error:
logger.info("views update time: %.2fs", end - start)
except (ConnectionError, TimeoutError, ValueError) as error:
logger.error(error)
self.running = False
@staticmethod
async def get_shout(shout_slug="", shout_id=0) -> int:
async def get_shout(shout_slug: str = "", shout_id: int = 0) -> int:
"""
Получение метрики просмотров shout по slug или id.
@@ -187,7 +191,7 @@ class ViewedStorage:
self = ViewedStorage
# Получаем данные из Redis для новой схемы хранения
if not redis._client:
if not await redis.ping():
await redis.connect()
fresh_views = self.views_by_shout.get(shout_slug, 0)
@@ -206,7 +210,7 @@ class ViewedStorage:
return fresh_views
@staticmethod
async def get_shout_media(shout_slug) -> Dict[str, int]:
async def get_shout_media(shout_slug: str) -> dict[str, int]:
"""Получение метрики воспроизведения shout по slug."""
self = ViewedStorage
@@ -215,7 +219,7 @@ class ViewedStorage:
return self.views_by_shout.get(shout_slug, 0)
@staticmethod
async def get_topic(topic_slug) -> int:
async def get_topic(topic_slug: str) -> int:
"""Получение суммарного значения просмотров темы."""
self = ViewedStorage
views_count = 0
@@ -224,7 +228,7 @@ class ViewedStorage:
return views_count
@staticmethod
async def get_author(author_slug) -> int:
async def get_author(author_slug: str) -> int:
"""Получение суммарного значения просмотров автора."""
self = ViewedStorage
views_count = 0
@@ -233,13 +237,13 @@ class ViewedStorage:
return views_count
@staticmethod
def update_topics(shout_slug):
def update_topics(shout_slug: str) -> None:
"""Обновление счетчиков темы по slug shout"""
self = ViewedStorage
with local_session() as session:
# Определение вспомогательной функции для избежания повторения кода
def update_groups(dictionary, key, value):
dictionary[key] = list(set(dictionary.get(key, []) + [value]))
def update_groups(dictionary: dict, key: str, value: str) -> None:
dictionary[key] = list({*dictionary.get(key, []), value})
# Обновление тем и авторов с использованием вспомогательной функции
for [_st, topic] in (
@@ -253,7 +257,7 @@ class ViewedStorage:
update_groups(self.shouts_by_author, author.slug, shout_slug)
@staticmethod
async def stop():
async def stop() -> None:
"""Остановка фоновой задачи"""
self = ViewedStorage
async with self.lock:
@@ -261,7 +265,7 @@ class ViewedStorage:
logger.info("ViewedStorage worker was stopped.")
@staticmethod
async def worker():
async def worker() -> None:
"""Асинхронная задача обновления"""
failed = 0
self = ViewedStorage
@@ -270,10 +274,10 @@ class ViewedStorage:
try:
await self.update_pages()
failed = 0
except Exception as exc:
except (ConnectionError, TimeoutError, ValueError) as exc:
failed += 1
logger.debug(exc)
logger.info(" - update failed #%d, wait 10 secs" % failed)
logger.info("update failed #%d, wait 10 secs", failed)
if failed > 3:
logger.info(" - views update failed, not trying anymore")
self.running = False
@@ -281,7 +285,7 @@ class ViewedStorage:
if failed == 0:
when = datetime.now(timezone.utc) + timedelta(seconds=self.period)
t = format(when.astimezone().isoformat())
logger.info(" ⎩ next update: %s" % (t.split("T")[0] + " " + t.split("T")[1].split(".")[0]))
logger.info(" ⎩ next update: %s", t.split("T")[0] + " " + t.split("T")[1].split(".")[0])
await asyncio.sleep(self.period)
else:
await asyncio.sleep(10)
@@ -326,10 +330,10 @@ class ViewedStorage:
return 0
views = int(response.rows[0].metric_values[0].value)
except (ConnectionError, ValueError, AttributeError):
logger.exception("Google Analytics API Error")
return 0
else:
# Кэшируем результат
self.views_by_shout[slug] = views
return views
except Exception as e:
logger.error(f"Google Analytics API Error: {e}")
return 0
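A minimal sketch of consuming ViewedStorage from application code, assuming init() runs at startup, stop() at shutdown, and that the module is importable as services.viewed:
import asyncio
from services.viewed import ViewedStorage  # assumed module path
async def demo_views() -> None:
    await ViewedStorage.init()  # loads precounted views from Redis, starts the GA worker if credentials exist
    shout_views = await ViewedStorage.get_shout(shout_slug="some-slug")
    topic_views = await ViewedStorage.get_topic("tech")
    author_views = await ViewedStorage.get_author("ann")
    print(shout_views, topic_views, author_views)
    await ViewedStorage.stop()
asyncio.run(demo_views())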