linted+fmt
All checks were successful
Deploy on push / deploy (push) Successful in 6s

2025-05-29 12:37:39 +03:00
parent d4c16658bd
commit 4070f4fcde
49 changed files with 835 additions and 983 deletions

View File

@@ -1,16 +1,16 @@
from functools import wraps
from typing import Tuple
from sqlalchemy import exc
from starlette.requests import Request
from auth.internal import verify_internal_auth
from auth.orm import Author, Role
from cache.cache import get_cached_author_by_id
from resolvers.stat import get_with_stat
from utils.logger import root_logger as logger
from auth.internal import verify_internal_auth
from sqlalchemy import exc
from services.db import local_session
from auth.orm import Author, Role
from settings import SESSION_TOKEN_HEADER
from utils.logger import root_logger as logger
# List of allowed headers
ALLOWED_HEADERS = ["Authorization", "Content-Type"]
@@ -31,21 +31,21 @@ async def check_auth(req: Request) -> Tuple[str, list[str], bool]:
- is_admin: bool - Flag indicating whether the user has administrator rights
"""
logger.debug(f"[check_auth] Проверка авторизации...")
# Get the authorization header
token = None
# Check the header, accounting for case variations
headers_dict = dict(req.headers.items())
logger.debug(f"[check_auth] Все заголовки: {headers_dict}")
# Look for the Authorization header regardless of case
for header_name, header_value in headers_dict.items():
if header_name.lower() == SESSION_TOKEN_HEADER.lower():
token = header_value
logger.debug(f"[check_auth] Найден заголовок {header_name}: {token[:10]}...")
break
if not token:
logger.debug(f"[check_auth] Токен не найден в заголовках")
return "", [], False
@@ -57,8 +57,10 @@ async def check_auth(req: Request) -> Tuple[str, list[str], bool]:
# Verify authorization via the internal mechanism
logger.debug("[check_auth] Вызов verify_internal_auth...")
user_id, user_roles, is_admin = await verify_internal_auth(token)
logger.debug(f"[check_auth] Результат verify_internal_auth: user_id={user_id}, roles={user_roles}, is_admin={is_admin}")
logger.debug(
f"[check_auth] Результат verify_internal_auth: user_id={user_id}, roles={user_roles}, is_admin={is_admin}"
)
# If the roles contain no admin but an ID is present, check the DB
if user_id and not is_admin:
try:
@@ -71,16 +73,19 @@ async def check_auth(req: Request) -> Tuple[str, list[str], bool]:
else:
# Check for admin rights via the DB
from auth.orm import AuthorRole
admin_role = session.query(AuthorRole).filter(
AuthorRole.author == user_id_int,
AuthorRole.role.in_(["admin", "super"])
).first()
admin_role = (
session.query(AuthorRole)
.filter(AuthorRole.author == user_id_int, AuthorRole.role.in_(["admin", "super"]))
.first()
)
is_admin = admin_role is not None
except Exception as e:
logger.error(f"Ошибка при проверке прав администратора: {e}")
return user_id, user_roles, is_admin
async def add_user_role(user_id: str, roles: list[str] = None):
"""
Add roles for a user in the local DB.
@@ -131,32 +136,32 @@ def login_required(f):
info = args[1]
req = info.context.get("request")
logger.debug(f"[login_required] Проверка авторизации для запроса: {req.method} {req.url.path}")
logger.debug(f"[login_required] Заголовки: {req.headers}")
user_id, user_roles, is_admin = await check_auth(req)
if not user_id:
logger.debug(f"[login_required] Пользователь не авторизован, {dict(req)}, {info}")
raise GraphQLError("Требуется авторизация")
# Check for the reader role
if 'reader' not in user_roles:
if "reader" not in user_roles:
logger.error(f"Пользователь {user_id} не имеет роли 'reader'")
raise GraphQLError("У вас нет необходимых прав для доступа")
logger.info(f"Авторизован пользователь {user_id} с ролями: {user_roles}")
info.context["roles"] = user_roles
# Check administrator rights
info.context["is_admin"] = is_admin
author = await get_cached_author_by_id(user_id, get_with_stat)
if not author:
logger.error(f"Профиль автора не найден для пользователя {user_id}")
info.context["author"] = author
return await f(*args, **kwargs)
return decorated_function
@@ -177,7 +182,7 @@ def login_accepted(f):
if user_id and user_roles:
logger.info(f"login_accepted: Пользователь авторизован: {user_id} с ролями {user_roles}")
info.context["roles"] = user_roles
# Check administrator rights
info.context["is_admin"] = is_admin

View File

@@ -200,9 +200,7 @@ class Base(declarative_base()):
data[column_name] = value
else:
# Skip the attribute if the object lacks it (it may have been added after a migration)
logger.debug(
f"Skipping missing attribute '{column_name}' for {self.__class__.__name__}"
)
logger.debug(f"Skipping missing attribute '{column_name}' for {self.__class__.__name__}")
except AttributeError as e:
logger.warning(f"Attribute error for column '{column_name}': {e}")
# Add the synthetic .stat field if it exists
@@ -223,9 +221,7 @@ class Base(declarative_base()):
# Function that prints a full traceback for warnings
def warning_with_traceback(
message: Warning | str, category, filename: str, lineno: int, file=None, line=None
):
def warning_with_traceback(message: Warning | str, category, filename: str, lineno: int, file=None, line=None):
tb = traceback.format_stack()
tb_str = "".join(tb)
return f"{message} ({filename}, {lineno}): {category.__name__}\n{tb_str}"
@@ -302,22 +298,22 @@ json_builder, json_array_builder, json_cast = get_json_builder()
# Fetch all shouts, with authors preloaded
# This function is used for search indexing
async def fetch_all_shouts(session=None):
"""Fetch all published shouts for search indexing with authors preloaded"""
from orm.shout import Shout
close_session = False
if session is None:
session = local_session()
close_session = True
try:
# Fetch only published and non-deleted shouts with authors preloaded
query = session.query(Shout).options(
joinedload(Shout.authors)
).filter(
Shout.published_at.is_not(None),
Shout.deleted_at.is_(None)
query = (
session.query(Shout)
.options(joinedload(Shout.authors))
.filter(Shout.published_at.is_not(None), Shout.deleted_at.is_(None))
)
shouts = query.all()
return shouts
@@ -326,4 +322,4 @@ async def fetch_all_shouts(session=None):
return []
finally:
if close_session:
session.close()
session.close()
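fetch_all_shouts above opens its own session only when none is passed in, and closes only what it opened. A minimal standalone sketch of that pattern, with a placeholder engine; names here are illustrative, not from the project:

from contextlib import contextmanager

from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker

engine = create_engine("sqlite://")  # placeholder in-memory engine for illustration
SessionLocal = sessionmaker(bind=engine)


@contextmanager
def own_or_borrowed(session: Session | None = None):
    # Yield the borrowed session, or create one and close it afterwards.
    close_session = session is None
    session = session or SessionLocal()
    try:
        yield session
    finally:
        if close_session:
            session.close()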

View File

@@ -1,9 +1,11 @@
from typing import Dict, List, Optional, Set
from dataclasses import dataclass
import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Set
from redis import Redis
from settings import REDIS_URL, ROOT_DIR
from utils.logger import root_logger as logger
@@ -31,12 +33,37 @@ class EnvManager:
# Standard environment variables that should be excluded
EXCLUDED_ENV_VARS: Set[str] = {
"PATH", "SHELL", "USER", "HOME", "PWD", "TERM", "LANG",
"PYTHONPATH", "_", "TMPDIR", "TERM_PROGRAM", "TERM_SESSION_ID",
"XPC_SERVICE_NAME", "XPC_FLAGS", "SHLVL", "SECURITYSESSIONID",
"LOGNAME", "OLDPWD", "ZSH", "PAGER", "LESS", "LC_CTYPE", "LSCOLORS",
"SSH_AUTH_SOCK", "DISPLAY", "COLORTERM", "EDITOR", "VISUAL",
"PYTHONDONTWRITEBYTECODE", "VIRTUAL_ENV", "PYTHONUNBUFFERED"
"PATH",
"SHELL",
"USER",
"HOME",
"PWD",
"TERM",
"LANG",
"PYTHONPATH",
"_",
"TMPDIR",
"TERM_PROGRAM",
"TERM_SESSION_ID",
"XPC_SERVICE_NAME",
"XPC_FLAGS",
"SHLVL",
"SECURITYSESSIONID",
"LOGNAME",
"OLDPWD",
"ZSH",
"PAGER",
"LESS",
"LC_CTYPE",
"LSCOLORS",
"SSH_AUTH_SOCK",
"DISPLAY",
"COLORTERM",
"EDITOR",
"VISUAL",
"PYTHONDONTWRITEBYTECODE",
"VIRTUAL_ENV",
"PYTHONUNBUFFERED",
}
# Sections for grouping variables
@@ -44,57 +71,67 @@ class EnvManager:
"AUTH": {
"pattern": r"^(JWT|AUTH|SESSION|OAUTH|GITHUB|GOOGLE|FACEBOOK)_",
"name": "Авторизация",
"description": "Настройки системы авторизации"
"description": "Настройки системы авторизации",
},
"DATABASE": {
"pattern": r"^(DB|DATABASE|POSTGRES|MYSQL|SQL)_",
"name": "База данных",
"description": "Настройки подключения к базам данных"
"description": "Настройки подключения к базам данных",
},
"CACHE": {
"pattern": r"^(REDIS|CACHE|MEMCACHED)_",
"name": "Кэширование",
"description": "Настройки систем кэширования"
"description": "Настройки систем кэширования",
},
"SEARCH": {
"pattern": r"^(ELASTIC|SEARCH|OPENSEARCH)_",
"name": "Поиск",
"description": "Настройки поисковых систем"
"description": "Настройки поисковых систем",
},
"APP": {
"pattern": r"^(APP|PORT|HOST|DEBUG|DOMAIN|ENVIRONMENT|ENV|FRONTEND)_",
"name": "Общие настройки",
"description": "Общие настройки приложения"
"description": "Общие настройки приложения",
},
"LOGGING": {
"pattern": r"^(LOG|LOGGING|SENTRY|GLITCH|GLITCHTIP)_",
"name": "Мониторинг",
"description": "Настройки логирования и мониторинга"
"description": "Настройки логирования и мониторинга",
},
"EMAIL": {
"pattern": r"^(MAIL|EMAIL|SMTP|IMAP|POP3|POST)_",
"name": "Электронная почта",
"description": "Настройки отправки электронной почты"
"description": "Настройки отправки электронной почты",
},
"ANALYTICS": {
"pattern": r"^(GA|GOOGLE_ANALYTICS|ANALYTICS)_",
"name": "Аналитика",
"description": "Настройки систем аналитики"
"description": "Настройки систем аналитики",
},
}
# Variables that should always be marked as secret
SECRET_VARS_PATTERNS = [
r".*TOKEN.*", r".*SECRET.*", r".*PASSWORD.*", r".*KEY.*",
r".*PWD.*", r".*PASS.*", r".*CRED.*", r".*_DSN.*",
r".*JWT.*", r".*SESSION.*", r".*OAUTH.*",
r".*GITHUB.*", r".*GOOGLE.*", r".*FACEBOOK.*"
r".*TOKEN.*",
r".*SECRET.*",
r".*PASSWORD.*",
r".*KEY.*",
r".*PWD.*",
r".*PASS.*",
r".*CRED.*",
r".*_DSN.*",
r".*JWT.*",
r".*SESSION.*",
r".*OAUTH.*",
r".*GITHUB.*",
r".*GOOGLE.*",
r".*FACEBOOK.*",
]
def __init__(self):
self.redis = Redis.from_url(REDIS_URL)
self.prefix = "env:"
self.env_file_path = os.path.join(ROOT_DIR, '.env')
self.env_file_path = os.path.join(ROOT_DIR, ".env")
def get_all_variables(self) -> List[EnvSection]:
"""
@@ -142,15 +179,15 @@ class EnvManager:
env_vars = {}
if os.path.exists(self.env_file_path):
try:
with open(self.env_file_path, 'r') as f:
with open(self.env_file_path, "r") as f:
for line in f:
line = line.strip()
# Skip empty lines and comments
if not line or line.startswith('#'):
if not line or line.startswith("#"):
continue
# Split the line into key and value
if '=' in line:
key, value = line.split('=', 1)
if "=" in line:
key, value = line.split("=", 1)
key = key.strip()
value = value.strip()
# Strip quotes if present
@@ -207,17 +244,17 @@ class EnvManager:
"""
Determine the variable's type from its value
"""
if value.lower() in ('true', 'false'):
if value.lower() in ("true", "false"):
return "boolean"
if value.isdigit():
return "integer"
if re.match(r"^\d+\.\d+$", value):
return "float"
# Check for a JSON object or array
if (value.startswith('{') and value.endswith('}')) or (value.startswith('[') and value.endswith(']')):
if (value.startswith("{") and value.endswith("}")) or (value.startswith("[") and value.endswith("]")):
return "json"
# Check for a URL
if value.startswith(('http://', 'https://', 'redis://', 'postgresql://')):
if value.startswith(("http://", "https://", "redis://", "postgresql://")):
return "url"
return "string"
@@ -233,14 +270,9 @@ class EnvManager:
for key, value in variables.items():
is_secret = self._is_secret_variable(key)
var_type = self._determine_variable_type(value)
var = EnvVariable(
key=key,
value=value,
type=var_type,
is_secret=is_secret
)
var = EnvVariable(key=key, value=value, type=var_type, is_secret=is_secret)
# Determine the section for the variable
placed = False
for section_id, section_config in self.SECTIONS.items():
@@ -248,7 +280,7 @@ class EnvManager:
sections_dict[section_id].append(var)
placed = True
break
# If the variable did not match any section
# if not placed:
# other_variables.append(var)
@@ -260,22 +292,20 @@ class EnvManager:
section_config = self.SECTIONS[section_id]
result.append(
EnvSection(
name=section_config["name"],
description=section_config["description"],
variables=variables
name=section_config["name"], description=section_config["description"], variables=variables
)
)
# Add remaining variables, if any
if other_variables:
result.append(
EnvSection(
name="Прочие переменные",
description="Переменные, не вошедшие в основные категории",
variables=other_variables
variables=other_variables,
)
)
return result
def update_variable(self, key: str, value: str) -> bool:
@@ -286,13 +316,13 @@ class EnvManager:
# Save to Redis
full_key = f"{self.prefix}{key}"
self.redis.set(full_key, value)
# Update the value in the .env file
self._update_dotenv_var(key, value)
# Update the variable in the current process
os.environ[key] = value
return True
except Exception as e:
logger.error(f"Ошибка обновления переменной {key}: {e}")
@@ -305,20 +335,20 @@ class EnvManager:
try:
# If the .env file does not exist, create it
if not os.path.exists(self.env_file_path):
with open(self.env_file_path, 'w') as f:
with open(self.env_file_path, "w") as f:
f.write(f"{key}={value}\n")
return True
# If the file exists, read its contents
lines = []
found = False
with open(self.env_file_path, 'r') as f:
with open(self.env_file_path, "r") as f:
for line in f:
if line.strip() and not line.strip().startswith('#'):
if line.strip() and not line.strip().startswith("#"):
if line.strip().startswith(f"{key}="):
# Quote the value if necessary
if ' ' in value or ',' in value or '"' in value or "'" in value:
if " " in value or "," in value or '"' in value or "'" in value:
escaped_value = f'"{value}"'
else:
escaped_value = value
@@ -328,20 +358,20 @@ class EnvManager:
lines.append(line)
else:
lines.append(line)
# If the variable was not in the file, append it
if not found:
# Quote the value if necessary
if ' ' in value or ',' in value or '"' in value or "'" in value:
if " " in value or "," in value or '"' in value or "'" in value:
escaped_value = f'"{value}"'
else:
escaped_value = value
lines.append(f"{key}={escaped_value}\n")
# Write the updated file back
with open(self.env_file_path, 'w') as f:
with open(self.env_file_path, "w") as f:
f.writelines(lines)
return True
except Exception as e:
logger.error(f"Ошибка обновления .env файла: {e}")
@@ -358,14 +388,14 @@ class EnvManager:
full_key = f"{self.prefix}{var.key}"
pipe.set(full_key, var.value)
pipe.execute()
# Update the variables in the .env file
for var in variables:
self._update_dotenv_var(var.key, var.value)
# Update the variable in the current process
os.environ[var.key] = var.value
return True
except Exception as e:
logger.error(f"Ошибка массового обновления переменных: {e}")

View File

@@ -93,9 +93,7 @@ async def notify_draft(draft_data, action: str = "publish"):
# If related attributes were passed, include them
if hasattr(draft_data, "topics") and draft_data.topics is not None:
draft_payload["topics"] = [
{"id": t.id, "name": t.name, "slug": t.slug} for t in draft_data.topics
]
draft_payload["topics"] = [{"id": t.id, "name": t.name, "slug": t.slug} for t in draft_data.topics]
if hasattr(draft_data, "authors") and draft_data.authors is not None:
draft_payload["authors"] = [

View File

@@ -30,7 +30,7 @@ class RedisService:
if self._client is None:
await self.connect()
logger.info(f"[redis] Автоматически установлено соединение при выполнении команды {command}")
if self._client:
try:
logger.debug(f"{command}") # {args[0]}") # {args} {kwargs}")
@@ -55,14 +55,14 @@ class RedisService:
if self._client is None:
# Raise an exception, since a pipeline cannot be created before connecting
raise Exception("Redis client is not initialized. Call redis.connect() first.")
return self._client.pipeline()
async def subscribe(self, *channels):
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
async with self._client.pubsub() as pubsub:
for channel in channels:
await pubsub.subscribe(channel)
@@ -71,7 +71,7 @@ class RedisService:
async def unsubscribe(self, *channels):
if self._client is None:
return
async with self._client.pubsub() as pubsub:
for channel in channels:
await pubsub.unsubscribe(channel)
@@ -81,14 +81,14 @@ class RedisService:
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
await self._client.publish(channel, data)
async def set(self, key, data, ex=None):
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
# Prepare the command arguments
args = [key, data]
@@ -104,7 +104,7 @@ class RedisService:
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
return await self.execute("get", key)
async def delete(self, *keys):
@@ -119,11 +119,11 @@ class RedisService:
"""
if not keys:
return 0
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
return await self._client.delete(*keys)
async def hmset(self, key, mapping):
@@ -137,7 +137,7 @@ class RedisService:
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
await self._client.hset(key, mapping=mapping)
async def expire(self, key, seconds):
@@ -151,7 +151,7 @@ class RedisService:
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
await self._client.expire(key, seconds)
async def sadd(self, key, *values):
@@ -165,7 +165,7 @@ class RedisService:
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
await self._client.sadd(key, *values)
async def srem(self, key, *values):
@@ -179,7 +179,7 @@ class RedisService:
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
await self._client.srem(key, *values)
async def smembers(self, key):
@@ -195,9 +195,9 @@ class RedisService:
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
return await self._client.smembers(key)
async def exists(self, key):
"""
Check whether a key exists in Redis.
@@ -210,10 +210,10 @@ class RedisService:
"""
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
await self.connect()
return await self._client.exists(key)
async def expire(self, key, seconds):
"""
Set the time to live for a key.
@@ -225,7 +225,7 @@ class RedisService:
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
return await self._client.expire(key, seconds)
async def keys(self, pattern):
@@ -238,10 +238,8 @@ class RedisService:
# Automatically connect to Redis if no connection has been established
if self._client is None:
await self.connect()
return await self._client.keys(pattern)
redis = RedisService()
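Because every helper on RedisService now auto-connects on first use, the singleton can be used without an explicit connect() call. A minimal sketch; the import path services.redis is an assumption and a reachable Redis instance is required:

import asyncio

from services.redis import redis  # assumed import path for the singleton defined above


async def demo() -> None:
    await redis.set("greeting", "hello", ex=60)  # lazily connects on the first command
    print(await redis.get("greeting"))
    await redis.delete("greeting")


asyncio.run(demo())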

View File

@@ -12,7 +12,7 @@ resolvers = [query, mutation, type_draft]
def create_all_tables():
"""Create all database tables in the correct order."""
from auth.orm import Author, AuthorFollower, AuthorBookmark, AuthorRating
from auth.orm import Author, AuthorBookmark, AuthorFollower, AuthorRating
from orm import community, draft, notification, reaction, shout, topic
# Order matters: tables without foreign keys first, then dependent tables
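For comparison, SQLAlchemy's metadata.create_all already sorts tables by foreign-key dependencies, so an explicit order mainly matters when tables are created one at a time. A minimal sketch with stand-in models (not the project's real ORM classes):

from sqlalchemy import Column, ForeignKey, Integer, create_engine
from sqlalchemy.orm import declarative_base

Base = declarative_base()
engine = create_engine("sqlite://")  # placeholder engine


class Author(Base):  # stand-in model for illustration only
    __tablename__ = "author"
    id = Column(Integer, primary_key=True)


class AuthorFollower(Base):
    __tablename__ = "author_follower"
    id = Column(Integer, primary_key=True)
    author = Column(Integer, ForeignKey("author.id"))


# create_all resolves the dependency order itself: "author" before "author_follower".
Base.metadata.create_all(engine)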

View File

@@ -2,9 +2,11 @@ import asyncio
import json
import logging
import os
import httpx
import time
import random
import time
import httpx
from settings import TXTAI_SERVICE_URL
# Set up proper logging
@@ -15,23 +17,15 @@ logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
# Configuration for search service
SEARCH_ENABLED = bool(
os.environ.get("SEARCH_ENABLED", "true").lower() in ["true", "1", "yes"]
)
SEARCH_ENABLED = bool(os.environ.get("SEARCH_ENABLED", "true").lower() in ["true", "1", "yes"])
MAX_BATCH_SIZE = int(os.environ.get("SEARCH_MAX_BATCH_SIZE", "25"))
# Search cache configuration
SEARCH_CACHE_ENABLED = bool(
os.environ.get("SEARCH_CACHE_ENABLED", "true").lower() in ["true", "1", "yes"]
)
SEARCH_CACHE_TTL_SECONDS = int(
os.environ.get("SEARCH_CACHE_TTL_SECONDS", "300")
) # Default: 15 minutes
SEARCH_CACHE_ENABLED = bool(os.environ.get("SEARCH_CACHE_ENABLED", "true").lower() in ["true", "1", "yes"])
SEARCH_CACHE_TTL_SECONDS = int(os.environ.get("SEARCH_CACHE_TTL_SECONDS", "300"))  # Default: 300 seconds (5 minutes)
SEARCH_PREFETCH_SIZE = int(os.environ.get("SEARCH_PREFETCH_SIZE", "200"))
SEARCH_USE_REDIS = bool(
os.environ.get("SEARCH_USE_REDIS", "true").lower() in ["true", "1", "yes"]
)
SEARCH_USE_REDIS = bool(os.environ.get("SEARCH_USE_REDIS", "true").lower() in ["true", "1", "yes"])
search_offset = 0
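The SEARCH_* flags above all share the same truthiness rule; as a small sketch, it could be factored into a helper like this (the helper itself is not part of the commit):

import os


def env_flag(name: str, default: str = "true") -> bool:
    # True when the variable is "true", "1", or "yes" (case-insensitive), as above.
    return os.environ.get(name, default).lower() in ["true", "1", "yes"]


SEARCH_ENABLED = env_flag("SEARCH_ENABLED")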
@@ -68,9 +62,7 @@ class SearchCache:
serialized_results,
ex=self.ttl,
)
logger.info(
f"Stored {len(results)} search results for query '{query}' in Redis"
)
logger.info(f"Stored {len(results)} search results for query '{query}' in Redis")
return True
except Exception as e:
logger.error(f"Error storing search results in Redis: {e}")
@@ -83,9 +75,7 @@ class SearchCache:
# Store results and update timestamp
self.cache[normalized_query] = results
self.last_accessed[normalized_query] = time.time()
logger.info(
f"Cached {len(results)} search results for query '{query}' in memory"
)
logger.info(f"Cached {len(results)} search results for query '{query}' in memory")
return True
async def get(self, query, limit=10, offset=0):
@@ -117,14 +107,10 @@ class SearchCache:
# Return paginated subset
end_idx = min(offset + limit, len(all_results))
if offset >= len(all_results):
logger.warning(
f"Requested offset {offset} exceeds result count {len(all_results)}"
)
logger.warning(f"Requested offset {offset} exceeds result count {len(all_results)}")
return []
logger.info(
f"Cache hit for '{query}': serving {offset}:{end_idx} of {len(all_results)} results"
)
logger.info(f"Cache hit for '{query}': serving {offset}:{end_idx} of {len(all_results)} results")
return all_results[offset:end_idx]
async def has_query(self, query):
@@ -174,11 +160,7 @@ class SearchCache:
"""Remove oldest entries if memory cache is full"""
now = time.time()
# First remove expired entries
expired_keys = [
key
for key, last_access in self.last_accessed.items()
if now - last_access > self.ttl
]
expired_keys = [key for key, last_access in self.last_accessed.items() if now - last_access > self.ttl]
for key in expired_keys:
if key in self.cache:
@@ -217,9 +199,7 @@ class SearchService:
if SEARCH_CACHE_ENABLED:
cache_location = "Redis" if SEARCH_USE_REDIS else "Memory"
logger.info(
f"Search caching enabled using {cache_location} cache with TTL={SEARCH_CACHE_TTL_SECONDS}s"
)
logger.info(f"Search caching enabled using {cache_location} cache with TTL={SEARCH_CACHE_TTL_SECONDS}s")
async def info(self):
"""Return information about search service"""
@@ -270,9 +250,7 @@ class SearchService:
logger.info(
f"Document verification complete: {bodies_missing_count} bodies missing, {titles_missing_count} titles missing"
)
logger.info(
f"Total unique missing documents: {total_missing_count} out of {len(doc_ids)} total"
)
logger.info(f"Total unique missing documents: {total_missing_count} out of {len(doc_ids)} total")
# Return in a backwards-compatible format plus the detailed breakdown
return {
@@ -308,9 +286,7 @@ class SearchService:
# 1. Index title if available
if hasattr(shout, "title") and shout.title and isinstance(shout.title, str):
title_doc = {"id": str(shout.id), "title": shout.title.strip()}
indexing_tasks.append(
self.index_client.post("/index-title", json=title_doc)
)
indexing_tasks.append(self.index_client.post("/index-title", json=title_doc))
# 2. Index body content (subtitle, lead, body)
body_text_parts = []
@@ -346,9 +322,7 @@ class SearchService:
body_text = body_text[:MAX_TEXT_LENGTH]
body_doc = {"id": str(shout.id), "body": body_text}
indexing_tasks.append(
self.index_client.post("/index-body", json=body_doc)
)
indexing_tasks.append(self.index_client.post("/index-body", json=body_doc))
# 3. Index authors
authors = getattr(shout, "authors", [])
@@ -373,30 +347,22 @@ class SearchService:
if name:
author_doc = {"id": author_id, "name": name, "bio": combined_bio}
indexing_tasks.append(
self.index_client.post("/index-author", json=author_doc)
)
indexing_tasks.append(self.index_client.post("/index-author", json=author_doc))
# Run all indexing tasks in parallel
if indexing_tasks:
responses = await asyncio.gather(
*indexing_tasks, return_exceptions=True
)
responses = await asyncio.gather(*indexing_tasks, return_exceptions=True)
# Check for errors in responses
for i, response in enumerate(responses):
if isinstance(response, Exception):
logger.error(f"Error in indexing task {i}: {response}")
elif (
hasattr(response, "status_code") and response.status_code >= 400
):
elif hasattr(response, "status_code") and response.status_code >= 400:
logger.error(
f"Error response in indexing task {i}: {response.status_code}, {await response.text()}"
)
logger.info(
f"Document {shout.id} indexed across {len(indexing_tasks)} endpoints"
)
logger.info(f"Document {shout.id} indexed across {len(indexing_tasks)} endpoints")
else:
logger.warning(f"No content to index for shout {shout.id}")
@@ -424,24 +390,14 @@ class SearchService:
for shout in shouts:
try:
# 1. Process title documents
if (
hasattr(shout, "title")
and shout.title
and isinstance(shout.title, str)
):
title_docs.append(
{"id": str(shout.id), "title": shout.title.strip()}
)
if hasattr(shout, "title") and shout.title and isinstance(shout.title, str):
title_docs.append({"id": str(shout.id), "title": shout.title.strip()})
# 2. Process body documents (subtitle, lead, body)
body_text_parts = []
for field_name in ["subtitle", "lead", "body"]:
field_value = getattr(shout, field_name, None)
if (
field_value
and isinstance(field_value, str)
and field_value.strip()
):
if field_value and isinstance(field_value, str) and field_value.strip():
body_text_parts.append(field_value.strip())
# Process media content if available
@@ -507,9 +463,7 @@ class SearchService:
}
except Exception as e:
logger.error(
f"Error processing shout {getattr(shout, 'id', 'unknown')} for indexing: {e}"
)
logger.error(f"Error processing shout {getattr(shout, 'id', 'unknown')} for indexing: {e}")
total_skipped += 1
# Convert author dict to list
@@ -543,9 +497,7 @@ class SearchService:
logger.info(f"Indexing {len(documents)} {doc_type} documents")
# Categorize documents by size
small_docs, medium_docs, large_docs = self._categorize_by_size(
documents, doc_type
)
small_docs, medium_docs, large_docs = self._categorize_by_size(documents, doc_type)
# Process each category with appropriate batch sizes
batch_sizes = {
@@ -561,9 +513,7 @@ class SearchService:
]:
if docs:
batch_size = batch_sizes[category]
await self._process_batches(
docs, batch_size, endpoint, f"{doc_type}-{category}"
)
await self._process_batches(docs, batch_size, endpoint, f"{doc_type}-{category}")
def _categorize_by_size(self, documents, doc_type):
"""Categorize documents by size for optimized batch processing"""
@@ -599,7 +549,7 @@ class SearchService:
"""Process document batches with retry logic"""
for i in range(0, len(documents), batch_size):
batch = documents[i : i + batch_size]
batch_id = f"{batch_prefix}-{i//batch_size + 1}"
batch_id = f"{batch_prefix}-{i // batch_size + 1}"
retry_count = 0
max_retries = 3
@@ -607,9 +557,7 @@ class SearchService:
while not success and retry_count < max_retries:
try:
response = await self.index_client.post(
endpoint, json=batch, timeout=90.0
)
response = await self.index_client.post(endpoint, json=batch, timeout=90.0)
if response.status_code == 422:
error_detail = response.json()
@@ -630,13 +578,13 @@ class SearchService:
batch[:mid],
batch_size // 2,
endpoint,
f"{batch_prefix}-{i//batch_size}-A",
f"{batch_prefix}-{i // batch_size}-A",
)
await self._process_batches(
batch[mid:],
batch_size // 2,
endpoint,
f"{batch_prefix}-{i//batch_size}-B",
f"{batch_prefix}-{i // batch_size}-B",
)
else:
logger.error(
@@ -649,9 +597,7 @@ class SearchService:
def _truncate_error_detail(self, error_detail):
"""Truncate error details for logging"""
truncated_detail = (
error_detail.copy() if isinstance(error_detail, dict) else error_detail
)
truncated_detail = error_detail.copy() if isinstance(error_detail, dict) else error_detail
if (
isinstance(truncated_detail, dict)
@@ -660,30 +606,22 @@ class SearchService:
):
for i, item in enumerate(truncated_detail["detail"]):
if isinstance(item, dict) and "input" in item:
if isinstance(item["input"], dict) and any(
k in item["input"] for k in ["documents", "text"]
):
if "documents" in item["input"] and isinstance(
item["input"]["documents"], list
):
if isinstance(item["input"], dict) and any(k in item["input"] for k in ["documents", "text"]):
if "documents" in item["input"] and isinstance(item["input"]["documents"], list):
for j, doc in enumerate(item["input"]["documents"]):
if (
"text" in doc
and isinstance(doc["text"], str)
and len(doc["text"]) > 100
):
item["input"]["documents"][j][
"text"
] = f"{doc['text'][:100]}... [truncated, total {len(doc['text'])} chars]"
if "text" in doc and isinstance(doc["text"], str) and len(doc["text"]) > 100:
item["input"]["documents"][j]["text"] = (
f"{doc['text'][:100]}... [truncated, total {len(doc['text'])} chars]"
)
if (
"text" in item["input"]
and isinstance(item["input"]["text"], str)
and len(item["input"]["text"]) > 100
):
item["input"][
"text"
] = f"{item['input']['text'][:100]}... [truncated, total {len(item['input']['text'])} chars]"
item["input"]["text"] = (
f"{item['input']['text'][:100]}... [truncated, total {len(item['input']['text'])} chars]"
)
return truncated_detail
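The truncation marker produced by _truncate_error_detail, shown on a single string for clarity (a standalone sketch):

def truncate_for_log(text: str, limit: int = 100) -> str:
    # Keep the first `limit` characters and note the original length, as above.
    if len(text) <= limit:
        return text
    return f"{text[:limit]}... [truncated, total {len(text)} chars]"


print(truncate_for_log("x" * 500))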
@@ -711,9 +649,9 @@ class SearchService:
search_limit = SEARCH_PREFETCH_SIZE
else:
search_limit = limit
logger.info(f"Searching for: '{text}' (limit={limit}, offset={offset}, search_limit={search_limit})")
response = await self.client.post(
"/search-combined",
json={"text": text, "limit": search_limit},
@@ -767,9 +705,7 @@ class SearchService:
logger.info(
f"Searching authors for: '{text}' (limit={limit}, offset={offset}, search_limit={search_limit})"
)
response = await self.client.post(
"/search-author", json={"text": text, "limit": search_limit}
)
response = await self.client.post("/search-author", json={"text": text, "limit": search_limit})
response.raise_for_status()
result = response.json()
@@ -784,7 +720,7 @@ class SearchService:
# Store the full prefetch batch, then page it
await self.cache.store(cache_key, author_results)
return await self.cache.get(cache_key, limit, offset)
return author_results[offset : offset + limit]
except Exception as e:
@@ -802,9 +738,7 @@ class SearchService:
result = response.json()
if result.get("consistency", {}).get("status") != "ok":
null_count = result.get("consistency", {}).get(
"null_embeddings_count", 0
)
null_count = result.get("consistency", {}).get("null_embeddings_count", 0)
if null_count > 0:
logger.warning(f"Found {null_count} documents with NULL embeddings")
@@ -877,14 +811,10 @@ async def initialize_search_index(shouts_data):
index_status = await search_service.check_index_status()
if index_status.get("status") == "inconsistent":
problem_ids = index_status.get("consistency", {}).get(
"null_embeddings_sample", []
)
problem_ids = index_status.get("consistency", {}).get("null_embeddings_sample", [])
if problem_ids:
problem_docs = [
shout for shout in shouts_data if str(shout.id) in problem_ids
]
problem_docs = [shout for shout in shouts_data if str(shout.id) in problem_ids]
if problem_docs:
await search_service.bulk_index(problem_docs)
@@ -902,9 +832,7 @@ async def initialize_search_index(shouts_data):
if isinstance(media, str):
try:
media_json = json.loads(media)
if isinstance(media_json, dict) and (
media_json.get("title") or media_json.get("body")
):
if isinstance(media_json, dict) and (media_json.get("title") or media_json.get("body")):
return True
except Exception:
return True
@@ -922,13 +850,9 @@ async def initialize_search_index(shouts_data):
if verification.get("status") == "error":
return
# Only reindex missing docs that actually have body content
missing_ids = [
mid for mid in verification.get("missing", []) if mid in body_ids
]
missing_ids = [mid for mid in verification.get("missing", []) if mid in body_ids]
if missing_ids:
missing_docs = [
shout for shout in shouts_with_body if str(shout.id) in missing_ids
]
missing_docs = [shout for shout in shouts_with_body if str(shout.id) in missing_ids]
await search_service.bulk_index(missing_docs)
else:
pass
@@ -955,35 +879,35 @@ async def check_search_service():
print(f"[WARNING] Search service unavailable: {info.get('message', 'unknown reason')}")
else:
print(f"[INFO] Search service is available: {info}")
# Initialize search index in the background
async def initialize_search_index_background():
"""
Runs search indexing in the background at low priority.
This function:
1. Loads all shouts from the database
2. Indexes them in the search service
3. Runs asynchronously, without blocking the main thread
4. Handles possible errors without interrupting the application
Indexing starts with a delay after server initialization,
so as not to add extra load during startup.
"""
try:
print("[search] Starting background search indexing process")
from services.db import fetch_all_shouts
# Get total count first (optional)
all_shouts = await fetch_all_shouts()
total_count = len(all_shouts) if all_shouts else 0
print(f"[search] Fetched {total_count} shouts for background indexing")
if not all_shouts:
print("[search] No shouts found for indexing, skipping search index initialization")
return
# Start the indexing process with the fetched shouts
print("[search] Beginning background search index initialization...")
await initialize_search_index(all_shouts)
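A hypothetical way to wire in the delayed start that the docstring describes; the delay value and the startup-hook comment below are assumptions, not part of this commit:

import asyncio


async def schedule_background_indexing(delay_seconds: float = 30.0) -> None:
    # Wait out the startup window, then run the indexer without letting
    # a failure propagate into the application.
    await asyncio.sleep(delay_seconds)
    try:
        await initialize_search_index_background()
    except Exception as exc:
        print(f"[search] background indexing failed: {exc}")


# For example, from an application startup hook:
# asyncio.create_task(schedule_background_indexing())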

View File

@@ -80,12 +80,12 @@ class ViewedStorage:
# Get all migrated_views_* keys and find the most recent one
keys = await redis.execute("KEYS", "migrated_views_*")
logger.info(f" * Raw Redis result for 'KEYS migrated_views_*': {len(keys)}")
# Decode byte strings, if any
if keys and isinstance(keys[0], bytes):
keys = [k.decode('utf-8') for k in keys]
keys = [k.decode("utf-8") for k in keys]
logger.info(f" * Decoded keys: {keys}")
if not keys:
logger.warning(" * No migrated_views keys found in Redis")
return
@@ -93,7 +93,7 @@ class ViewedStorage:
# Keep only timestamp-format keys (excluding migrated_views_slugs)
timestamp_keys = [k for k in keys if k != "migrated_views_slugs"]
logger.info(f" * Timestamp keys after filtering: {timestamp_keys}")
if not timestamp_keys:
logger.warning(" * No migrated_views timestamp keys found in Redis")
return
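The key-decoding and filtering steps above, applied to example data (byte strings as Redis may return them):

raw_keys = [b"migrated_views_1716900000", b"migrated_views_slugs"]  # example values
keys = [k.decode("utf-8") if isinstance(k, bytes) else k for k in raw_keys]
timestamp_keys = [k for k in keys if k != "migrated_views_slugs"]
print(timestamp_keys)  # ['migrated_views_1716900000']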
@@ -243,20 +243,12 @@ class ViewedStorage:
# Update topics and authors using the helper function
for [_st, topic] in (
session.query(ShoutTopic, Topic)
.join(Topic)
.join(Shout)
.where(Shout.slug == shout_slug)
.all()
session.query(ShoutTopic, Topic).join(Topic).join(Shout).where(Shout.slug == shout_slug).all()
):
update_groups(self.shouts_by_topic, topic.slug, shout_slug)
for [_st, author] in (
session.query(ShoutAuthor, Author)
.join(Author)
.join(Shout)
.where(Shout.slug == shout_slug)
.all()
session.query(ShoutAuthor, Author).join(Author).join(Shout).where(Shout.slug == shout_slug).all()
):
update_groups(self.shouts_by_author, author.slug, shout_slug)
@@ -289,9 +281,7 @@ class ViewedStorage:
if failed == 0:
when = datetime.now(timezone.utc) + timedelta(seconds=self.period)
t = format(when.astimezone().isoformat())
logger.info(
" ⎩ next update: %s" % (t.split("T")[0] + " " + t.split("T")[1].split(".")[0])
)
logger.info(" ⎩ next update: %s" % (t.split("T")[0] + " " + t.split("T")[1].split(".")[0]))
await asyncio.sleep(self.period)
else:
await asyncio.sleep(10)
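The "next update" log line above renders the timestamp like this; the period value here is only an example:

from datetime import datetime, timedelta, timezone

period = 3600  # seconds, example value
when = datetime.now(timezone.utc) + timedelta(seconds=period)
t = format(when.astimezone().isoformat())
# Same "YYYY-MM-DD HH:MM:SS" rendering used in the log message.
print("next update:", t.split("T")[0] + " " + t.split("T")[1].split(".")[0])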