core/cache/precache.py

161 lines
7.7 KiB
Python
Raw Normal View History

2024-08-07 06:51:09 +00:00
import asyncio
2025-03-20 09:24:30 +00:00
import json
2024-08-12 08:00:01 +00:00
2024-05-30 11:25:35 +00:00
from sqlalchemy import and_, join, select
2024-08-12 08:00:01 +00:00
from cache.cache import cache_author, cache_topic
2024-05-30 11:25:35 +00:00
from orm.author import Author, AuthorFollower
2024-08-12 08:00:01 +00:00
from orm.shout import Shout, ShoutAuthor, ShoutReactionsFollower, ShoutTopic
2024-05-30 11:25:35 +00:00
from orm.topic import Topic, TopicFollower
2024-05-30 11:29:00 +00:00
from resolvers.stat import get_with_stat
2024-05-30 11:25:35 +00:00
from services.db import local_session
2024-08-12 08:00:01 +00:00
from services.redis import redis
2024-08-07 05:57:56 +00:00
from utils.encoders import CustomJSONEncoder
from utils.logger import root_logger as logger
2024-05-30 11:25:35 +00:00
2024-08-07 06:51:09 +00:00
# Предварительное кеширование подписчиков автора
2024-06-04 06:07:46 +00:00
async def precache_authors_followers(author_id, session):
    """Store the follower ids of one author in Redis.

    Selects ``AuthorFollower.follower`` for rows whose ``author`` equals
    ``author_id``, drops falsy values and duplicates, and writes the result
    as a JSON list under the key ``author:followers:<author_id>``.

    Args:
        author_id: Id of the author whose followers are cached.
        session: Database session used to execute the query.
    """
    stmt = select(AuthorFollower.follower).where(AuthorFollower.author == author_id)
    # A set removes duplicates; falsy ids (None, 0) are skipped.
    follower_ids = {row[0] for row in session.execute(stmt) if row[0]}

    payload = json.dumps(list(follower_ids), cls=CustomJSONEncoder)
    await redis.execute("SET", f"author:followers:{author_id}", payload)
2024-08-07 06:51:09 +00:00
# Предварительное кеширование подписок автора
2024-06-04 06:07:46 +00:00
async def precache_authors_follows(author_id, session):
    """Store everything one author follows (topics, authors, shouts) in Redis.

    Three queries are executed in order - followed topics, followed authors,
    followed shouts - each filtered by ``follower == author_id``. Every
    result is de-duplicated, stripped of falsy ids, serialized as a JSON
    list and written under ``author:follows-topics:<id>``,
    ``author:follows-authors:<id>`` and ``author:follows-shouts:<id>``.

    Args:
        author_id: Id of the author whose subscriptions are cached.
        session: Database session used to execute the queries.
    """
    # Build all statements first, then execute them in the same order.
    statements = (
        select(TopicFollower.topic).where(TopicFollower.follower == author_id),
        select(AuthorFollower.author).where(AuthorFollower.follower == author_id),
        select(ShoutReactionsFollower.shout).where(ShoutReactionsFollower.follower == author_id),
    )
    id_sets = [{row[0] for row in session.execute(stmt) if row[0]} for stmt in statements]

    payloads = [json.dumps(list(ids), cls=CustomJSONEncoder) for ids in id_sets]
    keys = (
        f"author:follows-topics:{author_id}",
        f"author:follows-authors:{author_id}",
        f"author:follows-shouts:{author_id}",
    )

    # The three SET commands are awaited together.
    await asyncio.gather(*[redis.execute("SET", key, payload) for key, payload in zip(keys, payloads)])
2024-06-04 06:07:46 +00:00
2024-08-07 06:51:09 +00:00
# Предварительное кеширование авторов тем
2024-06-04 06:07:46 +00:00
async def precache_topics_authors(topic_id: int, session):
    """Store the ids of authors who have live shouts in a topic in Redis.

    A shout counts when it is linked to ``topic_id``, has a non-null
    ``published_at`` and a null ``deleted_at``. The distinct, truthy author
    ids are written as a JSON list under ``topic:authors:<topic_id>``.

    Args:
        topic_id: Id of the topic whose authors are cached.
        session: Database session used to execute the query.
    """
    shouts_of_topic = join(ShoutTopic, Shout, ShoutTopic.shout == Shout.id)
    published_in_topic = and_(
        ShoutTopic.topic == topic_id,
        Shout.published_at.is_not(None),
        Shout.deleted_at.is_(None),
    )
    stmt = (
        select(ShoutAuthor.author)
        .select_from(shouts_of_topic)
        .join(ShoutAuthor, ShoutAuthor.shout == Shout.id)
        .filter(published_in_topic)
    )

    author_ids = set()
    for row in session.execute(stmt):
        if row[0]:
            author_ids.add(row[0])

    payload = json.dumps(list(author_ids), cls=CustomJSONEncoder)
    await redis.execute("SET", f"topic:authors:{topic_id}", payload)
2024-08-07 06:51:09 +00:00
# Предварительное кеширование подписчиков тем
2024-06-04 06:07:46 +00:00
async def precache_topics_followers(topic_id: int, session):
    """Store the follower ids of one topic in Redis.

    Selects ``TopicFollower.follower`` for rows whose ``topic`` equals
    ``topic_id``, keeps the distinct truthy ids, and writes them as a JSON
    list under ``topic:followers:<topic_id>``.

    Args:
        topic_id: Id of the topic whose followers are cached.
        session: Database session used to execute the query.
    """
    stmt = select(TopicFollower.follower).where(TopicFollower.topic == topic_id)

    follower_ids = set()
    for row in session.execute(stmt):
        if row[0]:
            follower_ids.add(row[0])

    payload = json.dumps(list(follower_ids), cls=CustomJSONEncoder)
    await redis.execute("SET", f"topic:followers:{topic_id}", payload)
2024-05-30 11:25:35 +00:00
async def _restore_hash(key, value):
    """Write a hash previously read with HGETALL back under ``key``.

    Does nothing when ``value`` is empty. A dict is flattened into the
    alternating field/value argument list that HSET takes; any other value
    is passed through unchanged.
    """
    if not value:
        return
    if isinstance(value, dict):
        hset_args = [item for pair in value.items() for item in pair]
    else:
        # Assumes the client already returned a flat field/value sequence.
        hset_args = value
    await redis.execute("HSET", key, *hset_args)
    logger.info(f"redis hash '{key}' was restored")


async def _precache_topics(session):
    """Cache every topic of community 1 with its followers and authors."""
    topics = get_with_stat(select(Topic).where(Topic.community == 1))
    for topic in topics:
        # get_with_stat may yield objects exposing .dict() or plain mappings.
        topic_dict = topic.dict() if hasattr(topic, "dict") else topic
        await cache_topic(topic_dict)
        await asyncio.gather(
            precache_topics_followers(topic_dict["id"], session),
            precache_topics_authors(topic_dict["id"], session),
        )
    logger.info(f"{len(topics)} topics and their followings precached")


async def _precache_authors(session, batch_size=50, batch_timeout=30):
    """Cache authors that have a user id, with their followers and follows.

    Authors are processed in batches of ``batch_size``; each batch is given
    ``batch_timeout`` seconds, and a batch that times out is logged and
    skipped so the following batches still run.
    """
    authors = get_with_stat(select(Author).where(Author.user.is_not(None)))
    logger.info(f"{len(authors)} authors found in database")

    author_batches = [authors[i : i + batch_size] for i in range(0, len(authors), batch_size)]
    logger.info(f"Processing authors in {len(author_batches)} batches of {batch_size}")

    total_processed = 0
    for batch_idx, author_batch in enumerate(author_batches):
        batch_tasks = []
        for author in author_batch:
            if not isinstance(author, Author):
                logger.error(f"fail caching {author}")
                continue
            profile = author.dict()
            author_id = profile.get("id")
            # dict.get only applies its default to a missing key, so a
            # present-but-None "user" must be coalesced before .strip().
            user_id = (profile.get("user") or "").strip()
            if author_id and user_id:
                batch_tasks.extend(
                    [
                        cache_author(profile),
                        precache_authors_followers(author_id, session),
                        precache_authors_follows(author_id, session),
                    ]
                )

        if not batch_tasks:
            continue
        try:
            await asyncio.wait_for(asyncio.gather(*batch_tasks), timeout=batch_timeout)
        except asyncio.TimeoutError:
            logger.error(f"Timeout processing author batch {batch_idx+1}, continuing with next batch")
        else:
            total_processed += len(author_batch)
            logger.info(
                f"Processed batch {batch_idx+1}/{len(author_batches)} ({total_processed}/{len(authors)} authors)"
            )

    logger.info(f"{total_processed} authors and their followings precached (out of {len(authors)} total)")


async def precache_data():
    """Flush Redis and rebuild the topic and author caches from the database.

    Steps, in order:
      1. Read the ``authorizer_env`` hash, run FLUSHDB, then write that hash
         back so it survives the flush.
      2. Cache every topic of community 1 with its followers and authors.
      3. Cache every author that has a user id, with followers and follows,
         in batches with a per-batch timeout.

    Errors are never raised to the caller. A failure in the authors phase is
    logged and leaves the already-written topic caches in place; any other
    failure is printed and logged.
    """
    import traceback

    logger.info("precaching...")
    try:
        key = "authorizer_env"
        # Save the hash before the flush so it can be restored afterwards.
        value = await redis.execute("HGETALL", key)
        await redis.execute("FLUSHDB")
        logger.info("redis: FLUSHDB")
        await _restore_hash(key, value)

        with local_session() as session:
            await _precache_topics(session)
            try:
                await _precache_authors(session)
            except Exception as author_exc:
                logger.error(f"Error processing authors: {author_exc}")
                logger.error(traceback.format_exc())
    except Exception as exc:
        traceback.print_exc()
        logger.error(f"Error in precache_data: {exc}")