debug: more logs, and analize precache.py
All checks were successful
Deploy on push / deploy (push) Successful in 44s

This commit is contained in:
Stepan Vladovskiy 2025-05-19 16:10:35 -03:00
parent f0d63e28f8
commit 1690ed63aa
3 changed files with 176 additions and 52 deletions

57
cache/precache.py vendored
View File

@ -111,21 +111,48 @@ async def precache_data():
logger.info(f"{len(topics)} topics and their followings precached") logger.info(f"{len(topics)} topics and their followings precached")
# authors # authors
authors = get_with_stat(select(Author).where(Author.user.is_not(None))) try:
logger.info(f"{len(authors)} authors found in database") authors = get_with_stat(select(Author).where(Author.user.is_not(None)))
for author in authors: logger.info(f"{len(authors)} authors found in database")
if isinstance(author, Author):
profile = author.dict() # Process authors in smaller batches to avoid long-running operations
author_id = profile.get("id") batch_size = 50
user_id = profile.get("user", "").strip() total_processed = 0
if author_id and user_id:
await cache_author(profile) # Create batches
await asyncio.gather( author_batches = [authors[i:i + batch_size] for i in range(0, len(authors), batch_size)]
precache_authors_followers(author_id, session), precache_authors_follows(author_id, session) logger.info(f"Processing authors in {len(author_batches)} batches of {batch_size}")
)
else: for batch_idx, author_batch in enumerate(author_batches):
logger.error(f"fail caching {author}") batch_tasks = []
logger.info(f"{len(authors)} authors and their followings precached") for author in author_batch:
if isinstance(author, Author):
profile = author.dict()
author_id = profile.get("id")
user_id = profile.get("user", "").strip()
if author_id and user_id:
# Add task to the batch
cache_task = cache_author(profile)
follower_task = precache_authors_followers(author_id, session)
follows_task = precache_authors_follows(author_id, session)
batch_tasks.extend([cache_task, follower_task, follows_task])
else:
logger.error(f"fail caching {author}")
# Run all tasks for this batch with timeout
if batch_tasks:
try:
await asyncio.wait_for(asyncio.gather(*batch_tasks), timeout=30)
total_processed += len(author_batch)
logger.info(f"Processed batch {batch_idx+1}/{len(author_batches)} ({total_processed}/{len(authors)} authors)")
except asyncio.TimeoutError:
logger.error(f"Timeout processing author batch {batch_idx+1}, continuing with next batch")
logger.info(f"{total_processed} authors and their followings precached (out of {len(authors)} total)")
except Exception as author_exc:
import traceback
logger.error(f"Error processing authors: {author_exc}")
logger.error(traceback.format_exc())
except Exception as exc: except Exception as exc:
import traceback import traceback

167
main.py
View File

@ -37,11 +37,19 @@ async def start():
async def check_search_service(): async def check_search_service():
"""Check if search service is available and log result""" """Check if search service is available and log result"""
info = await search_service.info() try:
if info.get("status") in ["error", "unavailable"]: info_task = search_service.info()
print(f"[WARNING] Search service unavailable: {info.get('message', 'unknown reason')}") info = await asyncio.wait_for(info_task, timeout=10.0) # 10 second timeout
else:
print(f"[INFO] Search service is available: {info}") if info.get("status") in ["error", "unavailable"]:
print(f"[WARNING] Search service unavailable: {info.get('message', 'unknown reason')}")
else:
print(f"[INFO] Search service is available: {info}")
except asyncio.TimeoutError:
print("[WARNING] Search service check timed out after 10 seconds")
except Exception as e:
print(f"[WARNING] Error checking search service: {str(e)}")
print("[INFO] Continuing startup with search service in degraded mode")
# indexing DB data # indexing DB data
@ -53,15 +61,57 @@ async def lifespan(_app):
try: try:
print("[lifespan] Starting application initialization") print("[lifespan] Starting application initialization")
create_all_tables() create_all_tables()
await asyncio.gather(
redis.connect(), # Run each initialization step separately to identify where it's hanging
precache_data(), try:
ViewedStorage.init(), print("[lifespan] Connecting to Redis...")
create_webhook_endpoint(), await redis.connect()
check_search_service(), print("[lifespan] Redis connected successfully")
start(), except Exception as e:
revalidation_manager.start(), print(f"[lifespan] Error connecting to Redis: {e}")
)
try:
print("[lifespan] Starting precache operation...")
await precache_data()
print("[lifespan] Precache completed successfully")
except Exception as e:
print(f"[lifespan] Error during precache: {e}")
try:
print("[lifespan] Initializing ViewedStorage...")
await ViewedStorage.init()
print("[lifespan] ViewedStorage initialized successfully")
except Exception as e:
print(f"[lifespan] Error initializing ViewedStorage: {e}")
try:
print("[lifespan] Creating webhook endpoint...")
await create_webhook_endpoint()
print("[lifespan] Webhook endpoint created successfully")
except Exception as e:
print(f"[lifespan] Error creating webhook endpoint: {e}")
try:
print("[lifespan] Checking search service...")
await check_search_service()
print("[lifespan] Search service check completed")
except Exception as e:
print(f"[lifespan] Error checking search service: {e}")
try:
print("[lifespan] Starting app...")
await start()
print("[lifespan] App started successfully")
except Exception as e:
print(f"[lifespan] Error starting app: {e}")
try:
print("[lifespan] Starting revalidation manager...")
await revalidation_manager.start()
print("[lifespan] Revalidation manager started successfully")
except Exception as e:
print(f"[lifespan] Error starting revalidation manager: {e}")
print("[lifespan] Basic initialization complete") print("[lifespan] Basic initialization complete")
# Verify the server is ready to accept connections # Verify the server is ready to accept connections
@ -89,9 +139,20 @@ async def lifespan(_app):
await asyncio.sleep(10) # 10-second delay to let the system stabilize await asyncio.sleep(10) # 10-second delay to let the system stabilize
# Start search indexing as a background task with lower priority # Start search indexing as a background task with lower priority
asyncio.create_task(initialize_search_index_background()) try:
print("[lifespan] Creating search indexing background task...")
asyncio.create_task(initialize_search_index_background())
print("[lifespan] Search indexing task scheduled successfully")
except Exception as e:
print(f"[lifespan] Error scheduling search indexing task: {e}")
print("[lifespan] Full server startup completed successfully")
yield yield
except Exception as e:
import traceback
print(f"[lifespan] Critical error in lifespan function: {e}")
print(f"[lifespan] Traceback: {traceback.format_exc()}")
yield # Still yield to allow clean shutdown
finally: finally:
print("[lifespan] Shutting down application services") print("[lifespan] Shutting down application services")
tasks = [redis.disconnect(), ViewedStorage.stop(), revalidation_manager.stop()] tasks = [redis.disconnect(), ViewedStorage.stop(), revalidation_manager.stop()]
@ -105,29 +166,52 @@ async def initialize_search_index_background():
print("[search] Starting background search indexing process") print("[search] Starting background search indexing process")
from services.db import fetch_all_shouts from services.db import fetch_all_shouts
print("[search] About to fetch all shouts for indexing")
# Get total count first (optional)
all_shouts = await fetch_all_shouts()
total_count = len(all_shouts) if all_shouts else 0
print(f"[search] Fetched {total_count} shouts for background indexing")
# Start the indexing process with the fetched shouts
print("[search] Beginning background search index initialization...")
await initialize_search_index(all_shouts)
print("[search] Background search index initialization complete")
# Perform a test search to verify indexing worked
try: try:
print("[search] Running test search to verify index...") print("[search] About to fetch all shouts for indexing")
from services.search import search_text # Get total count first (optional)
test_results = await search_text("test", 3) all_shouts = await fetch_all_shouts()
print(f"[search] Test search complete with {len(test_results)} results") total_count = len(all_shouts) if all_shouts else 0
except Exception as test_error: print(f"[search] Fetched {total_count} shouts for background indexing")
print(f"[search] Test search error: {str(test_error)}")
# Skip indexing if no shouts found
if not all_shouts or total_count == 0:
print("[search] No shouts to index, skipping indexing process")
return
# Start the indexing process with the fetched shouts
print("[search] Beginning background search index initialization...")
# Add a timeout to the indexing operation
try:
index_task = initialize_search_index(all_shouts)
await asyncio.wait_for(index_task, timeout=300) # 5-minute timeout
print("[search] Background search index initialization complete")
except asyncio.TimeoutError:
print("[search] Background indexing timed out after 5 minutes")
return
# Perform a test search to verify indexing worked
try:
print("[search] Running test search to verify index...")
from services.search import search_text
search_task = search_text("test", 3)
test_results = await asyncio.wait_for(search_task, timeout=30) # 30-second timeout
print(f"[search] Test search complete with {len(test_results)} results")
except asyncio.TimeoutError:
print("[search] Test search timed out after 30 seconds")
except Exception as test_error:
print(f"[search] Test search error: {str(test_error)}")
except Exception as inner_error:
print(f"[search] Error in search indexing process: {str(inner_error)}")
import traceback
print(f"[search] Inner traceback: {traceback.format_exc()}")
print("[search] Search initialization process completed (with or without errors)")
except Exception as e: except Exception as e:
print(f"[search] Error in background search indexing: {str(e)}") print(f"[search] Outer error in background search indexing: {str(e)}")
import traceback import traceback
print(f"[search] Search indexing traceback: {traceback.format_exc()}") print(f"[search] Outer traceback: {traceback.format_exc()}")
# Создаем экземпляр GraphQL # Создаем экземпляр GraphQL
graphql_app = GraphQL(schema, debug=True) graphql_app = GraphQL(schema, debug=True)
@ -155,11 +239,24 @@ app = Starlette(
routes=[ routes=[
Route("/", graphql_handler, methods=["GET", "POST"]), Route("/", graphql_handler, methods=["GET", "POST"]),
Route("/new-author", WebhookEndpoint), Route("/new-author", WebhookEndpoint),
# Health check endpoint
Route("/health", lambda request: JSONResponse({"status": "healthy"}), methods=["GET"]),
], ],
lifespan=lifespan, lifespan=lifespan,
debug=True, debug=True,
) )
# Register an error handler for uncaught exceptions
@app.exception_handler(Exception)
async def handle_exception(request, exc):
print(f"Global exception handler caught: {str(exc)}")
import traceback
traceback.print_exc()
return JSONResponse(
status_code=500,
content={"detail": "Internal Server Error", "message": str(exc)}
)
app.add_middleware(ExceptionHandlerMiddleware) app.add_middleware(ExceptionHandlerMiddleware)
if "dev" in sys.argv: if "dev" in sys.argv:
app.add_middleware( app.add_middleware(

View File

@ -208,8 +208,8 @@ class SearchService:
logger.info(f"Initializing search service with URL: {TXTAI_SERVICE_URL}") logger.info(f"Initializing search service with URL: {TXTAI_SERVICE_URL}")
self.available = SEARCH_ENABLED self.available = SEARCH_ENABLED
# Use different timeout settings for indexing and search requests # Use different timeout settings for indexing and search requests
self.client = httpx.AsyncClient(timeout=30.0, base_url=TXTAI_SERVICE_URL) self.client = httpx.AsyncClient(timeout=10.0, base_url=TXTAI_SERVICE_URL) # Reduced timeout for regular ops
self.index_client = httpx.AsyncClient(timeout=120.0, base_url=TXTAI_SERVICE_URL) self.index_client = httpx.AsyncClient(timeout=60.0, base_url=TXTAI_SERVICE_URL) # Reduced timeout for indexing
# Initialize search cache # Initialize search cache
self.cache = SearchCache() if SEARCH_CACHE_ENABLED else None self.cache = SearchCache() if SEARCH_CACHE_ENABLED else None