debug: more logs, and analize precache.py

2025-05-19 16:10:35 -03:00 · 2025-05-19 16:10:35 -03:00 · 1690ed63aa
commit 1690ed63aa
parent f0d63e28f8
3 changed files with 176 additions and 52 deletions
--- a/cache/precache.py
+++ b/cache/precache.py
@ -111,21 +111,48 @@ async def precache_data():
            logger.info(f"{len(topics)} topics and their followings precached")
            # authors
-            authors = get_with_stat(select(Author).where(Author.user.is_not(None)))
+            try:
-            logger.info(f"{len(authors)} authors found in database")
+                authors = get_with_stat(select(Author).where(Author.user.is_not(None)))
-            for author in authors:
+                logger.info(f"{len(authors)} authors found in database")
-                if isinstance(author, Author):
+                
-                    profile = author.dict()
+                # Process authors in smaller batches to avoid long-running operations
-                    author_id = profile.get("id")
+                batch_size = 50
-                    user_id = profile.get("user", "").strip()
+                total_processed = 0
-                    if author_id and user_id:
+                
-                        await cache_author(profile)
+                # Create batches
-                        await asyncio.gather(
+                author_batches = [authors[i:i + batch_size] for i in range(0, len(authors), batch_size)]
-                            precache_authors_followers(author_id, session), precache_authors_follows(author_id, session)
+                logger.info(f"Processing authors in {len(author_batches)} batches of {batch_size}")
-                        )
+                
-                else:
+                for batch_idx, author_batch in enumerate(author_batches):
-                    logger.error(f"fail caching {author}")
+                    batch_tasks = []
-            logger.info(f"{len(authors)} authors and their followings precached")
+                    for author in author_batch:
                        if isinstance(author, Author):
                            profile = author.dict()
                            author_id = profile.get("id")
                            user_id = profile.get("user", "").strip()
                            if author_id and user_id:
                                # Add task to the batch
                                cache_task = cache_author(profile)
                                follower_task = precache_authors_followers(author_id, session)
                                follows_task = precache_authors_follows(author_id, session)
                                batch_tasks.extend([cache_task, follower_task, follows_task])
                        else:
                            logger.error(f"fail caching {author}")
                    # Run all tasks for this batch with timeout
                    if batch_tasks:
                        try:
                            await asyncio.wait_for(asyncio.gather(*batch_tasks), timeout=30)
                            total_processed += len(author_batch)
                            logger.info(f"Processed batch {batch_idx+1}/{len(author_batches)} ({total_processed}/{len(authors)} authors)")
                        except asyncio.TimeoutError:
                            logger.error(f"Timeout processing author batch {batch_idx+1}, continuing with next batch")
                logger.info(f"{total_processed} authors and their followings precached (out of {len(authors)} total)")
            except Exception as author_exc:
                import traceback
                logger.error(f"Error processing authors: {author_exc}")
                logger.error(traceback.format_exc())
    except Exception as exc:
        import traceback
--- a/main.py
+++ b/main.py
@ -37,11 +37,19 @@ async def start():
 async def check_search_service():
    """Check if search service is available and log result"""
-    info = await search_service.info()
+    try:
-    if info.get("status") in ["error", "unavailable"]:
+        info_task = search_service.info()
-        print(f"[WARNING] Search service unavailable: {info.get('message', 'unknown reason')}")
+        info = await asyncio.wait_for(info_task, timeout=10.0)  # 10 second timeout
-    else:
+        
-        print(f"[INFO] Search service is available: {info}")
+        if info.get("status") in ["error", "unavailable"]:
            print(f"[WARNING] Search service unavailable: {info.get('message', 'unknown reason')}")
        else:
            print(f"[INFO] Search service is available: {info}")
    except asyncio.TimeoutError:
        print("[WARNING] Search service check timed out after 10 seconds")
    except Exception as e:
        print(f"[WARNING] Error checking search service: {str(e)}")
        print("[INFO] Continuing startup with search service in degraded mode")
 # indexing DB data
@ -53,15 +61,57 @@ async def lifespan(_app):
    try:
        print("[lifespan] Starting application initialization")
        create_all_tables()
-        await asyncio.gather(
+        
-            redis.connect(),
+        # Run each initialization step separately to identify where it's hanging
-            precache_data(),
+        try:
-            ViewedStorage.init(),
+            print("[lifespan] Connecting to Redis...")
-            create_webhook_endpoint(),
+            await redis.connect()
-            check_search_service(),
+            print("[lifespan] Redis connected successfully")
-            start(),
+        except Exception as e:
-            revalidation_manager.start(),
+            print(f"[lifespan] Error connecting to Redis: {e}")
-        )
+            
        try:
            print("[lifespan] Starting precache operation...")
            await precache_data()
            print("[lifespan] Precache completed successfully")
        except Exception as e:
            print(f"[lifespan] Error during precache: {e}")
        try:
            print("[lifespan] Initializing ViewedStorage...")
            await ViewedStorage.init()
            print("[lifespan] ViewedStorage initialized successfully")
        except Exception as e:
            print(f"[lifespan] Error initializing ViewedStorage: {e}")
        try:
            print("[lifespan] Creating webhook endpoint...")
            await create_webhook_endpoint()
            print("[lifespan] Webhook endpoint created successfully")
        except Exception as e:
            print(f"[lifespan] Error creating webhook endpoint: {e}")
        try:
            print("[lifespan] Checking search service...")
            await check_search_service()
            print("[lifespan] Search service check completed")
        except Exception as e:
            print(f"[lifespan] Error checking search service: {e}")
        try:
            print("[lifespan] Starting app...")
            await start()
            print("[lifespan] App started successfully")
        except Exception as e:
            print(f"[lifespan] Error starting app: {e}")
        try:
            print("[lifespan] Starting revalidation manager...")
            await revalidation_manager.start()
            print("[lifespan] Revalidation manager started successfully")
        except Exception as e:
            print(f"[lifespan] Error starting revalidation manager: {e}")
        print("[lifespan] Basic initialization complete")
        # Verify the server is ready to accept connections
@ -89,9 +139,20 @@ async def lifespan(_app):
        await asyncio.sleep(10)  # 10-second delay to let the system stabilize
        # Start search indexing as a background task with lower priority
-        asyncio.create_task(initialize_search_index_background())
+        try:
            print("[lifespan] Creating search indexing background task...")
            asyncio.create_task(initialize_search_index_background())
            print("[lifespan] Search indexing task scheduled successfully")
        except Exception as e:
            print(f"[lifespan] Error scheduling search indexing task: {e}")
        print("[lifespan] Full server startup completed successfully")
        yield
    except Exception as e:
        import traceback
        print(f"[lifespan] Critical error in lifespan function: {e}")
        print(f"[lifespan] Traceback: {traceback.format_exc()}")
        yield  # Still yield to allow clean shutdown
    finally:
        print("[lifespan] Shutting down application services")
        tasks = [redis.disconnect(), ViewedStorage.stop(), revalidation_manager.stop()]
@ -105,29 +166,52 @@ async def initialize_search_index_background():
        print("[search] Starting background search indexing process")
        from services.db import fetch_all_shouts
        print("[search] About to fetch all shouts for indexing")
        # Get total count first (optional)
        all_shouts = await fetch_all_shouts()
        total_count = len(all_shouts) if all_shouts else 0
        print(f"[search] Fetched {total_count} shouts for background indexing")
        # Start the indexing process with the fetched shouts
        print("[search] Beginning background search index initialization...")
        await initialize_search_index(all_shouts)
        print("[search] Background search index initialization complete")
        # Perform a test search to verify indexing worked
        try:
-            print("[search] Running test search to verify index...")
+            print("[search] About to fetch all shouts for indexing")
-            from services.search import search_text
+            # Get total count first (optional)
-            test_results = await search_text("test", 3)
+            all_shouts = await fetch_all_shouts()
-            print(f"[search] Test search complete with {len(test_results)} results")
+            total_count = len(all_shouts) if all_shouts else 0
-        except Exception as test_error:
+            print(f"[search] Fetched {total_count} shouts for background indexing")
-            print(f"[search] Test search error: {str(test_error)}")
+            
            # Skip indexing if no shouts found
            if not all_shouts or total_count == 0:
                print("[search] No shouts to index, skipping indexing process")
                return
            # Start the indexing process with the fetched shouts
            print("[search] Beginning background search index initialization...")
            # Add a timeout to the indexing operation
            try:
                index_task = initialize_search_index(all_shouts)
                await asyncio.wait_for(index_task, timeout=300)  # 5-minute timeout
                print("[search] Background search index initialization complete")
            except asyncio.TimeoutError:
                print("[search] Background indexing timed out after 5 minutes")
                return
            # Perform a test search to verify indexing worked
            try:
                print("[search] Running test search to verify index...")
                from services.search import search_text
                search_task = search_text("test", 3)
                test_results = await asyncio.wait_for(search_task, timeout=30)  # 30-second timeout
                print(f"[search] Test search complete with {len(test_results)} results")
            except asyncio.TimeoutError:
                print("[search] Test search timed out after 30 seconds")
            except Exception as test_error:
                print(f"[search] Test search error: {str(test_error)}")
        except Exception as inner_error:
            print(f"[search] Error in search indexing process: {str(inner_error)}")
            import traceback
            print(f"[search] Inner traceback: {traceback.format_exc()}")
        print("[search] Search initialization process completed (with or without errors)")
    except Exception as e:
-        print(f"[search] Error in background search indexing: {str(e)}")
+        print(f"[search] Outer error in background search indexing: {str(e)}")
        import traceback
-        print(f"[search] Search indexing traceback: {traceback.format_exc()}")
+        print(f"[search] Outer traceback: {traceback.format_exc()}")
 # Создаем экземпляр GraphQL
 graphql_app = GraphQL(schema, debug=True)
@ -155,11 +239,24 @@ app = Starlette(
    routes=[
        Route("/", graphql_handler, methods=["GET", "POST"]),
        Route("/new-author", WebhookEndpoint),
        # Health check endpoint
        Route("/health", lambda request: JSONResponse({"status": "healthy"}), methods=["GET"]),
    ],
    lifespan=lifespan,
    debug=True,
 )
 # Register an error handler for uncaught exceptions
@app.exception_handler(Exception)
 async def handle_exception(request, exc):
    print(f"Global exception handler caught: {str(exc)}")
    import traceback
    traceback.print_exc()
    return JSONResponse(
        status_code=500,
        content={"detail": "Internal Server Error", "message": str(exc)}
    )
 app.add_middleware(ExceptionHandlerMiddleware)
 if "dev" in sys.argv:
    app.add_middleware(
--- a/services/search.py
+++ b/services/search.py
@ -208,8 +208,8 @@ class SearchService:
        logger.info(f"Initializing search service with URL: {TXTAI_SERVICE_URL}")
        self.available = SEARCH_ENABLED
        # Use different timeout settings for indexing and search requests
-        self.client = httpx.AsyncClient(timeout=30.0, base_url=TXTAI_SERVICE_URL)
+        self.client = httpx.AsyncClient(timeout=10.0, base_url=TXTAI_SERVICE_URL)  # Reduced timeout for regular ops
-        self.index_client = httpx.AsyncClient(timeout=120.0, base_url=TXTAI_SERVICE_URL)
+        self.index_client = httpx.AsyncClient(timeout=60.0, base_url=TXTAI_SERVICE_URL)  # Reduced timeout for indexing
        # Initialize search cache
        self.cache = SearchCache() if SEARCH_CACHE_ENABLED else None