debug: logs in search.py and main.py

Stepan Vladovskiy 2025-05-19 16:03:26 -03:00
parent 2bebfbd4df
commit f0d63e28f8
2 changed files with 91 additions and 24 deletions

main.py

@@ -63,6 +63,26 @@ async def lifespan(_app):
         revalidation_manager.start(),
     )
     print("[lifespan] Basic initialization complete")
+
+    # Verify the server is ready to accept connections
+    import socket
+    import os
+
+    def check_port_available(port):
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        try:
+            s.bind(('0.0.0.0', port))
+            s.close()
+            return False  # Port is available, not in use
+        except OSError:
+            return True  # Port is in use, not available
+
+    # Check if the port is in use
+    port = int(os.environ.get("PORT", 8000))
+    if check_port_available(port):
+        print(f"[lifespan] ✅ Server port {port} is active, ready to accept connections")
+    else:
+        print(f"[lifespan] ⚠️ Warning: Server port {port} is not bound yet!")
 
     # Add a delay before starting the intensive search indexing
     print("[lifespan] Waiting for system stabilization before search indexing...")
@@ -85,6 +105,7 @@ async def initialize_search_index_background():
     print("[search] Starting background search indexing process")
     from services.db import fetch_all_shouts
 
+    print("[search] About to fetch all shouts for indexing")
     # Get total count first (optional)
     all_shouts = await fetch_all_shouts()
     total_count = len(all_shouts) if all_shouts else 0
@@ -94,8 +115,19 @@ async def initialize_search_index_background():
         print("[search] Beginning background search index initialization...")
         await initialize_search_index(all_shouts)
         print("[search] Background search index initialization complete")
+
+        # Perform a test search to verify indexing worked
+        try:
+            print("[search] Running test search to verify index...")
+            from services.search import search_text
+            test_results = await search_text("test", 3)
+            print(f"[search] Test search complete with {len(test_results)} results")
+        except Exception as test_error:
+            print(f"[search] Test search error: {str(test_error)}")
     except Exception as e:
         print(f"[search] Error in background search indexing: {str(e)}")
+        import traceback
+        print(f"[search] Search indexing traceback: {traceback.format_exc()}")
 
 # Create the GraphQL instance
 graphql_app = GraphQL(schema, debug=True)
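These hunks assume initialize_search_index_background() is launched from the lifespan hook without blocking startup; the scheduling code itself is outside this diff. A hedged sketch of one common way to do that with asyncio.create_task:

import asyncio

background_tasks = set()  # strong references so pending tasks aren't garbage-collected

async def initialize_search_index_background():
    ...  # the function shown in the hunks above

async def lifespan_sketch():
    task = asyncio.create_task(initialize_search_index_background())
    background_tasks.add(task)
    task.add_done_callback(background_tasks.discard)  # drop the reference when done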

services/search.py

@@ -864,30 +864,54 @@ async def get_author_search_count(text: str):
 async def initialize_search_index(shouts_data):
     """Initialize search index with existing data during application startup"""
     if not SEARCH_ENABLED:
+        logger.info("Search indexing skipped - search is disabled")
         return
 
     if not shouts_data:
+        logger.info("Search indexing skipped - no shouts data available")
         return
 
-    info = await search_service.info()
-    if info.get("status") in ["error", "unavailable", "disabled"]:
-        return
+    # Add a timeout for the search information request
+    try:
+        info_future = search_service.info()
+        info = await asyncio.wait_for(info_future, timeout=15.0)  # 15 second timeout
+
+        if info.get("status") in ["error", "unavailable", "disabled"]:
+            logger.error(f"Search indexing aborted - search service unavailable: {info}")
+            return
+
+        logger.info(f"Search indexing proceeding with index stats: {info.get('index_stats', {})}")
+    except asyncio.TimeoutError:
+        logger.error("Search service info request timed out after 15 seconds")
+        return
+    except Exception as e:
+        logger.error(f"Error getting search service info: {str(e)}")
+        return
 
     index_stats = info.get("index_stats", {})
     indexed_doc_count = index_stats.get("total_count", 0)
 
-    index_status = await search_service.check_index_status()
-    if index_status.get("status") == "inconsistent":
-        problem_ids = index_status.get("consistency", {}).get(
-            "null_embeddings_sample", []
-        )
-
-        if problem_ids:
-            problem_docs = [
-                shout for shout in shouts_data if str(shout.id) in problem_ids
-            ]
-            if problem_docs:
-                await search_service.bulk_index(problem_docs)
+    try:
+        index_status_future = search_service.check_index_status()
+        index_status = await asyncio.wait_for(index_status_future, timeout=15.0)
+        if index_status.get("status") == "inconsistent":
+            logger.warning("Found inconsistent search index state")
+            problem_ids = index_status.get("consistency", {}).get(
+                "null_embeddings_sample", []
+            )
+
+            if problem_ids:
+                problem_docs = [
+                    shout for shout in shouts_data if str(shout.id) in problem_ids
+                ]
+                if problem_docs:
+                    logger.info(f"Reindexing {len(problem_docs)} inconsistent documents")
+                    await search_service.bulk_index(problem_docs)
+    except asyncio.TimeoutError:
+        logger.error("Search index status check timed out after 15 seconds")
+    except Exception as e:
+        logger.error(f"Error checking search index status: {str(e)}")
 
     # Only consider shouts with body content for body verification
     def has_body_content(shout):
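The wait_for / TimeoutError / Exception scaffolding now appears three times in this file. A small helper could collapse it; a sketch using a hypothetical name, not something this codebase defines:

import asyncio
import logging

logger = logging.getLogger(__name__)

async def await_with_timeout(coro, label, timeout=15.0):
    # Await a coroutine, logging and returning None on timeout or error.
    try:
        return await asyncio.wait_for(coro, timeout=timeout)
    except asyncio.TimeoutError:
        logger.error(f"{label} timed out after {timeout} seconds")
    except Exception as e:
        logger.error(f"Error in {label}: {str(e)}")
    return None

Call sites would then read info = await await_with_timeout(search_service.info(), "search service info"), followed by an if info is None: return guard.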
@@ -937,14 +961,25 @@ async def initialize_search_index(shouts_data):
     try:
         test_query = "test"
         # Use body search since that's most likely to return results
-        test_results = await search_text(test_query, 5)
-
-        if test_results:
-            categories = set()
-            for result in test_results:
-                result_id = result.get("id")
-                matching_shouts = [s for s in shouts_data if str(s.id) == result_id]
-                if matching_shouts and hasattr(matching_shouts[0], "category"):
-                    categories.add(getattr(matching_shouts[0], "category", "unknown"))
+        logger.info(f"Running test search with query: '{test_query}'")
+        try:
+            search_future = search_text(test_query, 5)
+            test_results = await asyncio.wait_for(search_future, timeout=15.0)
+
+            if test_results:
+                logger.info(f"Test search successful! Found {len(test_results)} results")
+                categories = set()
+                for result in test_results:
+                    result_id = result.get("id")
+                    matching_shouts = [s for s in shouts_data if str(s.id) == result_id]
+                    if matching_shouts and hasattr(matching_shouts[0], "category"):
+                        categories.add(getattr(matching_shouts[0], "category", "unknown"))
+                logger.info(f"Search test complete: found categories {categories}")
+            else:
+                logger.warning("Test search completed but returned no results")
+        except asyncio.TimeoutError:
+            logger.error("Test search timed out after 15 seconds")
+        except Exception as test_error:
+            logger.error(f"Error during test search: {str(test_error)}")
     except Exception as e:
-        pass
+        logger.error(f"Error in final search verification: {str(e)}")
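The same verification can be run by hand outside the startup path. A standalone sketch, assuming only the search_text(query, limit) coroutine shown above:

import asyncio

async def smoke_test():
    from services.search import search_text
    try:
        results = await asyncio.wait_for(search_text("test", 5), timeout=15.0)
        print(f"ok: {len(results)} results")
    except asyncio.TimeoutError:
        print("search timed out after 15 seconds")

if __name__ == "__main__":
    asyncio.run(smoke_test())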