debug: add logs to search.py and main.py
All checks were successful
Deploy on push / deploy (push) Successful in 46s

This commit is contained in:
Stepan Vladovskiy
2025-05-19 16:03:26 -03:00
parent 2bebfbd4df
commit f0d63e28f8
2 changed files with 91 additions and 24 deletions

View File

@@ -864,30 +864,54 @@ async def get_author_search_count(text: str):
async def initialize_search_index(shouts_data):
"""Initialize search index with existing data during application startup"""
if not SEARCH_ENABLED:
logger.info("Search indexing skipped - search is disabled")
return
if not shouts_data:
logger.info("Search indexing skipped - no shouts data available")
return
info = await search_service.info()
if info.get("status") in ["error", "unavailable", "disabled"]:
# Add a timeout for the search information request
try:
info_future = search_service.info()
info = await asyncio.wait_for(info_future, timeout=15.0) # 15 second timeout
if info.get("status") in ["error", "unavailable", "disabled"]:
logger.error(f"Search indexing aborted - search service unavailable: {info}")
return
logger.info(f"Search indexing proceeding with index stats: {info.get('index_stats', {})}")
except asyncio.TimeoutError:
logger.error("Search service info request timed out after 15 seconds")
return
except Exception as e:
logger.error(f"Error getting search service info: {str(e)}")
return
index_stats = info.get("index_stats", {})
indexed_doc_count = index_stats.get("total_count", 0)
try:
index_status_future = search_service.check_index_status()
index_status = await asyncio.wait_for(index_status_future, timeout=15.0)
if index_status.get("status") == "inconsistent":
logger.warning("Found inconsistent search index state")
problem_ids = index_status.get("consistency", {}).get(
"null_embeddings_sample", []
)
index_status = await search_service.check_index_status()
if index_status.get("status") == "inconsistent":
problem_ids = index_status.get("consistency", {}).get(
"null_embeddings_sample", []
)
if problem_ids:
problem_docs = [
shout for shout in shouts_data if str(shout.id) in problem_ids
]
if problem_docs:
await search_service.bulk_index(problem_docs)
if problem_ids:
problem_docs = [
shout for shout in shouts_data if str(shout.id) in problem_ids
]
if problem_docs:
logger.info(f"Reindexing {len(problem_docs)} inconsistent documents")
await search_service.bulk_index(problem_docs)
except asyncio.TimeoutError:
logger.error("Search index status check timed out after 15 seconds")
except Exception as e:
logger.error(f"Error checking search index status: {str(e)}")
# Only consider shouts with body content for body verification
def has_body_content(shout):
@@ -937,14 +961,25 @@ async def initialize_search_index(shouts_data):
try:
test_query = "test"
# Use body search since that's most likely to return results
test_results = await search_text(test_query, 5)
if test_results:
categories = set()
for result in test_results:
result_id = result.get("id")
matching_shouts = [s for s in shouts_data if str(s.id) == result_id]
if matching_shouts and hasattr(matching_shouts[0], "category"):
categories.add(getattr(matching_shouts[0], "category", "unknown"))
logger.info(f"Running test search with query: '{test_query}'")
try:
search_future = search_text(test_query, 5)
test_results = await asyncio.wait_for(search_future, timeout=15.0)
if test_results:
logger.info(f"Test search successful! Found {len(test_results)} results")
categories = set()
for result in test_results:
result_id = result.get("id")
matching_shouts = [s for s in shouts_data if str(s.id) == result_id]
if matching_shouts and hasattr(matching_shouts[0], "category"):
categories.add(getattr(matching_shouts[0], "category", "unknown"))
logger.info(f"Search test complete: found categories {categories}")
else:
logger.warning("Test search completed but returned no results")
except asyncio.TimeoutError:
logger.error("Test search timed out after 15 seconds")
except Exception as test_error:
logger.error(f"Error during test search: {str(test_error)}")
except Exception as e:
pass
logger.error(f"Error in final search verification: {str(e)}")