diff --git a/resolvers/reader.py b/resolvers/reader.py
index aeb60e50..23591963 100644
--- a/resolvers/reader.py
+++ b/resolvers/reader.py
@@ -396,38 +396,25 @@ async def load_shouts_search(_, info, text, options):
     # Get search results with pagination
    results = await search_text(text, limit, offset)

-    # If no results, return empty list
     if not results:
         logger.info(f"No search results found for '{text}'")
         return []

-    # Extract IDs and scores
-    scores = {}
-    hits_ids = []
-    for sr in results:
-        shout_id = sr.get("id")
-        if shout_id:
-            shout_id = str(shout_id)
-            scores[shout_id] = sr.get("score")
-            hits_ids.append(shout_id)
+    # Extract IDs in the order returned by the search engine
+    hits_ids = [str(sr.get("id")) for sr in results if sr.get("id")]

     # Query DB for only the IDs in the current page
     q = query_with_stat(info)
     q = q.filter(Shout.id.in_(hits_ids))
     q = apply_filters(q, options.get("filters", {}))
     shouts = get_shouts_with_links(info, q, len(hits_ids), 0)

-    # Add scores from search results
-    for shout in shouts:
-        shout_id = str(shout['id'])
-        shout["score"] = scores.get(shout_id, 0)
-
-    # Re-sort by search score to maintain ranking
-    shouts.sort(key=lambda x: scores.get(str(x['id']), 0), reverse=True)
-
-    return shouts
+    # Reorder shouts to match the ranking in hits_ids
+    shouts_dict = {str(shout['id']): shout for shout in shouts}
+    ordered_shouts = [shouts_dict[shout_id] for shout_id in hits_ids if shout_id in shouts_dict]
+
+    return ordered_shouts

     return []
diff --git a/services/search.py b/services/search.py
index ba4c6f10..90cf20e2 100644
--- a/services/search.py
+++ b/services/search.py
@@ -26,9 +26,8 @@
 SEARCH_CACHE_ENABLED = bool(
     os.environ.get("SEARCH_CACHE_ENABLED", "true").lower() in ["true", "1", "yes"]
 )
 SEARCH_CACHE_TTL_SECONDS = int(
-    os.environ.get("SEARCH_CACHE_TTL_SECONDS", "900")
-)  # Default: 15 minutes
-SEARCH_MIN_SCORE = float(os.environ.get("SEARCH_MIN_SCORE", "0.1"))
+    os.environ.get("SEARCH_CACHE_TTL_SECONDS", "300")
+)  # Default: 5 minutes
 SEARCH_PREFETCH_SIZE = int(os.environ.get("SEARCH_PREFETCH_SIZE", "200"))
 SEARCH_USE_REDIS = bool(
     os.environ.get("SEARCH_USE_REDIS", "true").lower() in ["true", "1", "yes"]
@@ -221,9 +220,6 @@ class SearchService:
             logger.info(
                 f"Search caching enabled using {cache_location} cache with TTL={SEARCH_CACHE_TTL_SECONDS}s"
             )
-            logger.info(
-                f"Minimum score filter: {SEARCH_MIN_SCORE}, prefetch size: {SEARCH_PREFETCH_SIZE}"
-            )

     async def info(self):
         """Return information about search service"""
@@ -712,47 +708,32 @@ class SearchService:
         # Not in cache or cache disabled, perform new search
         try:
             search_limit = limit
-            search_offset = offset

             if SEARCH_CACHE_ENABLED:
                 search_limit = SEARCH_PREFETCH_SIZE
-                search_offset = 0
             else:
                 search_limit = limit
-                search_offset = offset

             response = await self.client.post(
                 "/search-combined",
-                json={"text": text, "limit": search_limit, "offset": search_offset},
+                json={"text": text, "limit": search_limit},
             )
             response.raise_for_status()

             result = response.json()
             formatted_results = result.get("results", [])

-            valid_results = []
-            for item in formatted_results:
-                doc_id = item.get("id")
-                if doc_id and doc_id.isdigit():
-                    valid_results.append(item)
+            # Filter out results with non-numeric IDs
+            valid_results = [r for r in formatted_results if r.get("id", "").isdigit()]
             if len(valid_results) != len(formatted_results):
                 formatted_results = valid_results

-            if SEARCH_MIN_SCORE > 0:
-                initial_count = len(formatted_results)
-                formatted_results = [
-                    r
-                    for r in formatted_results
-                    if r.get("score", 0) >= SEARCH_MIN_SCORE
-                ]
-
             if SEARCH_CACHE_ENABLED:
+                # Store the full prefetch batch, then page it
                 await self.cache.store(text, formatted_results)
-                end_idx = offset + limit
-                page_results = formatted_results[offset:end_idx]
-                return page_results
+                return await self.cache.get(text, limit, offset)

             return formatted_results

         except Exception as e:
@@ -783,12 +764,6 @@ class SearchService:
             result = response.json()
             author_results = result.get("results", [])

-            # Apply score filtering if needed
-            if SEARCH_MIN_SCORE > 0:
-                author_results = [
-                    r for r in author_results if r.get("score", 0) >= SEARCH_MIN_SCORE
-                ]
-
             # Store in cache if enabled
             if SEARCH_CACHE_ENABLED:
                 await self.cache.store(cache_key, author_results)
@@ -829,7 +804,7 @@
 search_service = SearchService()


 # API-compatible function to perform a search
-async def search_text(text: str, limit: int = 50, offset: int = 0):
+async def search_text(text: str, limit: int = 200, offset: int = 0):
     payload = []
     if search_service.available:
         payload = await search_service.search(text, limit, offset)
@@ -848,10 +823,8 @@ async def get_search_count(text: str):
     if not search_service.available:
         return 0

-    if SEARCH_CACHE_ENABLED:
-        cache_key = f"title:{text}"
-        if await search_service.cache.has_query(cache_key):
-            return await search_service.cache.get_total_count(cache_key)
+    if SEARCH_CACHE_ENABLED and await search_service.cache.has_query(text):
+        return await search_service.cache.get_total_count(text)

     # If not found in cache, fetch from endpoint
     return len(await search_text(text, SEARCH_PREFETCH_SIZE, 0))
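For reference, a minimal sketch of the reordering pattern the reader.py hunk adopts, since ranking now relies on the order of hits_ids rather than per-result scores. The `search_hits` and `db_rows` values below are made-up stand-ins for the search-service response and the `get_shouts_with_links()` result; only the ID extraction and index-then-walk steps mirror the patch.

# Hypothetical data: `search_hits` mimics the search response (ranked
# best-first); `db_rows` mimics DB rows coming back in arbitrary order.
search_hits = [{"id": "7"}, {"id": "3"}, {"id": "12"}]
db_rows = [
    {"id": 3, "title": "b"},
    {"id": 12, "title": "c"},
    {"id": 7, "title": "a"},
]

# Same two steps as the patch: keep IDs in engine order, then index the
# DB rows by stringified ID and walk hits_ids to restore the ranking.
hits_ids = [str(hit["id"]) for hit in search_hits if hit.get("id")]
rows_by_id = {str(row["id"]): row for row in db_rows}
ordered = [rows_by_id[i] for i in hits_ids if i in rows_by_id]

# Rows removed by the DB-side filters simply drop out of the ordering.
assert [row["id"] for row in ordered] == [7, 3, 12]

Doing the reorder in Python keeps the SQL query a plain `IN (...)` filter; the dict index makes it O(n) rather than a sort keyed on scores that the engine no longer needs to return.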