refact(search,reader): without any kind of sorting
All checks were successful
Deploy on push / deploy (push) Successful in 42s

This commit is contained in:
Stepan Vladovskiy 2025-04-24 21:00:41 -03:00
parent e7facf8d87
commit fac43e5997
2 changed files with 18 additions and 58 deletions

View File

@@ -396,38 +396,25 @@ async def load_shouts_search(_, info, text, options):
# Get search results with pagination
results = await search_text(text, limit, offset)
# If no results, return empty list
if not results:
logger.info(f"No search results found for '{text}'")
return []
# Extract IDs and scores
scores = {}
hits_ids = []
for sr in results:
shout_id = sr.get("id")
if shout_id:
shout_id = str(shout_id)
scores[shout_id] = sr.get("score")
hits_ids.append(shout_id)
# Extract IDs in the order from the search engine
hits_ids = [str(sr.get("id")) for sr in results if sr.get("id")]
# Query DB for only the IDs in the current page
q = query_with_stat(info)
q = q.filter(Shout.id.in_(hits_ids))
q = apply_filters(q, options.get("filters", {}))
#
shouts = get_shouts_with_links(info, q, len(hits_ids), 0)
# Add scores from search results
for shout in shouts:
shout_id = str(shout['id'])
shout["score"] = scores.get(shout_id, 0)
# Reorder shouts to match the order from hits_ids
shouts_dict = {str(shout['id']): shout for shout in shouts}
ordered_shouts = [shouts_dict[shout_id] for shout_id in hits_ids if shout_id in shouts_dict]
# Re-sort by search score to maintain ranking
shouts.sort(key=lambda x: scores.get(str(x['id']), 0), reverse=True)
return shouts
return ordered_shouts
return []

View File

@@ -26,9 +26,8 @@ SEARCH_CACHE_ENABLED = bool(
os.environ.get("SEARCH_CACHE_ENABLED", "true").lower() in ["true", "1", "yes"]
)
SEARCH_CACHE_TTL_SECONDS = int(
os.environ.get("SEARCH_CACHE_TTL_SECONDS", "900")
os.environ.get("SEARCH_CACHE_TTL_SECONDS", "300")
) # Default: 5 minutes
SEARCH_MIN_SCORE = float(os.environ.get("SEARCH_MIN_SCORE", "0.1"))
SEARCH_PREFETCH_SIZE = int(os.environ.get("SEARCH_PREFETCH_SIZE", "200"))
SEARCH_USE_REDIS = bool(
os.environ.get("SEARCH_USE_REDIS", "true").lower() in ["true", "1", "yes"]
@@ -221,9 +220,6 @@ class SearchService:
logger.info(
f"Search caching enabled using {cache_location} cache with TTL={SEARCH_CACHE_TTL_SECONDS}s"
)
logger.info(
f"Minimum score filter: {SEARCH_MIN_SCORE}, prefetch size: {SEARCH_PREFETCH_SIZE}"
)
async def info(self):
"""Return information about search service"""
@@ -712,47 +708,32 @@ class SearchService:
# Not in cache or cache disabled, perform new search
try:
search_limit = limit
search_offset = offset
if SEARCH_CACHE_ENABLED:
search_limit = SEARCH_PREFETCH_SIZE
search_offset = 0
else:
search_limit = limit
search_offset = offset
response = await self.client.post(
"/search-combined",
json={"text": text, "limit": search_limit, "offset": search_offset},
json={"text": text, "limit": search_limit},
)
response.raise_for_status()
result = response.json()
formatted_results = result.get("results", [])
valid_results = []
for item in formatted_results:
doc_id = item.get("id")
if doc_id and doc_id.isdigit():
valid_results.append(item)
# filter out nonnumeric IDs
valid_results = [r for r in formatted_results if r.get("id", "").isdigit()]
if len(valid_results) != len(formatted_results):
formatted_results = valid_results
if len(valid_results) != len(formatted_results):
formatted_results = valid_results
if SEARCH_MIN_SCORE > 0:
initial_count = len(formatted_results)
formatted_results = [
r
for r in formatted_results
if r.get("score", 0) >= SEARCH_MIN_SCORE
]
if SEARCH_CACHE_ENABLED:
# Store the full prefetch batch, then page it
await self.cache.store(text, formatted_results)
end_idx = offset + limit
page_results = formatted_results[offset:end_idx]
return page_results
return await self.cache.get(text, limit, offset)
return formatted_results
except Exception as e:
@@ -783,12 +764,6 @@ class SearchService:
result = response.json()
author_results = result.get("results", [])
# Apply score filtering if needed
if SEARCH_MIN_SCORE > 0:
author_results = [
r for r in author_results if r.get("score", 0) >= SEARCH_MIN_SCORE
]
# Store in cache if enabled
if SEARCH_CACHE_ENABLED:
await self.cache.store(cache_key, author_results)
@@ -829,7 +804,7 @@ search_service = SearchService()
# API-compatible function to perform a search
async def search_text(text: str, limit: int = 50, offset: int = 0):
async def search_text(text: str, limit: int = 200, offset: int = 0):
payload = []
if search_service.available:
payload = await search_service.search(text, limit, offset)
@@ -848,10 +823,8 @@ async def get_search_count(text: str):
if not search_service.available:
return 0
if SEARCH_CACHE_ENABLED:
cache_key = f"title:{text}"
if await search_service.cache.has_query(cache_key):
return await search_service.cache.get_total_count(cache_key)
if SEARCH_CACHE_ENABLED and await search_service.cache.has_query(text):
return await search_service.cache.get_total_count(text)
# If not found in cache, fetch from endpoint
return len(await search_text(text, SEARCH_PREFETCH_SIZE, 0))