refact(search,reader): without any kind of sorting
All checks were successful
Deploy on push / deploy (push) Successful in 42s

This commit is contained in:
Stepan Vladovskiy 2025-04-24 21:00:41 -03:00
parent e7facf8d87
commit fac43e5997
2 changed files with 18 additions and 58 deletions

View File

@@ -396,38 +396,25 @@ async def load_shouts_search(_, info, text, options):
# Get search results with pagination
results = await search_text(text, limit, offset)
# If no results, return empty list
if not results:
logger.info(f"No search results found for '{text}'")
return []
# Extract IDs and scores
scores = {}
hits_ids = []
for sr in results:
shout_id = sr.get("id")
if shout_id:
shout_id = str(shout_id)
scores[shout_id] = sr.get("score")
hits_ids.append(shout_id)
# Extract IDs in the order from the search engine
hits_ids = [str(sr.get("id")) for sr in results if sr.get("id")]
# Query DB for only the IDs in the current page
q = query_with_stat(info)
q = q.filter(Shout.id.in_(hits_ids))
q = apply_filters(q, options.get("filters", {}))
#
shouts = get_shouts_with_links(info, q, len(hits_ids), 0)
# Add scores from search results
for shout in shouts:
shout_id = str(shout['id'])
shout["score"] = scores.get(shout_id, 0)
# Reorder shouts to match the order from hits_ids
shouts_dict = {str(shout['id']): shout for shout in shouts}
ordered_shouts = [shouts_dict[shout_id] for shout_id in hits_ids if shout_id in shouts_dict]
# Re-sort by search score to maintain ranking
shouts.sort(key=lambda x: scores.get(str(x['id']), 0), reverse=True)
return shouts
return ordered_shouts
return []

View File

@@ -26,9 +26,8 @@ SEARCH_CACHE_ENABLED = bool(
os.environ.get("SEARCH_CACHE_ENABLED", "true").lower() in ["true", "1", "yes"]
)
SEARCH_CACHE_TTL_SECONDS = int(
os.environ.get("SEARCH_CACHE_TTL_SECONDS", "900")
os.environ.get("SEARCH_CACHE_TTL_SECONDS", "300")
) # Default: 5 minutes
SEARCH_MIN_SCORE = float(os.environ.get("SEARCH_MIN_SCORE", "0.1"))
SEARCH_PREFETCH_SIZE = int(os.environ.get("SEARCH_PREFETCH_SIZE", "200"))
SEARCH_USE_REDIS = bool(
os.environ.get("SEARCH_USE_REDIS", "true").lower() in ["true", "1", "yes"]
@@ -221,9 +220,6 @@ class SearchService:
logger.info(
f"Search caching enabled using {cache_location} cache with TTL={SEARCH_CACHE_TTL_SECONDS}s"
)
logger.info(
f"Minimum score filter: {SEARCH_MIN_SCORE}, prefetch size: {SEARCH_PREFETCH_SIZE}"
)
async def info(self):
"""Return information about search service"""
@@ -712,47 +708,32 @@ class SearchService:
# Not in cache or cache disabled, perform new search
try:
search_limit = limit
search_offset = offset
if SEARCH_CACHE_ENABLED:
search_limit = SEARCH_PREFETCH_SIZE
search_offset = 0
else:
search_limit = limit
search_offset = offset
response = await self.client.post(
"/search-combined",
json={"text": text, "limit": search_limit, "offset": search_offset},
json={"text": text, "limit": search_limit},
)
response.raise_for_status()
result = response.json()
formatted_results = result.get("results", [])
valid_results = []
for item in formatted_results:
doc_id = item.get("id")
if doc_id and doc_id.isdigit():
valid_results.append(item)
# filter out nonnumeric IDs
valid_results = [r for r in formatted_results if r.get("id", "").isdigit()]
if len(valid_results) != len(formatted_results):
formatted_results = valid_results
if len(valid_results) != len(formatted_results):
formatted_results = valid_results
if SEARCH_MIN_SCORE > 0:
initial_count = len(formatted_results)
formatted_results = [
r
for r in formatted_results
if r.get("score", 0) >= SEARCH_MIN_SCORE
]
if SEARCH_CACHE_ENABLED:
# Store the full prefetch batch, then page it
await self.cache.store(text, formatted_results)
end_idx = offset + limit
page_results = formatted_results[offset:end_idx]
return page_results
return await self.cache.get(text, limit, offset)
return formatted_results
except Exception as e:
@@ -783,12 +764,6 @@ class SearchService:
result = response.json()
author_results = result.get("results", [])
# Apply score filtering if needed
if SEARCH_MIN_SCORE > 0:
author_results = [
r for r in author_results if r.get("score", 0) >= SEARCH_MIN_SCORE
]
# Store in cache if enabled
if SEARCH_CACHE_ENABLED:
await self.cache.store(cache_key, author_results)
@@ -829,7 +804,7 @@ search_service = SearchService()
# API-compatible function to perform a search
async def search_text(text: str, limit: int = 50, offset: int = 0):
async def search_text(text: str, limit: int = 200, offset: int = 0):
payload = []
if search_service.available:
payload = await search_service.search(text, limit, offset)
@@ -848,10 +823,8 @@ async def get_search_count(text: str):
if not search_service.available:
return 0
if SEARCH_CACHE_ENABLED:
cache_key = f"title:{text}"
if await search_service.cache.has_query(cache_key):
return await search_service.cache.get_total_count(cache_key)
if SEARCH_CACHE_ENABLED and await search_service.cache.has_query(text):
return await search_service.cache.get_total_count(text)
# If not found in cache, fetch from endpoint
return len(await search_text(text, SEARCH_PREFETCH_SIZE, 0))