debug(search.py): decrease batch sizes for bulk indexing

Stepan Vladovskiy 2025-03-21 17:56:54 -03:00
parent fb820f67fd
commit 316375bf18


@@ -96,7 +96,7 @@ class SearchService:
         start_time = time.time()
         logger.info(f"Starting bulk indexing of {len(shouts)} documents")
-        MAX_TEXT_LENGTH = 8000  # Maximum text length to send in a single request
+        MAX_TEXT_LENGTH = 4000  # Maximum text length to send in a single request
         max_batch_size = MAX_BATCH_SIZE
         total_indexed = 0
         total_skipped = 0
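
For context, MAX_TEXT_LENGTH caps how much of a document's text is sent to the search backend in a single indexing request; this commit halves it from 8000 to 4000 characters. A minimal sketch of how such a cap might be applied before indexing (the helper name and the text field are assumptions, since the surrounding code is not shown in the diff):

MAX_TEXT_LENGTH = 4000  # matches the new cap in this commit

def truncate_for_indexing(text: str, max_length: int = MAX_TEXT_LENGTH) -> str:
    # Hypothetical helper: keep each request payload small by cutting
    # long document bodies down to max_length characters before indexing.
    return text if len(text) <= max_length else text[:max_length]
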
@@ -176,17 +176,17 @@ class SearchService:
         # Process small documents (larger batches)
         if small_docs:
-            batch_size = min(max_batch_size, 25)
+            batch_size = min(max_batch_size, 15)
             await self._process_document_batches(small_docs, batch_size, "small")

         # Process medium documents (medium batches)
         if medium_docs:
-            batch_size = min(max_batch_size, 15)
+            batch_size = min(max_batch_size, 10)
             await self._process_document_batches(medium_docs, batch_size, "medium")

         # Process large documents (small batches)
         if large_docs:
-            batch_size = min(max_batch_size, 5)
+            batch_size = min(max_batch_size, 3)
             await self._process_document_batches(large_docs, batch_size, "large")

         elapsed = time.time() - start_time
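
The second hunk tightens a size-tiered batching scheme: documents are grouped by text length, and each group is flushed in batches, with smaller batches for larger documents so no single request carries too much payload. A minimal sketch of that pattern, assuming hypothetical size cutoffs and a chunking helper (neither appears in the diff):

# Assumed cutoffs for illustration; the real thresholds are not shown.
SMALL_MAX = 1000
MEDIUM_MAX = 4000

def bucket_by_size(docs: list[dict]) -> tuple[list, list, list]:
    # Split documents into small/medium/large groups by text length.
    small, medium, large = [], [], []
    for doc in docs:
        size = len(doc.get("text", ""))
        if size <= SMALL_MAX:
            small.append(doc)
        elif size <= MEDIUM_MAX:
            medium.append(doc)
        else:
            large.append(doc)
    return small, medium, large

def chunked(items: list, batch_size: int):
    # Yield consecutive fixed-size batches, the way
    # _process_document_batches presumably iterates its input.
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]

With the new limits, small documents go out 15 per request, medium documents 10, and large documents 3, trading some indexing throughput for smaller, more reliable requests.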