From fb820f67fd73e56d9ec0f405ba9968abf9e6a6ce Mon Sep 17 00:00:00 2001
From: Stepan Vladovskiy
Date: Fri, 21 Mar 2025 17:48:26 -0300
Subject: [PATCH] debug(search.py): increase batch size for bulk indexing

---
 services/search.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/services/search.py b/services/search.py
index c7e9c80c..9a0dafbf 100644
--- a/services/search.py
+++ b/services/search.py
@@ -96,7 +96,7 @@ class SearchService:
         start_time = time.time()
         logger.info(f"Starting bulk indexing of {len(shouts)} documents")
 
-        MAX_TEXT_LENGTH = 4000  # Maximum text length to send in a single request
+        MAX_TEXT_LENGTH = 8000  # Maximum text length to send in a single request
         max_batch_size = MAX_BATCH_SIZE
         total_indexed = 0
         total_skipped = 0
@@ -176,17 +176,17 @@ class SearchService:
 
         # Process small documents (larger batches)
         if small_docs:
-            batch_size = min(max_batch_size, 15)
+            batch_size = min(max_batch_size, 25)
             await self._process_document_batches(small_docs, batch_size, "small")
 
         # Process medium documents (medium batches)
         if medium_docs:
-            batch_size = min(max_batch_size, 10)
+            batch_size = min(max_batch_size, 15)
             await self._process_document_batches(medium_docs, batch_size, "medium")
 
         # Process large documents (small batches)
         if large_docs:
-            batch_size = min(max_batch_size, 3)
+            batch_size = min(max_batch_size, 5)
             await self._process_document_batches(large_docs, batch_size, "large")
 
         elapsed = time.time() - start_time
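Reviewer note (not part of the patch): the hunks above tune the size-based batching in SearchService's bulk indexing. Below is a minimal standalone sketch of that partitioning scheme for context; the thresholds SMALL_MAX and MEDIUM_MAX, the MAX_BATCH_SIZE value, and the function names are assumptions for illustration, not the exact code in services/search.py.

# Hypothetical sketch of size-based batch partitioning; thresholds and
# names are assumptions, not copied from the repository.
MAX_TEXT_LENGTH = 8000   # per-request text cap, as raised by this patch
MAX_BATCH_SIZE = 25      # assumed global cap referenced by the diff

SMALL_MAX = 2000         # assumed: docs up to this length are "small"
MEDIUM_MAX = 5000        # assumed: docs up to this length are "medium"

def partition_by_size(docs):
    """Split (doc_id, text) pairs into small/medium/large buckets by length."""
    small, medium, large = [], [], []
    for doc_id, text in docs:
        text = text[:MAX_TEXT_LENGTH]  # truncate before sending
        if len(text) <= SMALL_MAX:
            small.append((doc_id, text))
        elif len(text) <= MEDIUM_MAX:
            medium.append((doc_id, text))
        else:
            large.append((doc_id, text))
    return small, medium, large

def batch_sizes(max_batch_size=MAX_BATCH_SIZE):
    """Larger batches for small docs, smaller for large docs, mirroring
    the post-patch values of 25 / 15 / 5, each capped by max_batch_size."""
    return {
        "small": min(max_batch_size, 25),
        "medium": min(max_batch_size, 15),
        "large": min(max_batch_size, 5),
    }

if __name__ == "__main__":
    docs = [("a", "x" * 500), ("b", "y" * 3000), ("c", "z" * 9000)]
    small, medium, large = partition_by_size(docs)
    print(len(small), len(medium), len(large), batch_sizes())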