debug: Limit max chars for bulk indexing
All checks were successful
Deploy on push / deploy (push) Successful in 53s

This commit is contained in:
Stepan Vladovskiy 2025-03-21 14:18:32 -03:00
parent 57e1e8e6bd
commit 19c5028a0c

View File

@@ -95,10 +95,13 @@ class SearchService:
start_time = time.time()
logger.info(f"Starting bulk indexing of {len(shouts)} documents")
MAX_TEXT_LENGTH = 8000 # Maximum text length to send in a single request
batch_size = MAX_BATCH_SIZE
total_indexed = 0
total_skipped = 0
total_truncated = 0
i = 0
for i in range(0, len(shouts), batch_size):
batch = shouts[i:i+batch_size]
logger.info(f"Processing batch {i//batch_size + 1} of {(len(shouts)-1)//batch_size + 1}, size {len(batch)}")
@@ -137,6 +140,13 @@ class SearchService:
total_skipped += 1
continue
# Truncate text if it exceeds the maximum length
original_length = len(text)
if original_length > MAX_TEXT_LENGTH:
text = text[:MAX_TEXT_LENGTH]
logger.info(f"Truncated document {shout.id} from {original_length} to {MAX_TEXT_LENGTH} chars")
total_truncated += 1
documents.append({
"id": str(shout.id),
"text": text