debug: Limit max chars for bulk indexing
All checks were successful
Deploy on push / deploy (push) Successful in 53s
All checks were successful
Deploy on push / deploy (push) Successful in 53s
This commit is contained in:
parent
57e1e8e6bd
commit
19c5028a0c
|
@@ -95,10 +95,13 @@ class SearchService:
|
|||
start_time = time.time()
|
||||
logger.info(f"Starting bulk indexing of {len(shouts)} documents")
|
||||
|
||||
MAX_TEXT_LENGTH = 8000 # Maximum text length to send in a single request
|
||||
batch_size = MAX_BATCH_SIZE
|
||||
total_indexed = 0
|
||||
total_skipped = 0
|
||||
total_truncated = 0
|
||||
i = 0
|
||||
|
||||
for i in range(0, len(shouts), batch_size):
|
||||
batch = shouts[i:i+batch_size]
|
||||
logger.info(f"Processing batch {i//batch_size + 1} of {(len(shouts)-1)//batch_size + 1}, size {len(batch)}")
|
||||
|
@@ -137,6 +140,13 @@ class SearchService:
|
|||
total_skipped += 1
|
||||
continue
|
||||
|
||||
# Truncate text if it exceeds the maximum length
|
||||
original_length = len(text)
|
||||
if original_length > MAX_TEXT_LENGTH:
|
||||
text = text[:MAX_TEXT_LENGTH]
|
||||
logger.info(f"Truncated document {shout.id} from {original_length} to {MAX_TEXT_LENGTH} chars")
|
||||
total_truncated += 1
|
||||
|
||||
documents.append({
|
||||
"id": str(shout.id),
|
||||
"text": text
|
||||
|
|
Loading…
Reference in New Issue
Block a user