debug: Limit max chars for bulk indexing
All checks were successful
Deploy on push / deploy (push) Successful in 53s
This commit is contained in:
parent 57e1e8e6bd
commit 19c5028a0c
|
@ -94,11 +94,14 @@ class SearchService:
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
logger.info(f"Starting bulk indexing of {len(shouts)} documents")
|
logger.info(f"Starting bulk indexing of {len(shouts)} documents")
|
||||||
|
|
||||||
|
MAX_TEXT_LENGTH = 8000 # Maximum text length to send in a single request
|
||||||
batch_size = MAX_BATCH_SIZE
|
batch_size = MAX_BATCH_SIZE
|
||||||
total_indexed = 0
|
total_indexed = 0
|
||||||
total_skipped = 0
|
total_skipped = 0
|
||||||
|
total_truncated = 0
|
||||||
i = 0
|
i = 0
|
||||||
|
|
||||||
for i in range(0, len(shouts), batch_size):
|
for i in range(0, len(shouts), batch_size):
|
||||||
batch = shouts[i:i+batch_size]
|
batch = shouts[i:i+batch_size]
|
||||||
logger.info(f"Processing batch {i//batch_size + 1} of {(len(shouts)-1)//batch_size + 1}, size {len(batch)}")
|
logger.info(f"Processing batch {i//batch_size + 1} of {(len(shouts)-1)//batch_size + 1}, size {len(batch)}")
|
||||||
|
@ -136,6 +139,13 @@ class SearchService:
|
||||||
logger.debug(f"Skipping shout {shout.id}: no text content")
|
logger.debug(f"Skipping shout {shout.id}: no text content")
|
||||||
total_skipped += 1
|
total_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Truncate text if it exceeds the maximum length
|
||||||
|
original_length = len(text)
|
||||||
|
if original_length > MAX_TEXT_LENGTH:
|
||||||
|
text = text[:MAX_TEXT_LENGTH]
|
||||||
|
logger.info(f"Truncated document {shout.id} from {original_length} to {MAX_TEXT_LENGTH} chars")
|
||||||
|
total_truncated += 1
|
||||||
|
|
||||||
documents.append({
|
documents.append({
|
||||||
"id": str(shout.id),
|
"id": str(shout.id),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user