# Post-patch state of nlp/ocr.py (git diff 03ec27e..d66b2e1, hunk at file
# line 7), restored from a diff that had been collapsed onto a single line.
# The module's import lines lie outside the hunk and are not reproduced here.
# NOTE(review): the logger name really is " ocr " (with surrounding spaces) in
# the diff text -- confirm this is intended and not an extraction artefact.
logger = logging.getLogger(" ocr ")
reader = easyocr.Reader(['ru'])  # Specify the languages you want to support


def ocr_recognize(file_path, min_confidence=0.5):
    """Run EasyOCR on an image and return the confidently recognized text.

    Args:
        file_path: Image to analyse; passed unchanged to ``reader.readtext``.
        min_confidence: A fragment is kept only when its confidence is
            strictly greater than this value. Defaults to 0.5, the threshold
            that was hard-coded in the patch.

    Returns:
        The kept text fragments joined by single spaces, in the order the
        reader reported them; an empty string when no fragment qualifies.
    """
    # Use EasyOCR to detect text in the photo
    detections = reader.readtext(file_path)

    confident_fragments = []
    for _coords, ocr_text, ocr_accuracy in detections:
        logger.debug("OCR Result: %s", ocr_text)
        # float() accepts NumPy scalars as well as plain Python floats,
        # whereas .item() exists only on the former.
        if float(ocr_accuracy) > min_confidence:
            confident_fragments.append(ocr_text)

    # Join with spaces, as the pre-patch code did; plain concatenation glued
    # neighbouring fragments into a single unreadable word.
    recognized_text = ' '.join(confident_fragments)

    logger.debug("Recognized Text: %s", recognized_text)
    return recognized_text