ocr

2024-09-28 10:06:04 +03:00
parent 98b842ef18
commit 32ce2e17c4
4 changed files with 49 additions and 2 deletions
--- a/bot/api.py
+++ b/bot/api.py
@@ -1,4 +1,6 @@
 import aiohttp
 import aiofiles
 import os
 import json
 from urllib.parse import urlencode
 from bot.config import BOT_TOKEN
@@ -7,7 +9,6 @@ import logging
 # Create a logger instance
 logger = logging.getLogger("bot.api")
 api_base = f"https://api.telegram.org/bot{BOT_TOKEN}/"
@@ -31,3 +32,23 @@ async def telegram_api(endpoint: str, json_data=None, **kwargs):
        import traceback
        traceback.print_exc()
 async def download_file(file_id):
    """Asynchronously download a file from Telegram and yield the temporary file path."""
    download_url = f"{api_base}/{file_path}"
    # Get the file path of the file using the telegram_api method
    file = await telegram_api("getFile", file_id=file_id)
    file_path = file["result"]["file_path"]
    async with aiohttp.ClientSession() as session:
        async with session.get(download_url) as response:
            if response.status == 200:
                # Save the downloaded file to a temporary location
                async with aiofiles.tempfile.NamedTemporaryFile(delete=True) as temp_file:
                    await temp_file.write(await response.read())
                    await temp_file.flush()
                    yield temp_file.name  # Yield the path of the temporary file
            else:
                raise Exception(f"Failed to download file: {response.status}")
--- a/handlers/messages_routing.py
+++ b/handlers/messages_routing.py
@@ -7,6 +7,7 @@ from bot.config import FEEDBACK_CHAT_ID
 from handlers.handle_private import handle_private
 from nlp.toxicity_detector import detector
 from nlp.normalize import normalize
 from nlp.ocr import ocr_recognize
 logger = logging.getLogger("handlers.messages_routing")
@@ -14,7 +15,15 @@ logger = logging.getLogger("handlers.messages_routing")
 async def messages_routing(msg, state):
    cid = msg["chat"]["id"]
    uid = msg["from"]["id"]
-    text = msg.get("text", msg.get("caption"))
+    text = msg.get("caption", msg.get("text", ""))
    for photo in msg.get("photo", [])
        file_id = photo.get("file_id")
        if file_id:
            async for temp_file_path in download_file(file_id):
                text += ocr_recognize(temp_file_path)
                text += '\n'
    reply_msg = msg.get("reply_to_message")
    if cid == uid:
--- a/nlp/ocr.py
+++ b/nlp/ocr.py
@@ -0,0 +1,16 @@
 import easyocr
 import logging
 logger = logging.getLogger("[ocr] ")
 # Initialize the EasyOCR reader
 reader = easyocr.Reader(['ru'])  # Specify the languages you want to support
 def ocr_recognize(file_path):
    # Use EasyOCR to detect text in the photo
    result = reader.readtext(file_path)
    # Extract the recognized text from the result
    recognized_text = ' '.join([text for text, _, _ in result])
    logger.debug(f'recognized_text: {recognized_text}')
    return recognized_text
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,5 +2,6 @@ redis[hiredis]
 aiohttp
 torch
 transformers
 easyocr
 # protobuf
 # sentencepiece