dockerfile-fix

2024-09-27 09:32:25 +03:00 · 2024-09-27 09:32:25 +03:00 · 14fc115e0f
commit 14fc115e0f
parent a7b1925e8d
2 changed files with 15 additions and 9 deletions
--- a/7
+++ b/7
@@ -5,8 +5,10 @@ WORKDIR /app

 COPY requirements.txt .

+# Install system dependencies required for building Python packages
 RUN apt-get update && apt-get install -y --no-install-recommends gcc libffi-dev libssl-dev
-RUN pip install asyncio aiohttp redis[hiredis] 
+
+# Install Python dependencies including redis with hiredis support
 RUN pip install --no-cache-dir -r requirements.txt

 # Stage 2: Final stage
@@ -15,7 +17,10 @@ FROM python:slim
 WORKDIR /app

 # Copy only necessary files from the builder stage
+COPY --from=builder /usr/local/lib/python/dist-packages /usr/local/lib/python/dist-packages
+COPY --from=builder /usr/local/lib/python3/dist-packages /usr/local/lib/python3/dist-packages
 COPY --from=builder /usr/local/lib/python3.*/dist-packages /usr/local/lib/python3.*/dist-packages
+
 COPY . .

 EXPOSE 8080
--- a/nlp/normalize.py
+++ b/nlp/normalize.py
@@ -8,11 +8,14 @@ model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")

 def is_russian_wording(text):
    """
-    Check if the text contains any Russian characters by checking
+    Check if the text contains more than one Russian character by checking
    each character against the Unicode range for Cyrillic.
    """
+    counter = 0
    for char in text:
        if "\u0400" <= char <= "\u04ff":  # Unicode range for Cyrillic characters
+            counter += 1
+        if counter > 1:
            return True
    return False

@@ -39,17 +42,15 @@ def normalize(text):
    Normalize English text to resemble Russian characters.
    """
    # Segment the text first
-    segmented_text = segment_text(
+    t = segment_text(
        text.replace("  ", " ").replace("  ", " ").replace("  ", " ")
    )

-    # Normalize after segmentation
-    segmented_text = segmented_text.lower()
-
-    if is_russian_wording(segmented_text):
+    if is_russian_wording(t):
        # Normalize the text by replacing characters
        normalized_text = (
-            segmented_text.replace("e", "е")
+            t.lower()
+            .replace("e", "е")
            .replace("o", "о")
            .replace("x", "х")
            .replace("a", "а")
@@ -74,7 +75,7 @@ def normalize(text):

        return normalized_text

-    return segmented_text
+    return t


 # Example usage