dockerfile-fix

This commit is contained in:
Untone 2024-09-27 09:32:25 +03:00
parent a7b1925e8d
commit 14fc115e0f
2 changed files with 15 additions and 9 deletions

View File

@ -5,8 +5,10 @@ WORKDIR /app
COPY requirements.txt . COPY requirements.txt .
# Install system dependencies required for building Python packages
RUN apt-get update && apt-get install -y --no-install-recommends gcc libffi-dev libssl-dev RUN apt-get update && apt-get install -y --no-install-recommends gcc libffi-dev libssl-dev
RUN pip install asyncio aiohttp redis[hiredis]
# Install Python dependencies including redis with hiredis support
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements.txt
# Stage 2: Final stage # Stage 2: Final stage
@ -15,7 +17,10 @@ FROM python:slim
WORKDIR /app WORKDIR /app
# Copy only necessary files from the builder stage # Copy only necessary files from the builder stage
COPY --from=builder /usr/local/lib/python/dist-packages /usr/local/lib/python/dist-packages
COPY --from=builder /usr/local/lib/python3/dist-packages /usr/local/lib/python3/dist-packages
COPY --from=builder /usr/local/lib/python3.*/dist-packages /usr/local/lib/python3.*/dist-packages COPY --from=builder /usr/local/lib/python3.*/dist-packages /usr/local/lib/python3.*/dist-packages
COPY . . COPY . .
EXPOSE 8080 EXPOSE 8080

View File

@ -8,11 +8,14 @@ model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
def is_russian_wording(text): def is_russian_wording(text):
""" """
Check if the text contains any Russian characters by checking Check if the text contains more than one Russian character by checking
each character against the Unicode range for Cyrillic. each character against the Unicode range for Cyrillic.
""" """
counter = 0
for char in text: for char in text:
if "\u0400" <= char <= "\u04ff": # Unicode range for Cyrillic characters if "\u0400" <= char <= "\u04ff": # Unicode range for Cyrillic characters
counter += 1
if counter > 1:
return True return True
return False return False
@ -39,17 +42,15 @@ def normalize(text):
Normalize English text to resemble Russian characters. Normalize English text to resemble Russian characters.
""" """
# Segment the text first # Segment the text first
segmented_text = segment_text( t = segment_text(
text.replace(" ", " ").replace(" ", " ").replace(" ", " ") text.replace(" ", " ").replace(" ", " ").replace(" ", " ")
) )
# Normalize after segmentation if is_russian_wording(t):
segmented_text = segmented_text.lower()
if is_russian_wording(segmented_text):
# Normalize the text by replacing characters # Normalize the text by replacing characters
normalized_text = ( normalized_text = (
segmented_text.replace("e", "е") t.lower()
.replace("e", "е")
.replace("o", "о") .replace("o", "о")
.replace("x", "х") .replace("x", "х")
.replace("a", "а") .replace("a", "а")
@ -74,7 +75,7 @@ def normalize(text):
return normalized_text return normalized_text
return segmented_text return t
# Example usage # Example usage