dockerfile-fix

This commit is contained in:
Untone 2024-09-27 09:32:25 +03:00
parent a7b1925e8d
commit 14fc115e0f
2 changed files with 15 additions and 9 deletions

View File

@ -5,8 +5,10 @@ WORKDIR /app
COPY requirements.txt .
# Install system dependencies required for building Python packages
RUN apt-get update && apt-get install -y --no-install-recommends gcc libffi-dev libssl-dev
RUN pip install asyncio aiohttp redis[hiredis]
# Install Python dependencies including redis with hiredis support
RUN pip install --no-cache-dir -r requirements.txt
# Stage 2: Final stage
@ -15,7 +17,10 @@ FROM python:slim
WORKDIR /app
# Copy only necessary files from the builder stage
COPY --from=builder /usr/local/lib/python/dist-packages /usr/local/lib/python/dist-packages
COPY --from=builder /usr/local/lib/python3/dist-packages /usr/local/lib/python3/dist-packages
COPY --from=builder /usr/local/lib/python3.*/dist-packages /usr/local/lib/python3.*/dist-packages
COPY . .
EXPOSE 8080

View File

@ -8,11 +8,14 @@ model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
def is_russian_wording(text: str) -> bool:
    """
    Check whether the text contains more than one Cyrillic character.

    Each character is compared against the Unicode range U+0400..U+04FF
    (the Cyrillic block). Scanning stops as soon as a second matching
    character is found, so long inputs are not traversed needlessly.

    Args:
        text: The string to inspect.

    Returns:
        True if at least two characters of ``text`` fall inside the
        Cyrillic range, otherwise False (including for an empty string).
    """
    counter = 0
    for char in text:
        if "\u0400" <= char <= "\u04ff":  # Unicode range for Cyrillic characters
            counter += 1
            # A single stray Cyrillic letter is not enough to call the
            # text Russian; require a second one before answering True.
            if counter > 1:
                return True
    return False
@ -39,17 +42,15 @@ def normalize(text):
Normalize English text to resemble Russian characters.
"""
# Segment the text first
segmented_text = segment_text(
t = segment_text(
text.replace(" ", " ").replace(" ", " ").replace(" ", " ")
)
# Normalize after segmentation
segmented_text = segmented_text.lower()
if is_russian_wording(segmented_text):
if is_russian_wording(t):
# Normalize the text by replacing characters
normalized_text = (
segmented_text.replace("e", "е")
t.lower()
.replace("e", "е")
.replace("o", "о")
.replace("x", "х")
.replace("a", "а")
@ -74,7 +75,7 @@ def normalize(text):
return normalized_text
return segmented_text
return t
# Example usage