dockerfile-fix
This commit is contained in:
parent
a7b1925e8d
commit
14fc115e0f
|
@ -5,8 +5,10 @@ WORKDIR /app
|
|||
|
||||
COPY requirements.txt .
|
||||
|
||||
# Install system dependencies required for building Python packages
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends gcc libffi-dev libssl-dev
|
||||
RUN pip install asyncio aiohttp redis[hiredis]
|
||||
|
||||
# Install Python dependencies including redis with hiredis support
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Stage 2: Final stage
|
||||
|
@ -15,7 +17,10 @@ FROM python:slim
|
|||
WORKDIR /app
|
||||
|
||||
# Copy only necessary files from the builder stage
|
||||
COPY --from=builder /usr/local/lib/python/dist-packages /usr/local/lib/python/dist-packages
|
||||
COPY --from=builder /usr/local/lib/python3/dist-packages /usr/local/lib/python3/dist-packages
|
||||
COPY --from=builder /usr/local/lib/python3.*/dist-packages /usr/local/lib/python3.*/dist-packages
|
||||
|
||||
COPY . .
|
||||
|
||||
EXPOSE 8080
|
||||
|
|
|
@ -8,11 +8,14 @@ model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
|
|||
|
||||
def is_russian_wording(text):
|
||||
"""
|
||||
Check if the text contains any Russian characters by checking
|
||||
Check if the text contains more than one Russian character by checking
|
||||
each character against the Unicode range for Cyrillic.
|
||||
"""
|
||||
counter = 0
|
||||
for char in text:
|
||||
if "\u0400" <= char <= "\u04ff": # Unicode range for Cyrillic characters
|
||||
counter += 1
|
||||
if counter > 1:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
@ -39,17 +42,15 @@ def normalize(text):
|
|||
Normalize English text to resemble Russian characters.
|
||||
"""
|
||||
# Segment the text first
|
||||
segmented_text = segment_text(
|
||||
t = segment_text(
|
||||
text.replace(" ", " ").replace(" ", " ").replace(" ", " ")
|
||||
)
|
||||
|
||||
# Normalize after segmentation
|
||||
segmented_text = segmented_text.lower()
|
||||
|
||||
if is_russian_wording(segmented_text):
|
||||
if is_russian_wording(t):
|
||||
# Normalize the text by replacing characters
|
||||
normalized_text = (
|
||||
segmented_text.replace("e", "е")
|
||||
t.lower()
|
||||
.replace("e", "е")
|
||||
.replace("o", "о")
|
||||
.replace("x", "х")
|
||||
.replace("a", "а")
|
||||
|
@ -74,7 +75,7 @@ def normalize(text):
|
|||
|
||||
return normalized_text
|
||||
|
||||
return segmented_text
|
||||
return t
|
||||
|
||||
|
||||
# Example usage
|
||||
|
|
Loading…
Reference in New Issue
Block a user