spacy-words-separation2

This commit is contained in:
Untone 2024-09-28 12:06:24 +03:00
parent 56a2632980
commit 7030e58f4b
3 changed files with 8 additions and 6 deletions

View File

@@ -6,11 +6,14 @@ WORKDIR /app
COPY requirements.txt .
# Install system dependencies required for building Python packages
RUN apt-get update && apt-get install -y --no-install-recommends gcc libffi-dev libssl-dev
RUN apt-get update && apt-get install -y --no-install-recommends wget gcc libffi-dev libssl-dev
# Install Python dependencies including redis with hiredis support
RUN pip install --no-cache-dir -r requirements.txt
# Download and install the Russian language model
RUN python -m spacy download ru_core_news_md
COPY . .
EXPOSE 8080

View File

@@ -1,7 +1,8 @@
import spacy
from spacy.lang.ru.examples import
# Load the Russian language model
nlp = spacy.load("ru_core_news_sm")
nlp = spacy.load("ru_core_news_md")
def segment_text(text):
"""
@@ -11,7 +12,7 @@ def segment_text(text):
doc = nlp(text)
# Extract words from the processed document
segmented_text = ' '.join([token.text for token in doc])
segmented_text = ' '.join([token.text for token in doc if not token.is_space])
return segmented_text

View File

@@ -4,5 +4,3 @@ aiofiles
spacy
transformers
easyocr
# protobuf
# sentencepiece