spacy-words-separation2
This commit is contained in:
parent
56a2632980
commit
7030e58f4b
|
@ -6,11 +6,14 @@ WORKDIR /app
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
|
|
||||||
# Install system dependencies required for building Python packages
|
# Install system dependencies required for building Python packages
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends gcc libffi-dev libssl-dev
|
RUN apt-get update && apt-get install -y --no-install-recommends wget gcc libffi-dev libssl-dev
|
||||||
|
|
||||||
# Install Python dependencies including redis with hiredis support
|
# Install Python dependencies including redis with hiredis support
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Download and install the Russian language model
|
||||||
|
RUN python -m spacy download ru_core_news_md
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
import spacy
|
import spacy
|
||||||
|
from spacy.lang.ru.examples import sentences
|
||||||
|
|
||||||
# Load the Russian language model
|
# Load the Russian language model
|
||||||
nlp = spacy.load("ru_core_news_sm")
|
nlp = spacy.load("ru_core_news_md")
|
||||||
|
|
||||||
def segment_text(text):
|
def segment_text(text):
|
||||||
"""
|
"""
|
||||||
|
@ -11,7 +12,7 @@ def segment_text(text):
|
||||||
doc = nlp(text)
|
doc = nlp(text)
|
||||||
|
|
||||||
# Extract words from the processed document
|
# Extract words from the processed document
|
||||||
segmented_text = ' '.join([token.text for token in doc])
|
segmented_text = ' '.join([token.text for token in doc if not token.is_space])
|
||||||
|
|
||||||
return segmented_text
|
return segmented_text
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,4 @@ aiohttp
|
||||||
aiofiles
|
aiofiles
|
||||||
spacy
|
spacy
|
||||||
transformers
|
transformers
|
||||||
easyocr
|
easyocr
|
||||||
# protobuf
|
|
||||||
# sentencepiece
|
|
Loading…
Reference in New Issue
Block a user