18 lines
381 B
Python
18 lines
381 B
Python
import spacy
|
|
|
|
# Russian SpaCy pipeline ("ru_core_news_md"), loaded once when the module is imported
nlp = spacy.load("ru_core_news_md")
|
|
|
|
def segment_text(text):
    """
    Split ``text`` into tokens with the module-level SpaCy pipeline.

    Returns a single string in which the tokens are separated by one
    space each; tokens flagged as whitespace by SpaCy are left out.
    """
    # Run the pipeline over the raw input
    parsed = nlp(text)

    # Collect the surface form of every non-whitespace token, in order
    words = []
    for tok in parsed:
        if tok.is_space:
            continue
        words.append(tok.text)

    return ' '.join(words)
|
|
|