welcomecenterbot/nlp/segment_text.py
2024-09-28 12:20:09 +03:00

18 lines
381 B
Python

import spacy
# Load the Russian spaCy pipeline once, at import time, so that every call to
# segment_text() reuses the same `nlp` object instead of reloading the model.
# NOTE: because this runs on import, the "ru_core_news_md" model package must
# be installed for this module to be importable.
nlp = spacy.load("ru_core_news_md")
def segment_text(text: str) -> str:
    """
    Tokenize text with spaCy and return the tokens joined by single spaces.

    Only the tokenizer of the module-level ``nlp`` pipeline is run (via
    ``nlp.make_doc``). This function reads nothing but ``token.text`` and
    ``token.is_space``, both of which are set by the tokenizer, so running
    the remaining pipeline components would add cost without changing the
    result.

    Args:
        text: The raw string to segment.

    Returns:
        A single string containing every non-whitespace token produced by
        the tokenizer, separated by one space. No other filtering is
        applied. An input that yields no such tokens gives an empty string.
    """
    # Tokenize only; skips the rest of the pipeline, whose annotations are
    # never used here.
    doc = nlp.make_doc(text)
    # Drop whitespace-only tokens so runs of spaces/newlines in the input
    # collapse to the single-space separator.
    return " ".join(token.text for token in doc if not token.is_space)