spacy-words-separation2
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
import spacy
|
||||
from spacy.lang.ru.examples import
|
||||
|
||||
# Load the Russian language model
|
||||
nlp = spacy.load("ru_core_news_sm")
|
||||
nlp = spacy.load("ru_core_news_md")
|
||||
|
||||
def segment_text(text):
|
||||
"""
|
||||
@@ -11,7 +12,7 @@ def segment_text(text):
|
||||
doc = nlp(text)
|
||||
|
||||
# Extract words from the processed document
|
||||
segmented_text = ' '.join([token.text for token in doc])
|
||||
segmented_text = ' '.join([token.text for token in doc if not token.is_space])
|
||||
|
||||
return segmented_text
|
||||
|
||||
|
Reference in New Issue
Block a user