stopwords-fix
This commit is contained in:
@@ -68,21 +68,32 @@ def check_stopwords(text):
|
||||
text (str): The input normalized text to check.
|
||||
|
||||
Returns:
|
||||
bool: True if any stopword is found in the text, False otherwise.
|
||||
int: The score based on the number of stopwords found in the text.
|
||||
|
||||
Examples:
|
||||
>>> check_stopwords("this is a хуй")
|
||||
True
|
||||
|
||||
46
|
||||
|
||||
>>> check_stopwords("this is clean")
|
||||
False
|
||||
0
|
||||
|
||||
>>> check_stopwords("хуй is a хуй")
|
||||
46
|
||||
|
||||
>>> check_stopwords("clean is clean")
|
||||
0
|
||||
"""
|
||||
|
||||
|
||||
# Normalize the text by splitting into words
|
||||
words = set(text.split())
|
||||
|
||||
|
||||
# Check for any intersection with stopword_set
|
||||
return not stopword_set.isdisjoint(words)
|
||||
stopwords_found = stopword_set.intersection(words)
|
||||
|
||||
# Calculate the score based on the number of stopwords found
|
||||
score = len(stopwords_found) * 46
|
||||
|
||||
return score
|
||||
|
||||
# Example usage
|
||||
if __name__ == "__main__":
|
||||
|
Reference in New Issue
Block a user