stopwords-fix
This commit is contained in:
parent
3e4aefc429
commit
5f3bbe6229
|
@ -115,12 +115,8 @@ async def messages_routing(msg, state):
|
||||||
toxic_score = detector(normalized_text)
|
toxic_score = detector(normalized_text)
|
||||||
toxic_perc = math.floor(toxic_score * 100)
|
toxic_perc = math.floor(toxic_score * 100)
|
||||||
if toxic_score < 0.9 and text != text.replace(' ', ''):
|
if toxic_score < 0.9 and text != text.replace(' ', ''):
|
||||||
logger.info('check without spaces')
|
logger.info('re-check without spaces')
|
||||||
if check_stopwords(normalized_text):
|
toxic_perc += check_stopwords(normalized_text):
|
||||||
logger.info('stopword detected without spaces, toxicity +40%')
|
|
||||||
toxic_perc += 40
|
|
||||||
else:
|
|
||||||
logger.info('ok')
|
|
||||||
logger.info(f"text: {normalized_text}\ntoxic: {toxic_perc}%")
|
logger.info(f"text: {normalized_text}\ntoxic: {toxic_perc}%")
|
||||||
|
|
||||||
await redis.set(f"toxic:{cid}", mid)
|
await redis.set(f"toxic:{cid}", mid)
|
||||||
|
|
|
@ -68,21 +68,32 @@ def check_stopwords(text):
|
||||||
text (str): The input normalized text to check.
|
text (str): The input normalized text to check.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bool: True if any stopword is found in the text, False otherwise.
|
int: The score based on the number of stopwords found in the text.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> check_stopwords("this is a хуй")
|
>>> check_stopwords("this is a хуй")
|
||||||
True
|
46
|
||||||
|
|
||||||
>>> check_stopwords("this is clean")
|
>>> check_stopwords("this is clean")
|
||||||
False
|
0
|
||||||
|
|
||||||
|
>>> check_stopwords("хуй is a хуй")
|
||||||
|
46
|
||||||
|
|
||||||
|
>>> check_stopwords("clean is clean")
|
||||||
|
0
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Normalize the text by splitting into words
|
# Normalize the text by splitting into words
|
||||||
words = set(text.split())
|
words = set(text.split())
|
||||||
|
|
||||||
# Check for any intersection with stopword_set
|
# Check for any intersection with stopword_set
|
||||||
return not stopword_set.isdisjoint(words)
|
stopwords_found = stopword_set.intersection(words)
|
||||||
|
|
||||||
|
# Calculate the score based on the number of stopwords found
|
||||||
|
score = len(stopwords_found) * 46
|
||||||
|
|
||||||
|
return score
|
||||||
|
|
||||||
# Example usage
|
# Example usage
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Loading…
Reference in New Issue
Block a user