toxicity-detector
nlp/toxicity.py · 31 lines · Normal file
@@ -0,0 +1,31 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Multi-label toxicity classifier for Russian text.
tiny_tox_model_path = 'cointegrated/rubert-tiny-toxicity'
tiny_tox_tokenizer = AutoTokenizer.from_pretrained(tiny_tox_model_path)
tiny_tox_model = AutoModelForSequenceClassification.from_pretrained(
    tiny_tox_model_path)


# Optionally move the model to the GPU:
# if torch.cuda.is_available():
#     tiny_tox_model.cuda()


def text2toxicity(text, aggregate=True):
    """Calculate the toxicity of a text (if aggregate=True)
    or a vector of toxicity aspects (if aggregate=False)."""
    with torch.no_grad():
        inputs = tiny_tox_tokenizer(
            # Accept a single string or a list of strings.
            text.lower() if isinstance(text, str) else [t.lower() for t in text],
            return_tensors='pt',
            truncation=True,
            padding=True
        ).to(tiny_tox_model.device)
        # One sigmoid per logit: an independent probability for each toxicity aspect.
        proba = torch.sigmoid(tiny_tox_model(**inputs).logits).cpu().numpy()
    if isinstance(text, str):
        # A single string is a batch of one; unwrap it.
        proba = proba[0]
    if aggregate:
        # Overall toxicity: 1 - P(non-toxic) * (1 - P(dangerous)).
        return 1 - proba.T[0] * (1 - proba.T[-1])
    return proba
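
For reference, a minimal usage sketch, not part of the commit; the `__main__` guard and the sample string are illustrative assumptions (the model itself targets Russian input):

if __name__ == '__main__':
    sample = 'Example sentence to score.'
    # Single aggregated toxicity score in [0, 1].
    print(text2toxicity(sample))
    # Per-aspect probability vector instead of one score.
    print(text2toxicity(sample, aggregate=False))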