import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tiny_tox_model_path = 'cointegrated/rubert-tiny-toxicity'
tiny_tox_tokenizer = AutoTokenizer.from_pretrained(tiny_tox_model_path)
tiny_tox_model = AutoModelForSequenceClassification.from_pretrained(
    tiny_tox_model_path)
# if torch.cuda.is_available():
#     tiny_tox_model.cuda()


def text2toxicity(text, aggregate=True):
    """Calculate the toxicity of a text (if aggregate=True)
    or a vector of toxicity aspects (if aggregate=False)."""
    with torch.no_grad():
        inputs = tiny_tox_tokenizer(
            text.lower(),
            return_tensors='pt',
            truncation=True,
            padding=True
        ).to(tiny_tox_model.device)
        # Sigmoid over the logits gives independent per-aspect probabilities.
        proba = torch.sigmoid(tiny_tox_model(**inputs).logits).cpu().numpy()
    if isinstance(text, str):
        proba = proba[0]
    if aggregate:
        # Collapse the first (non-toxic) and last aspect probabilities
        # into a single toxicity score.
        return float(1 - proba.T[0] * (1 - proba.T[-1]))
    return proba
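As a quick sanity check, here is a minimal usage sketch; the sample sentence and the printed layout are illustrative assumptions, not part of the original listing:

sample = 'Привет, как дела?'  # illustrative sample text (assumed)
print(text2toxicity(sample, aggregate=True))   # single aggregated toxicity score
print(text2toxicity(sample, aggregate=False))  # vector of per-aspect probabilities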