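"""Command-line toxicity detector for Russian text, using a pretrained
BERT classifier from the Hugging Face Hub."""
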
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import torch.nn.functional as F

# Load tokenizer and model weights
tokenizer = BertTokenizer.from_pretrained(
    "SkolkovoInstitute/russian_toxicity_classifier"
)
model = BertForSequenceClassification.from_pretrained(
    "SkolkovoInstitute/russian_toxicity_classifier"
)
model.eval()  # switch to evaluation mode so dropout doesn't perturb inference
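
# NOTE: detector() below assumes label index 1 is the toxic class. If in doubt,
# the label mapping can be inspected at runtime via model.config.id2label.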
def detector(text):
    # Prepare the input
    batch = tokenizer.encode(text, return_tensors="pt", truncation=True)

    # Inference
    with torch.no_grad():
        result = model(batch)

    # Get logits
    logits = result.logits

    # Convert logits to probabilities using softmax
    probabilities = F.softmax(logits, dim=1)

    # Probability of the toxic class (index 1 by assumption)
    return probabilities[0][1].item()
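

# A possible extension (not in the original script): score several texts in one
# forward pass. This is a sketch that reuses the tokenizer and model loaded
# above; padding=True aligns the batch to a common length.
# Example: scores = detector_batch(["первый текст", "второй текст"])
def detector_batch(texts):
    enc = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        logits = model(**enc).logits
    # One toxic-class probability per input text
    return F.softmax(logits, dim=1)[:, 1].tolist()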
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        p = detector(sys.argv[1])
        toxicity_percentage = p * 100  # assuming index 1 is the toxic class
        print(f"Toxicity Probability: {toxicity_percentage:.2f}%")