welcomecenterbot/nlp/toxicity_detector.py
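"""Toxicity detection for Russian text.

Loads the SkolkovoInstitute/russian_toxicity_classifier BERT model and exposes
detector(text), which returns the probability that the given text is toxic.
"""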

from transformers import BertTokenizer, BertForSequenceClassification
import torch
import torch.nn.functional as F

# Load tokenizer and model weights
tokenizer = BertTokenizer.from_pretrained(
    "SkolkovoInstitute/russian_toxicity_classifier"
)
model = BertForSequenceClassification.from_pretrained(
    "SkolkovoInstitute/russian_toxicity_classifier"
)


def detector(text):
    """Return the probability that the given text is toxic."""
    # Prepare the input as a batch of token ids
    batch = tokenizer.encode(text, return_tensors="pt")

    # Inference
    with torch.no_grad():
        result = model(batch)

    # Get logits
    logits = result.logits
    # Convert logits to probabilities using softmax
    probabilities = F.softmax(logits, dim=1)
    # Index 1 corresponds to the toxic class
    return probabilities[0][1].item()


if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        p = detector(sys.argv[1])
        toxicity_percentage = p * 100
        print(f"Toxicity Probability: {toxicity_percentage:.2f}%")