fix-percentage

Untone 2024-09-26 13:24:18 +03:00
parent 878da549e0
commit 4a025a5595
2 changed files with 23 additions and 6 deletions


@@ -1,13 +1,30 @@
 from transformers import BertTokenizer, BertForSequenceClassification
 import torch
+import torch.nn.functional as F
 
-# load tokenizer and model weights
+# Load tokenizer and model weights
 tokenizer = BertTokenizer.from_pretrained('SkolkovoInstitute/russian_toxicity_classifier')
 model = BertForSequenceClassification.from_pretrained('SkolkovoInstitute/russian_toxicity_classifier')
 
 def detector(text):
-    # prepare the input
+    # Prepare the input
     batch = tokenizer.encode(text, return_tensors='pt')
-    # inference
-    model(batch)
+    # Inference
+    with torch.no_grad():
+        result = model(batch)
+    # Get logits
+    logits = result.logits
+    # Convert logits to probabilities using softmax
+    probabilities = F.softmax(logits, dim=1)
+    return probabilities[0][1].item()
+
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) > 1:
+        p = detector(sys.argv[1])
+        toxicity_percentage = p * 100  # Assuming index 1 is for toxic class
+        print(f"Toxicity Probability: {toxicity_percentage:.2f}%")


@@ -1,4 +1,4 @@
 aiohttp
 redis[hiredis]
-tensorflow
+torch
 transformers
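Assuming this file is the project's requirements.txt, the swap above replaces the TensorFlow dependency with PyTorch to match the torch-based inference in the detector; a fresh environment picks the change up with the usual:

pip install -r requirements.txt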