模型:
cardiffnlp/xlm-twitter-politics-sentiment
This is an "extension" of the multilingual twitter-xlm-roberta-base-sentiment model (model, original paper) with a focus on sentiment from politicians' tweets. The original sentiment fine-tuning was done on 8 languages (Ar, En, Fr, De, Hi, It, Sp, Pt), but further training was done using tweets from Members of Parliament from the UK (English), Spain (Spanish) and Greece (Greek).
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
from scipy.special import softmax


def preprocess(text):
    """Replace user mentions and links in *text* with generic placeholders.

    Tokens are split on single spaces; any token starting with ``@`` (and
    longer than one character) becomes ``@user`` and any token starting with
    ``http`` becomes ``http``.

    NOTE(review): the snippet called ``preprocess`` without defining it, which
    raised ``NameError``. This definition follows the helper published with
    the base twitter-xlm-roberta-base-sentiment model -- confirm it matches
    the preprocessing used when this model was fine-tuned.
    """
    tokens = []
    for token in text.split(" "):
        if token.startswith("@") and len(token) > 1:
            token = "@user"
        elif token.startswith("http"):
            token = "http"
        tokens.append(token)
    return " ".join(tokens)


MODEL = "cardiffnlp/xlm-twitter-politics-sentiment"

tokenizer = AutoTokenizer.from_pretrained(MODEL)

# PT
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

text = "Good night ?"
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
# First element of the model output, first (only) sequence in the batch;
# presumably the raw classification logits -- softmax turns them into
# scores that sum to 1.
scores = output[0][0].detach().numpy()
scores = softmax(scores)

# # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)

# text = "Good night ?"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)

# Print the scores from lowest to highest. The first printed column is the
# rank position (0 = lowest score), not the class id or label name.
ranking = np.argsort(scores)
for i, class_idx in enumerate(ranking):
    print(i, scores[class_idx])
Output:
0 0.0048229103 1 0.03117284 2 0.9640044