Miss-matching ROC and AUC calculations in python

Miss-matching ROC and AUC calculations in python - python

I am currently using python to train a random forest model. I initially tried to compute the ROC curve representations as follows:
import scikitplot as skplt
from sklearn.metrics import RocCurveDisplay
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
model2_bert = RandomForestClassifier(bootstrap=False, max_depth=None, max_features='auto', min_samples_leaf=5, min_samples_split=5, n_estimators=50)
rf1 = model2_bert.fit(X_train, y_train)
y_hat = rf1.predict(X_test)
ax = plt.gca()
rfc_disp = RocCurveDisplay.from_estimator(rf1, X_test, y_test, ax=ax)
plt.show()
This gives me the figure :
As I was getting some AUC=1 for some specifications, I decided to try to calculate this in other ways.
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_hat)
roc_auc = metrics.auc(fpr, tpr)
roc_auc
# method I: plt
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
And the third method :
y_prob = rf.predict_proba(X_test)
rf1 = model2_bert.fit(X_train, y_train)
skplt.metrics.plot_roc_curve(y_test, y_prob)
metrics.plot_roc_curve(rf1, X_test, y_test)
plt.show()
It seems like the last two coincide and are based on the calculated AUC. What is wrong with the first graph, then?
PS: This is the confusion matris so AUC=0.97 seems to high for me. I even got AUC=1 in the first figure for some specifications...

I think the problem is that y_hat = rf1.predict(X_test) is returning binary classification output (0 and 1). For the ROC AUC you need a probability or score.
Instead, you should use predict_proba:
y_prob = rf1.predict_proba(X_test)
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_prob )
....
<rest of your code>

Related

My function to investigate the impact of sample size on the text classifier performance is not working correctly

I have defined the following function that returns the AUC and PRC scores for training and test datasets which you can find through the links below:
train dataset https://drive.google.com/file/d/1466SDm1nOpeDb_3UnW8Qjc1VEY_Be0R5/view?usp=sharing
test dataset https://drive.google.com/file/d/1vphjb3xbrklhLHNMYUexN6X_axepm0Xy/view?usp=sharing
Both of the datasets have samples in the following format. The text column contains documents, and the label column gives the sentiment of each document.
label text
1 I must admit that I'm addicted to "Version 2.0...
0 I think it's such a shame that an enormous tal...
1 The Sunsout No Room at The Inn Puzzle has oddl...
... ...
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
from sklearn.svm import LinearSVC
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import roc_curve, auc,precision_recall_curve
from sklearn.metrics import classification_report
from matplotlib import pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
train = pd.read_csv("train5-1.csv")
test = pd.read_csv("test5.csv")
def create_model(train_docs, train_y, test_docs, test_y, \
model_type='svm', stop_words=None, min_df=1, print_result = True, algorithm_para=1.0):
tfidf_vect = TfidfVectorizer(stop_words=stop_words,min_df=min_df)
tfidf_vect.fit_transform(train["text"])
y_test=test['label'].values
y_train=train["label"].values
X_train=tfidf_vect.fit_transform(train['text'].values)
X_test=tfidf_vect.transform(test['text'].values)
if 'svm' in model_type:
clf = svm.SVC(kernel='linear',probability=True)
clf=svm.LinearSVC(C=algorithm_para).fit(X_train, y_train)
predicted=clf.predict(X_test)
labels=sorted(train['label'].unique())
precision, recall, fscore, support=\
precision_recall_fscore_support(\
y_test, predicted, labels=labels)
if print_result==True:
print("labels: ", labels)
print("precision: ", precision)
print("recall: ", recall)
print("f-score: ", fscore)
print("support: ", support)
predict_p=clf._predict_proba_lr(X_test)
labels
predict_p[0:3]
y_test[0:3]
y_pred = predict_p[:,1]
fpr, tpr, thresholds = roc_curve(y_test,y_pred, pos_label=1)
precision, recall, thresholds = precision_recall_curve(y_test, y_pred, pos_label=1)
auc_score= auc(fpr, tpr)
prc_score=auc(recall, precision)
if print_result==True:
print("AUC: {:.2%}".format(auc_score), "PRC: {:.2%}".format(prc_score))
plt.figure();
plt.plot(fpr, tpr, color='darkorange', lw=2);
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--');
plt.xlim([0.0, 1.0]);
plt.ylim([0.0, 1.05]);
plt.xlabel('False Positive Rate');
plt.ylabel('True Positive Rate');
plt.title('AUC of SVM Model');
plt.show()
plt.figure();
plt.plot(recall, precision, color='darkorange', lw=2);
plt.xlim([0.0, 1.0]);
plt.ylim([0.0, 1.05]);
plt.xlabel('Recall');
plt.ylabel('Precision');
plt.title('Precision_Recall_Curve of SVM Model');
plt.show();
else:
clf=MultinomialNB(alpha=algorithm_para).fit(X_train, y_train)
predicted=clf.predict(X_test)
labels=sorted(train['label'].unique())
precision, recall, fscore, support=\
precision_recall_fscore_support(\
y_test, predicted, labels=labels)
if print_result==True:
print("labels: ", labels)
print("precision: ", precision)
print("recall: ", recall)
print("f-score: ", fscore)
print("support: ", support)
predict_p=clf.predict_proba(X_test)
labels
predict_p[0:3]
y_test[0:3]
y_pred = predict_p[:,1]
fpr, tpr, thresholds = roc_curve(y_test,y_pred, pos_label=1)
precision, recall, thresholds = precision_recall_curve(y_test, y_pred, pos_label=1)
auc_score= auc(fpr, tpr)
prc_score=auc(recall, precision)
if print_result==True:
print("AUC: {:.2%}".format(auc_score), "PRC: {:.2%}".format(prc_score))
plt.figure();
plt.plot(fpr, tpr, color='darkorange', lw=2);
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--');
plt.xlim([0.0, 1.0]);
plt.ylim([0.0, 1.05]);
plt.xlabel('False Positive Rate');
plt.ylabel('True Positive Rate');
plt.title('AUC of SVM Model');
plt.show()
plt.figure();
plt.plot(recall, precision, color='darkorange', lw=2);
plt.xlim([0.0, 1.0]);
plt.ylim([0.0, 1.05]);
plt.xlabel('Recall');
plt.ylabel('Precision');
plt.title('Precision_Recall_Curve of SVM Model');
plt.show();
return auc_score, prc_score
Then, to investigate the impact of sample size on the above classifier performance, I defined another function as follows:
def sample_size_impact(train_docs, train_y, test_docs, test_y):
auc_list_svm=[]
t_size = np.linspace(500,12000, 24)
for i in range (int(len(train_docs)/500)):
auc_score_svm= create_model(train_docs[:(i+1)*500], train_y[:(i+1)*500], test_docs, test_y, \
model_type='svm', stop_words = 'english', min_df = 1, print_result=False, algorithm_para=1.0)
auc_list_svm.append(auc_score_svm)
plt.figure();
plt.plot(auc_list_svm, color='darkorange');
plt.xlabel('Smple Size');
plt.ylabel('AUC');
plt.title('sample size impact comparison');
plt.show()
But the sample_size_impact function is not working correctly.
Would you please investigate my code and tell me where I made a mistake?

You have an error in create_model, you're using the global (full) training data train every time instead of the argument train_docs. It should be:
tfidf_vect.fit_transform(train_docs["text"])
y_test=test_docs['label'].values
y_train=train_docs["label"].values
X_train=tfidf_vect.fit_transform(train_docs['text'].values)
X_test=tfidf_vect.transform(test_docs['text'].values)

ROC curve with Leave-One-Out Cross validation in sklearn

I want to plot a ROC curve of a classifier using leave-one-out cross validation.
It seems that a similar question has been asked here but without any answer.
In another question here is was stated:
In order to obtain a meaningful ROC AUC with LeaveOneOut, you need to
calculate probability estimates for each fold (each consisting of just
one observation), then calculate the ROC AUC on the set of all these
probability estimates.
Additionally, in the official scikit-learn website there is a similar example but using KFold cross validation (https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc_crossval.html#sphx-glr-auto-examples-model-selection-plot-roc-crossval-py).
So for the leave-one-out cross validation case, I am thinking of gathering all the probability prediction on the test sets (one sample at the time) and after having the predicted probabilities for all my folds, to compute and plot the ROC curve.
Does this seems okay? I do not see any other way to achieve my goal.
Here is my code:
from sklearn.svm import SVC
import numpy as np, matplotlib.pyplot as plt, pandas as pd
from sklearn.model_selection import cross_val_score,cross_val_predict, KFold, LeaveOneOut, StratifiedKFold
from sklearn.metrics import roc_curve, auc
from sklearn import datasets
# Import some data to play with
iris = datasets.load_iris()
X_svc = iris.data
y = iris.target
X_svc, y = X_svc[y != 2], y[y != 2]
clf = SVC(kernel='linear', class_weight='balanced', probability=True, random_state=0)
kf = LeaveOneOut()
all_y = []
all_probs=[]
for train, test in kf.split(X_svc, y):
all_y.append(y[test])
all_probs.append(clf.fit(X_svc[train], y[train]).predict_proba(X_svc[test])[:,1])
all_y = np.array(all_y)
all_probs = np.array(all_probs)
fpr, tpr, thresholds = roc_curve(all_y,all_probs)
roc_auc = auc(fpr, tpr)
plt.figure(1, figsize=(12,6))
plt.plot(fpr, tpr, lw=2, alpha=0.5, label='LOOCV ROC (AUC = %0.2f)' % (roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='k', label='Chance level', alpha=.8)
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.grid()
plt.show()

I believe the code is correct and the splitting too. I've added a few lines for validation purposes of both the implementation and the results:
from sklearn.model_selection import cross_val_score,cross_val_predict, KFold, LeaveOneOut, StratifiedKFold
from sklearn.metrics import roc_curve, auc
from sklearn import datasets
# Import some data to play with
iris = datasets.load_iris()
X_svc = iris.data
y = iris.target
X_svc, y = X_svc[y != 2], y[y != 2]
clf = SVC(kernel='linear', class_weight='balanced', probability=True, random_state=0)
kf = LeaveOneOut()
if kf.get_n_splits(X_svc) == len(X_svc):
print("They are the same length, splitting correct")
else:
print("Something is wrong")
all_y = []
all_probs=[]
for train, test in kf.split(X_svc, y):
all_y.append(y[test])
all_probs.append(clf.fit(X_svc[train], y[train]).predict_proba(X_svc[test])[:,1])
all_y = np.array(all_y)
all_probs = np.array(all_probs)
#print(all_y) #For validation
#print(all_probs) #For validation
fpr, tpr, thresholds = roc_curve(all_y,all_probs)
print(fpr, tpr, thresholds) #For validation
roc_auc = auc(fpr, tpr)
plt.figure(1, figsize=(12,6))
plt.plot(fpr, tpr, lw=2, alpha=0.5, label='LOOCV ROC (AUC = %0.2f)' % (roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='k', label='Chance level', alpha=.8)
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.grid()
plt.show()
The If line is meant to only make sure that the splitting is made n times, where n is the number of observations for the given dataset. This is because as the documentation states, LeaveOneOut works the same as Kfold(n_splits=n) and LeaveOneOut(p=1).
Also when printing the predicted proba values they were good, making sense of the curve. Congratz on your 1.00AUC!

ROC for multiclass classification

I'm doing different text classification experiments. Now I need to calculate the AUC-ROC for each task. For the binary classifications, I already made it work with this code:
scaler = StandardScaler(with_mean=False)
enc = LabelEncoder()
y = enc.fit_transform(labels)
feat_sel = SelectKBest(mutual_info_classif, k=200)
clf = linear_model.LogisticRegression()
pipe = Pipeline([('vectorizer', DictVectorizer()),
('scaler', StandardScaler(with_mean=False)),
('mutual_info', feat_sel),
('logistregress', clf)])
y_pred = model_selection.cross_val_predict(pipe, instances, y, cv=10)
# instances is a list of dictionaries
#visualisation ROC-AUC
fpr, tpr, thresholds = roc_curve(y, y_pred)
auc = auc(fpr, tpr)
print('auc =', auc)
plt.figure()
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b',
label='AUC = %0.2f'% auc)
plt.legend(loc='lower right')
plt.plot([0,1],[0,1],'r--')
plt.xlim([-0.1,1.2])
plt.ylim([-0.1,1.2])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
But now I need to do it for the multiclass classification task. I read somewhere that I need to binarize the labels, but I really don't get how to calculate ROC for multiclass classification. Tips?

As people mentioned in comments you have to convert your problem into binary by using OneVsAll approach, so you'll have n_class number of ROC curves.
A simple example:
from sklearn.metrics import roc_curve, auc
from sklearn import datasets
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
iris = datasets.load_iris()
X, y = iris.data, iris.target
y = label_binarize(y, classes=[0,1,2])
n_classes = 3
# shuffle and split training and test sets
X_train, X_test, y_train, y_test =\
train_test_split(X, y, test_size=0.33, random_state=0)
# classifier
clf = OneVsRestClassifier(LinearSVC(random_state=0))
y_score = clf.fit(X_train, y_train).decision_function(X_test)
# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
# Plot of a ROC curve for a specific class
for i in range(n_classes):
plt.figure()
plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f)' % roc_auc[i])
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()

This works for me and is nice if you want them on the same plot. It is similar to
#omdv's answer but maybe a little more succinct.
def plot_multiclass_roc(clf, X_test, y_test, n_classes, figsize=(17, 6)):
y_score = clf.decision_function(X_test)
# structures
fpr = dict()
tpr = dict()
roc_auc = dict()
# calculate dummies once
y_test_dummies = pd.get_dummies(y_test, drop_first=False).values
for i in range(n_classes):
fpr[i], tpr[i], _ = roc_curve(y_test_dummies[:, i], y_score[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
# roc for each class
fig, ax = plt.subplots(figsize=figsize)
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver operating characteristic example')
for i in range(n_classes):
ax.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f) for label %i' % (roc_auc[i], i))
ax.legend(loc="best")
ax.grid(alpha=.4)
sns.despine()
plt.show()
plot_multiclass_roc(full_pipeline, X_test, y_test, n_classes=16, figsize=(16, 10))

To plot the multi-class ROC use label_binarize function and the following code. Adjust and change the code depending on your application.
Example using Iris data:
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
from sklearn.multiclass import OneVsRestClassifier
from itertools import cycle
iris = datasets.load_iris()
X = iris.data
y = iris.target
# Binarize the output
y = label_binarize(y, classes=[0, 1, 2])
n_classes = y.shape[1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=0)
classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True,
random_state=0))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)
fpr = dict()
tpr = dict()
roc_auc = dict()
lw=2
for i in range(n_classes):
fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
colors = cycle(['blue', 'red', 'green'])
for i, color in zip(range(n_classes), colors):
plt.plot(fpr[i], tpr[i], color=color, lw=2,
label='ROC curve of class {0} (area = {1:0.2f})'
''.format(i, roc_auc[i]))
plt.plot([0, 1], [0, 1], 'k--', lw=lw)
plt.xlim([-0.05, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic for multi-class data')
plt.legend(loc="lower right")
plt.show()
In this example, you can print the y_score.
print(y_score)
array([[-3.58459897, -0.3117717 , 1.78242707],
[-2.15411929, 1.11394949, -2.393737 ],
[ 1.89199335, -3.89592195, -6.29685764],
.
.
.

Try this method.It worked for me also very simple to use
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot
from itertools import cycle
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import OneHotEncoder
#One-hot encoder
y_valid=y_test.values.reshape(-1,1)
ypred=pred_final.reshape(-1,1)
y_valid = pd.DataFrame(y_test)
ypred=pd.DataFrame(pred_final)
onehotencoder = OneHotEncoder()
y_valid= onehotencoder.fit_transform(y_valid).toarray()
ypred = onehotencoder.fit_transform(ypred).toarray()
n_classes = ypred.shape[1]
# Plotting and estimation of FPR, TPR
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
fpr[i], tpr[i], _ = roc_curve(y_valid[:, i], ypred[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
colors = cycle(['blue', 'green', 'red','darkorange','olive','purple','navy'])
for i, color in zip(range(n_classes), colors):
pyplot.plot(fpr[i], tpr[i], color=color, lw=1.5, label='ROC curve of class {0}
(area = {1:0.2f})' ''.format(i+1, roc_auc[i]))
pyplot.plot([0, 1], [0, 1], 'k--', lw=1.5)
pyplot.xlim([-0.05, 1.0])
pyplot.ylim([0.0, 1.05])
pyplot.xlabel('False Positive Rate',fontsize=12, fontweight='bold')
pyplot.ylabel('True Positive Rate',fontsize=12, fontweight='bold')
pyplot.tick_params(labelsize=12)
pyplot.legend(loc="lower right")
ax = pyplot.axes()
pyplot.show()

How to plot ROC curve in Python

I am trying to plot a ROC curve to evaluate the accuracy of a prediction model I developed in Python using logistic regression packages. I have computed the true positive rate as well as the false positive rate; however, I am unable to figure out how to plot these correctly using matplotlib and calculate the AUC value. How could I do that?

Here are two ways you may try, assuming your model is an sklearn predictor:
import sklearn.metrics as metrics
# calculate the fpr and tpr for all thresholds of the classification
probs = model.predict_proba(X_test)
preds = probs[:,1]
fpr, tpr, threshold = metrics.roc_curve(y_test, preds)
roc_auc = metrics.auc(fpr, tpr)
# method I: plt
import matplotlib.pyplot as plt
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
# method II: ggplot
from ggplot import *
df = pd.DataFrame(dict(fpr = fpr, tpr = tpr))
ggplot(df, aes(x = 'fpr', y = 'tpr')) + geom_line() + geom_abline(linetype = 'dashed')
or try
ggplot(df, aes(x = 'fpr', ymin = 0, ymax = 'tpr')) + geom_line(aes(y = 'tpr')) + geom_area(alpha = 0.2) + ggtitle("ROC Curve w/ AUC = %s" % str(roc_auc))

This is the simplest way to plot an ROC curve, given a set of ground truth labels and predicted probabilities. Best part is, it plots the ROC curve for ALL classes, so you get multiple neat-looking curves as well
import scikitplot as skplt
import matplotlib.pyplot as plt
y_true = # ground truth labels
y_probas = # predicted probabilities generated by sklearn classifier
skplt.metrics.plot_roc_curve(y_true, y_probas)
plt.show()
Here's a sample curve generated by plot_roc_curve. I used the sample digits dataset from scikit-learn so there are 10 classes. Notice that one ROC curve is plotted for each class.
Disclaimer: Note that this uses the scikit-plot library, which I built.

AUC curve For Binary Classification using matplotlib
from sklearn import svm, datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt
Load Breast Cancer Dataset
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target
Split the Dataset
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.33, random_state=44)
Model
clf = LogisticRegression(penalty='l2', C=0.1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
Accuracy
print("Accuracy", metrics.accuracy_score(y_test, y_pred))
AUC Curve
y_pred_proba = clf.predict_proba(X_test)[::,1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()

It is not at all clear what the problem is here, but if you have an array true_positive_rate and an array false_positive_rate, then plotting the ROC curve and getting the AUC is as simple as:
import matplotlib.pyplot as plt
import numpy as np
x = # false_positive_rate
y = # true_positive_rate
# This is the ROC curve
plt.plot(x,y)
plt.show()
# This is the AUC
auc = np.trapz(y,x)

Here is python code for computing the ROC curve (as a scatter plot):
import matplotlib.pyplot as plt
import numpy as np
score = np.array([0.9, 0.8, 0.7, 0.6, 0.55, 0.54, 0.53, 0.52, 0.51, 0.505, 0.4, 0.39, 0.38, 0.37, 0.36, 0.35, 0.34, 0.33, 0.30, 0.1])
y = np.array([1,1,0, 1, 1, 1, 0, 0, 1, 0, 1,0, 1, 0, 0, 0, 1 , 0, 1, 0])
# false positive rate
fpr = []
# true positive rate
tpr = []
# Iterate thresholds from 0.0, 0.01, ... 1.0
thresholds = np.arange(0.0, 1.01, .01)
# get number of positive and negative examples in the dataset
P = sum(y)
N = len(y) - P
# iterate through all thresholds and determine fraction of true positives
# and false positives found at this threshold
for thresh in thresholds:
FP=0
TP=0
for i in range(len(score)):
if (score[i] > thresh):
if y[i] == 1:
TP = TP + 1
if y[i] == 0:
FP = FP + 1
fpr.append(FP/float(N))
tpr.append(TP/float(P))
plt.scatter(fpr, tpr)
plt.show()

from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
y_true = # true labels
y_probas = # predicted results
fpr, tpr, thresholds = metrics.roc_curve(y_true, y_probas, pos_label=0)
# Print ROC curve
plt.plot(fpr,tpr)
plt.show()
# Print AUC
auc = np.trapz(tpr,fpr)
print('AUC:', auc)

Based on multiple comments from stackoverflow, scikit-learn documentation and some other, I made a python package to plot ROC curve (and other metric) in a really simple way.
To install package : pip install plot-metric (more info at the end of post)
To plot a ROC Curve (example come from the documentation) :
Binary classification
Let's load a simple dataset and make a train & test set :
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
X, y = make_classification(n_samples=1000, n_classes=2, weights=[1,1], random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=2)
Train a classifier and predict test set :
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(n_estimators=50, random_state=23)
model = clf.fit(X_train, y_train)
# Use predict_proba to predict probability of the class
y_pred = clf.predict_proba(X_test)[:,1]
You can now use plot_metric to plot ROC Curve :
from plot_metric.functions import BinaryClassification
# Visualisation with plot_metric
bc = BinaryClassification(y_test, y_pred, labels=["Class 1", "Class 2"])
# Figures
plt.figure(figsize=(5,5))
bc.plot_roc_curve()
plt.show()
Result :
You can find more example of on the github and documentation of the package:
Github : https://github.com/yohann84L/plot_metric
Documentation : https://plot-metric.readthedocs.io/en/latest/

The previous answers assume that you indeed calculated TP/Sens yourself. It's a bad idea to do this manually, it's easy to make mistakes with the calculations, rather use a library function for all of this.
the plot_roc function in scikit_lean does exactly what you need:
http://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
The essential part of the code is:
for i in range(n_classes):
fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])

There is a library called metriculous that will do that for you:
$ pip install metriculous
Let's first mock some data, this would usually come from the test dataset and the model(s):
import numpy as np
def normalize(array2d: np.ndarray) -> np.ndarray:
return array2d / array2d.sum(axis=1, keepdims=True)
class_names = ["Cat", "Dog", "Pig"]
num_classes = len(class_names)
num_samples = 500
# Mock ground truth
ground_truth = np.random.choice(range(num_classes), size=num_samples, p=[0.5, 0.4, 0.1])
# Mock model predictions
perfect_model = np.eye(num_classes)[ground_truth]
noisy_model = normalize(
perfect_model + 2 * np.random.random((num_samples, num_classes))
)
random_model = normalize(np.random.random((num_samples, num_classes)))
Now we can use metriculous to generate a table with various metrics and diagrams, including ROC curves:
import metriculous
metriculous.compare_classifiers(
ground_truth=ground_truth,
model_predictions=[perfect_model, noisy_model, random_model],
model_names=["Perfect Model", "Noisy Model", "Random Model"],
class_names=class_names,
one_vs_all_figures=True, # This line is important to include ROC curves in the output
).save_html("model_comparison.html").display()
The ROC curves in the output:
The plots are zoomable and draggable, and you get further details when hovering with your mouse over the plot:

You can also follow the offical documentation form scikit:
https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html#sphx-glr-auto-examples-model-selection-plot-roc-py

I have made a simple function included in a package for the ROC curve. I just started practicing machine learning so please also let me know if this code has any problem!
Have a look at the github readme file for more details! :)
https://github.com/bc123456/ROC
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
def plot_ROC(y_train_true, y_train_prob, y_test_true, y_test_prob):
'''
a funciton to plot the ROC curve for train labels and test labels.
Use the best threshold found in train set to classify items in test set.
'''
fpr_train, tpr_train, thresholds_train = roc_curve(y_train_true, y_train_prob, pos_label =True)
sum_sensitivity_specificity_train = tpr_train + (1-fpr_train)
best_threshold_id_train = np.argmax(sum_sensitivity_specificity_train)
best_threshold = thresholds_train[best_threshold_id_train]
best_fpr_train = fpr_train[best_threshold_id_train]
best_tpr_train = tpr_train[best_threshold_id_train]
y_train = y_train_prob > best_threshold
cm_train = confusion_matrix(y_train_true, y_train)
acc_train = accuracy_score(y_train_true, y_train)
auc_train = roc_auc_score(y_train_true, y_train)
print 'Train Accuracy: %s ' %acc_train
print 'Train AUC: %s ' %auc_train
print 'Train Confusion Matrix:'
print cm_train
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(121)
curve1 = ax.plot(fpr_train, tpr_train)
curve2 = ax.plot([0, 1], [0, 1], color='navy', linestyle='--')
dot = ax.plot(best_fpr_train, best_tpr_train, marker='o', color='black')
ax.text(best_fpr_train, best_tpr_train, s = '(%.3f,%.3f)' %(best_fpr_train, best_tpr_train))
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve (Train), AUC = %.4f'%auc_train)
fpr_test, tpr_test, thresholds_test = roc_curve(y_test_true, y_test_prob, pos_label =True)
y_test = y_test_prob > best_threshold
cm_test = confusion_matrix(y_test_true, y_test)
acc_test = accuracy_score(y_test_true, y_test)
auc_test = roc_auc_score(y_test_true, y_test)
print 'Test Accuracy: %s ' %acc_test
print 'Test AUC: %s ' %auc_test
print 'Test Confusion Matrix:'
print cm_test
tpr_score = float(cm_test[1][1])/(cm_test[1][1] + cm_test[1][0])
fpr_score = float(cm_test[0][1])/(cm_test[0][0]+ cm_test[0][1])
ax2 = fig.add_subplot(122)
curve1 = ax2.plot(fpr_test, tpr_test)
curve2 = ax2.plot([0, 1], [0, 1], color='navy', linestyle='--')
dot = ax2.plot(fpr_score, tpr_score, marker='o', color='black')
ax2.text(fpr_score, tpr_score, s = '(%.3f,%.3f)' %(fpr_score, tpr_score))
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve (Test), AUC = %.4f'%auc_test)
plt.savefig('ROC', dpi = 500)
plt.show()
return best_threshold
A sample roc graph produced by this code

When you need the probabilities as well... The following gets the AUC value and plots it all in one shot.
from sklearn.metrics import plot_roc_curve
plot_roc_curve(m,xs,y)
When you have the probabilities... you can't get the auc value and plots in one shot. Do the following:
from sklearn.metrics import roc_curve
fpr,tpr,_ = roc_curve(y,y_probas)
plt.plot(fpr,tpr, label='AUC = ' + str(round(roc_auc_score(y,m.oob_decision_function_[:,1]), 2)))
plt.legend(loc='lower right')

In my code, I have X_train and y_train and classes are 0 and 1. The clf.predict_proba() method computes probabilities for both classes for every data point. I compare the probability of class1 with different values of threshold.
probability = clf.predict_proba(X_train)
def plot_roc(y_train, probability):
threshold_values = np.linspace(0,1,100) #Threshold values range from 0 to 1
FPR_list = []
TPR_list = []
for threshold in threshold_values: #For every value of threshold
y_pred = [] #Classify every data point in the test set
#prob is an array consisting of 2 values - Probability of datapoint in Class0 and Class1.
for prob in probability:
if ((prob[1])<threshold): #Prob of class1 (positive class)
y_pred.append(0)
continue
elif ((prob[1])>=threshold): y_pred.append(1)
#Plot Confusion Matrix and Obtain values of TP, FP, TN, FN
c_m = confusion_matrix(y, y_pred)
TN = c_m[0][0]
FP = c_m[0][1]
FN = c_m[1][0]
TP = c_m[1][1]
FPR = FP/(FP + TN) #Obtain False Positive Rate
TPR = TP/(TP + FN) #Obtain True Positive Rate
FPR_list.append(FPR)
TPR_list.append(TPR)
fig = plt.figure()
plt.plot(FPR_list, TPR_list)
plt.ylabel('TPR')
plt.xlabel('FPR')
plt.show()

A new open-source I help maintain have many ways to test model performance. to see ROC curve you can do:
from deepchecks.checks import RocReport
from deepchecks import Dataset
RocReport().run(Dataset(df, label='target'), model)
And the result looks like this:
A more elaborate example of RocReport can be found here

As The ROC Curve is only for Binary Classification
Then use your data Binarize and raveled
# Binarize data for getting AUC
y_test_bin = label_binarize(y_test, classes=range(y_train.min() , y_train.max()))
y_pred_bin = label_binarize(Predicted_result, classes=range(y_train.min() , y_train.max()))
# Calculate FP , TP rate
fpr, tpr, _ = roc_curve(y_test_bin.ravel(), y_pred_bin.ravel() )
# Get AUC ,
auc = roc_auc_score(y_test_bin, y_pred_bin, average='micro', multi_class='ovr')
#create ROC curve
plt.plot(fpr,tpr , label= f"AUC = {auc}" , )
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.title('ROC')
plt.legend(loc=7)
plt.figure(figsize = [])
plt.show()

Computing scikit-learn multiclass ROC Curve with cross validation (CV)

I want to evaluate my classification models with a ROC curve. I'm struggling with computing a multiclass ROC Curve for a cross-validated data set. There is no division in train and test set, because of the cross-validation.
Underneath, you can see the code I already tried.
scaler = StandardScaler(with_mean=False)
enc = LabelEncoder()
y = enc.fit_transform(labels)
vec = DictVectorizer()
feat_sel = SelectKBest(mutual_info_classif, k=200)
n_classes = 3
# Pipeline for computing of ROC curves
clf = OneVsRestClassifier(LogisticRegression(solver='newton-cg', multi_class='multinomial'))
clf = clf.label_binarizer_
pipe = Pipeline([('vectorizer', vec),
('scaler', scaler),
('Logreg', clf),
('mutual_info',feat_sel)])
y_pred = model_selection.cross_val_predict(pipe, instances, y, cv=10)
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
fpr[i], tpr[i], _ = roc_curve(y[:, i], y_pred[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
# Plot of a ROC curve for a specific class
for i in range(n_classes):
plt.figure()
plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f)' % roc_auc[i])
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
I thought I could binarize my y_pred by using the attribute label_binarizer_ for the OneVsRestclassifier as mentioned here: sklearn.multiclass.OneVsRestclassifier.
However, I get the following error: AttributeError: 'OneVsRestClassifier' object has no attribute 'label_binarizer_'. I don't get this error, because the documentation tells me that it is an
attribute from this classifier.
when I add instances = DataFrame(instances) and clf.fit(instances, y), I get the error: ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
. Instances is a list of feature vector dictionaries. I tried adding instances = np.array(instances) instead, but this gives me this error: TypeError: float() argument must be a string or a number, not 'dict'
What am I doing wrong?

You can use label_binarizer this way and get the desired plot as output.
Example using Iris data:
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
iris = datasets.load_iris()
X = iris.data
y = iris.target
# Binarize the output
y_bin = label_binarize(y, classes=[0, 1, 2])
n_classes = y_bin.shape[1]
pipe= Pipeline([('scaler', StandardScaler()), ('clf', LogisticRegression())])
# or
#clf = OneVsRestClassifier(LogisticRegression())
#pipe= Pipeline([('scaler', StandardScaler()), ('clf', clf)])
y_score = cross_val_predict(pipe, X, y, cv=10 ,method='predict_proba')
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_score[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
colors = cycle(['blue', 'red', 'green'])
for i, color in zip(range(n_classes), colors):
plt.plot(fpr[i], tpr[i], color=color, lw=lw,
label='ROC curve of class {0} (area = {1:0.2f})'
''.format(i, roc_auc[i]))
plt.plot([0, 1], [0, 1], 'k--', lw=lw)
plt.xlim([-0.05, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic for multi-class data')
plt.legend(loc="lower right")
plt.show()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Miss-matching ROC and AUC calculations in python - python

Related

My function to investigate the impact of sample size on the text classifier performance is not working correctly

ROC curve with Leave-One-Out Cross validation in sklearn

ROC for multiclass classification

How to plot ROC curve in Python

Computing scikit-learn multiclass ROC Curve with cross validation (CV)

Categories

Resources