Does anyone know why these white lines are quartering my confusion matrix? I've changed many of the parameters but cannot figure it out. The only thing that makes them go away is if I don't label the blocks at all, ie '0', '1',... but that's obviously not what I want. Any help would be appreciated.
Code:
def plot_confusion_matrix(cm,
target_names = ['1', '2', '3', '4'],
title = 'Confusion matrix',
cmap = None,
normalize = False):
"""
given a sklearn confusion matrix (cm), make a nice plot
Arguments
---------
cm: confusion matrix from sklearn.metrics.confusion_matrix
target_names: given classification classes such as [0, 1, 2]
the class names, for example: ['high', 'medium', 'low']
title: the text to display at the top of the matrix
cmap: the gradient of the values displayed from matplotlib.pyplot.cm
see http://matplotlib.org/examples/color/colormaps_reference.html
plt.get_cmap('jet') or plt.cm.Blues
normalize: If False, plot the raw numbers
If True, plot the proportions
Usage
-----
plot_confusion_matrix(cm = cm, # confusion matrix created by
# sklearn.metrics.confusion_matrix
normalize = True, # show proportions
target_names = y_labels_vals, # list of names of the classes
title = best_estimator_name) # title of graph
Citiation
---------
http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
"""
import matplotlib.pyplot as plt
import numpy as np
import itertools
accuracy = np.trace(cm) / float(np.sum(cm))
misclass = 1 - accuracy
if cmap is None:
cmap = plt.get_cmap('Blues')
plt.figure(figsize = (8, 6))
plt.imshow(cm, interpolation = 'nearest', cmap = cmap)
plt.title(title)
plt.colorbar()
if target_names is not None:
tick_marks = np.arange(len(target_names))
plt.xticks(tick_marks, target_names, rotation = 0)
plt.yticks(tick_marks, target_names)
if normalize:
cm = cm.astype('float') / cm.sum(axis = 1)[:, np.newaxis]
thresh = cm.max() / 1.5 if normalize else cm.max() / 2
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
if normalize:
plt.text(j, i, "{:0.4f}".format(cm[i, j]),
horizontalalignment = "center",
color = "white" if cm[i, j] > thresh else "black")
else:
plt.text(j, i, "{:,}".format(cm[i, j]),
horizontalalignment = "center",
color = "white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
plt.show()
plot_confusion_matrix(cm = (confusion),
normalize = True,
target_names = ['1', '2', '3', '4'],
title = "Confusion Matrix")
Output is:
plt.figure(figsize=(10,5))
plt.grid(False)
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=False, title='Normalized confusion matrix')
Try resetting it
import seaborn as sns
# Try resetting it
sns.reset_orig()
Related
I have N samples, each of which has n values in it. For each sample, I plot the histogram of n values. In total, I would have N histogram. Now I would like to have a plot that shows the mean of these histograms and have the 5-95% quantile region shaded. An example of such a plot would look like (please don't mind the dashed line and black line, just the shaded area.)
So far, I have plotted all the N histograms on top of each other.
I realized that the mean of these histograms would be the histogram of all [N X n] values together. A sample code for this would look like
import numpy as np
import matplotlib.pyplot as plt
samples = []
N = 20
n = 250
for i in range(N):
samples.append(np.random.normal(loc=np.random.rand(1,)[0]/5-0.1, scale=1., size=n))
values_all = None
for i in range(len(samples)):
values = samples[i]
print(values)
weights = np.ones_like(values) / float(len(values))
plt.hist(values, range=[-4, 4], density=False, histtype='step', color='red', bins=15, weights=weights)
if values_all is None:
values_all = values
else:
values_all = np.concatenate(([values_all, values]), axis=0)
weights = np.ones_like(values_all) / float(len(values_all))
plt.hist(values_all, range=[-4, 4], density=False, histtype='step', color='black', bins=15, weights=weights)
plt.show()
Any suggestions on how to find and plot the 5-95% quantiles would be appreciated.
You should get the probabilities for each bin and find the quantiles for them. Here is a sample code
import numpy as np
import matplotlib.pyplot as plt
# generate data
samples = []
N = 20
n = 250
for i in range(N):
samples.append(np.random.normal(loc=np.random.rand(1,)[0]/5-0.1, scale=1., size=n))
prob_all = None
for i in range(len(samples)):
values = samples[i]
weights = np.ones_like(values) / float(len(values))
n, bins, patches = plt.hist(values, range=[-4, 4], density=False, histtype='step', color='red', bins=15, weights=weights, alpha = 0.5)
# concatanate bin probabilities
if prob_all is None:
prob_all = n.reshape(-1,1)
else:
prob_all = np.concatenate(([prob_all, n.reshape(-1,1)]), axis=1)
plt.close() # don't plot previous histograms
# find quantiles for each bin
quant = np.quantile(prob_all, [0.05, 0.5, 0.95], axis=1)
# plot histogram from bins and probabilities
def plt_hist(bins, quant, clr, alph, lw):
for j in range(len(n)):
plt.plot([bins[j], bins[j + 1]], [quant[j], quant[j]], color=clr, linewidth=lw, alpha = alph)
plt.plot([bins[0], bins[0]], [0., quant[0]], color=clr, linewidth=lw, alpha = alph)
plt.plot([bins[len(n)], bins[len(n)]], [quant[len(n) - 1], 0.], color=clr, linewidth=lw, alpha = alph)
for j in range(len(n) - 1):
plt.plot([bins[j + 1], bins[j + 1]], [quant[j], quant[j + 1]], color=clr, linewidth=lw, alpha = alph)
fig, ax = plt.subplots()
ax.set_ylim([0., 0.3])
ax.set_xlim([-4.5, 4.5])
# plot 50% quantile (mean)
plt_hist(bins, quant[1], clr='blue', alph=1., lw=1.)
# shade between quantiles
for i in range(len(n)):
x = np.arange(bins[i], bins[i+1], 0.0001)
y1 = quant[0,i]
y2 = quant[2,i]
ax.fill_between(x, y1, y2, facecolor='red', alpha=0.4)
# boarder for shadings
plt_hist(bins, quant[0], clr='black', alph=.2, lw=1.)
plt_hist(bins, quant[2], clr='black', alph=.2, lw=1.)
plt.show()
I have a problem with the confusion matrix when i using the code in scikit-learn
this what i got
as you see the first class is cut
!!!update!!!
i force it work by using this rows
plt.xlim(-0.5, 5.5)
plt.ylim(5.5, -0.5)
and get this
but i still wants to know if there is other way to make it not specific to 5 classes.
i already try to change the ax size but it wasnt work out
if not title:
if normalize:
title = 'Normalized confusion matrix'
else:
title = 'Confusion matrix, without normalization'
# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)
# Only use the labels that appear in the data
classes = list(unique_labels(y_true, y_pred))
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
print(cm)
fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
# We want to show all ticks...
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
# ... and label them with the respective list entries
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
rotation_mode="anchor")
# Loop over data dimensions and create text annotations.
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig.tight_layout()
return ax
plot_confusion_matrix(y, y_pred, classes=[0, 1, 2, 3, 4, 5], normalize=True,
title='Normalized confusion matrix')
i want that the box will not cut the first and last row
You need in this case to set xlim and ylim and here is an automatic way to do so for e.g. 10 classes.
Briefly, you need:
plt.xlim(-0.5, len(np.unique(y))-0.5)
plt.ylim(len(np.unique(y))-0.5, -0.5)
Full example:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = np.repeat(np.arange(0,10),15)
class_names = np.array(['1', '2', '3', '4', '5','6','7','8','9','10'])
# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
classifier = svm.SVC(kernel='linear', C=0.01)
y_pred = classifier.fit(X_train, y_train).predict(X_test)
def plot_confusion_matrix(y_true, y_pred, classes,
normalize=False,
title=None,
cmap=plt.cm.Blues):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
if not title:
if normalize:
title = 'Normalized confusion matrix'
else:
title = 'Confusion matrix, without normalization'
# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)
# Only use the labels that appear in the data
classes = classes[unique_labels(y_true, y_pred)]
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
print(cm)
fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
# We want to show all ticks...
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
# ... and label them with the respective list entries
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
rotation_mode="anchor")
# Loop over data dimensions and create text annotations.
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig.tight_layout()
plt.xlim(-0.5, len(np.unique(y))-0.5)
plt.ylim(len(np.unique(y))-0.5, -0.5)
return ax
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names,
title='Confusion matrix, without normalization')
# Plot normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True,
title='Normalized confusion matrix')
plt.show()
I'm using the following function to generate a confusion matrix:
def plot_confusion_matrix(cm, classes, normalize=False, cmap=cm.Blues, png_output=None, show=True):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
title='Normalized confusion matrix'
else:
title='Confusion matrix'
f = plt.figure()
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, format(cm[i, j], fmt),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
if png_output is not None:
os.makedirs(png_output, exist_ok=True)
f.savefig(os.path.join(png_output,'confusion_matrix.png'), bbox_inches='tight')
if show:
plt.show()
plt.close(f)
else:
plt.close(f)
When I have a few classes I get a neat chart like this one:
But when I have a large number of classes I get this:
I tried to use the same approach as used on this solution Python boxplot matplotlib automatic figure size based on the number of categories but it didn't work.
How can I have my confusion matrix adjusting its size based on the number of classes like in the boxplot solution above ?
UPDATE 1
After including the ticks position and dynamic figwidth
def plot_confusion_matrix(y_true,y_pred, classes, normalize=False, cmap=cm.Blues, png_output=None, show=True):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
cm = confusion_matrix(y_true,y_pred)
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
title='Normalized confusion matrix'
else:
title='Confusion matrix'
# Calculate chart area size
leftmargin = 0.5 # inches
rightmargin = 0.5 # inches
categorysize = 0.5 # inches
figwidth = leftmargin + rightmargin + (len(classes) * categorysize)
f = plt.figure(figsize=(figwidth, figwidth))
# Create an axes instance and ajust the subplot size
ax = f.add_subplot(111)
ax.set_aspect(1)
f.subplots_adjust(left=leftmargin/figwidth, right=1-rightmargin/figwidth, top=0.94, bottom=0.1)
res = ax.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar(res)
ax.set_xticks(range(len(classes)))
ax.set_yticks(range(len(classes)))
ax.set_xticklabels(classes, rotation=45, ha='right')
ax.set_yticklabels(classes)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
ax.text(j, i, format(cm[i, j], fmt),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
# plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
if png_output is not None:
os.makedirs(png_output, exist_ok=True)
f.savefig(os.path.join(png_output,'confusion_matrix.png'), bbox_inches='tight')
if show:
plt.show()
plt.close(f)
else:
plt.close(f)
Best Regards.
Kleyson Rios.
Getting the correct chart now with #ImportanceOfBeingErnest helps.
Below the final code:
def plot_confusion_matrix(cm, classes, normalize=False, cmap=cm.Blues, png_output=None, show=True):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
title='Normalized confusion matrix'
else:
title='Confusion matrix'
# Calculate chart area size
leftmargin = 0.5 # inches
rightmargin = 0.5 # inches
categorysize = 0.5 # inches
figwidth = leftmargin + rightmargin + (len(classes) * categorysize)
f = plt.figure(figsize=(figwidth, figwidth))
# Create an axes instance and ajust the subplot size
ax = f.add_subplot(111)
ax.set_aspect(1)
f.subplots_adjust(left=leftmargin/figwidth, right=1-rightmargin/figwidth, top=0.94, bottom=0.1)
res = ax.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar(res)
ax.set_xticks(range(len(classes)))
ax.set_yticks(range(len(classes)))
ax.set_xticklabels(classes, rotation=45, ha='right')
ax.set_yticklabels(classes)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
ax.text(j, i, format(cm[i, j], fmt),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
# plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
if png_output is not None:
os.makedirs(png_output, exist_ok=True)
f.savefig(os.path.join(png_output,'confusion_matrix.png'), bbox_inches='tight')
if show:
plt.show()
plt.close(f)
else:
plt.close(f)
I'm working on plotting sklearn classification report and my plot generated is very narrow, and difficult to read the labels. I used the post here to get the plotting code.
Any suggestions on how to stretch this plot out horizontally? Thank you
def plot_classification_report(cr, title='Classification report ', with_avg_total=False, cmap=plt.cm.Blues):
lines = cr.split('\n')
classes = []
plotMat = []
for line in lines[2 : (len(lines) - 3)]:
#print(line)
t = line.split()
# print(t)
classes.append(t[0])
v = [float(x) for x in t[1: len(t) - 1]]
#print(v)
plotMat.append(v)
if with_avg_total:
aveTotal = lines[len(lines) - 1].split()
classes.append('avg/total')
vAveTotal = [float(x) for x in t[1:len(aveTotal) - 1]]
plotMat.append(vAveTotal)
plt.imshow(plotMat, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
x_tick_marks = np.arange(3)
y_tick_marks = np.arange(len(classes))
plt.xticks(x_tick_marks, ['precision', 'recall', 'f1-score'], rotation=45)
plt.yticks(y_tick_marks, classes)
#plt.tight_layout()
plt.ylabel('Classes')
plt.xlabel('Measures')
plot_classification_report(classification_report(y_test, y_pred))
By default, the axes will have the aspect ratio of the image. You can change that by using the aspect argument to imshow.
Either put it to "auto", to let the image extend to the given space to the axes.
Or, set it to any number, denoting the height over width ratio; number == height/width.
In this case try
plt.imshow(plotMat, interpolation='nearest', cmap=cmap, aspect="auto")
or
plt.imshow(plotMat, interpolation='nearest', cmap=cmap, aspect=len(classes)/12.)
and adapt it to your needs.
I am plotting a confusion matrix for a multiple labelled data, where labels look like:
label1: 1, 0, 0, 0
label2: 0, 1, 0, 0
label3: 0, 0, 1, 0
label4: 0, 0, 0, 1
I am able to classify successfully using the below code. I only need some help to plot confusion matrix.
for i in range(4):
y_train= y[:,i]
print('Train subject %d, class %s' % (subject, cols[i]))
lr.fit(X_train[::sample,:],y_train[::sample])
pred[:,i] = lr.predict_proba(X_test)[:,1]
I used the following code to print confusion matrix, but it always return a 2X2 matrix
prediction = lr.predict(X_train)
print(confusion_matrix(y_train, prediction))
I found a function that can plot the confusion matrix which generated from sklearn.
import numpy as np
def plot_confusion_matrix(cm,
target_names,
title='Confusion matrix',
cmap=None,
normalize=True):
"""
given a sklearn confusion matrix (cm), make a nice plot
Arguments
---------
cm: confusion matrix from sklearn.metrics.confusion_matrix
target_names: given classification classes such as [0, 1, 2]
the class names, for example: ['high', 'medium', 'low']
title: the text to display at the top of the matrix
cmap: the gradient of the values displayed from matplotlib.pyplot.cm
see http://matplotlib.org/examples/color/colormaps_reference.html
plt.get_cmap('jet') or plt.cm.Blues
normalize: If False, plot the raw numbers
If True, plot the proportions
Usage
-----
plot_confusion_matrix(cm = cm, # confusion matrix created by
# sklearn.metrics.confusion_matrix
normalize = True, # show proportions
target_names = y_labels_vals, # list of names of the classes
title = best_estimator_name) # title of graph
Citiation
---------
http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
"""
import matplotlib.pyplot as plt
import numpy as np
import itertools
accuracy = np.trace(cm) / float(np.sum(cm))
misclass = 1 - accuracy
if cmap is None:
cmap = plt.get_cmap('Blues')
plt.figure(figsize=(8, 6))
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
if target_names is not None:
tick_marks = np.arange(len(target_names))
plt.xticks(tick_marks, target_names, rotation=45)
plt.yticks(tick_marks, target_names)
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
thresh = cm.max() / 1.5 if normalize else cm.max() / 2
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
if normalize:
plt.text(j, i, "{:0.4f}".format(cm[i, j]),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
else:
plt.text(j, i, "{:,}".format(cm[i, j]),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
plt.show()
It will look like this
This works the best for me :
from sklearn.metrics import multilabel_confusion_matrix
y_unique = y_test.unique()
mcm = multilabel_confusion_matrix(y_test, y_pred, labels = y_unique)
mcm
I see this is still an open issue in sklearn's repository:
https://github.com/scikit-learn/scikit-learn/issues/3452
However there have been some attempts at implementing it. From the same #3452 thread issue:
https://github.com/Magellanea/scikit-learn/commit/514287c1d5dad2f0ab4918dc4da5cf7053fe6734#diff-b04acd877dd793f28ae7be13a999ed88R187
You can check the code proposed in the function and see if that fits your needs.
from sklearn.metrics import multilabel_confusion_matrix
mul_c = multilabel_confusion_matrix(
test_Y,
pred_k,
labels=["benign", "dos","probe","r2l","u2r"])
mul_c
I found an easy solution with sklearn and seaborn libraries.
from sklearn.metrics import confusion_matrix, classification_report
from matplotlib import pyplot as plt
import seaborn as sns
def plot_confusion_matrix(y_test,y_scores, classNames):
y_test=np.argmax(y_test, axis=1)
y_scores=np.argmax(y_scores, axis=1)
classes = len(classNames)
cm = confusion_matrix(y_test, y_scores)
print("**** Confusion Matrix ****")
print(cm)
print("**** Classification Report ****")
print(classification_report(y_test, y_scores, target_names=classNames))
con = np.zeros((classes,classes))
for x in range(classes):
for y in range(classes):
con[x,y] = cm[x,y]/np.sum(cm[x,:])
plt.figure(figsize=(40,40))
sns.set(font_scale=3.0) # for label size
df = sns.heatmap(con, annot=True,fmt='.2', cmap='Blues',xticklabels= classNames , yticklabels= classNames)
df.figure.savefig("image2.png")
classNames = ['A', 'B', 'C', 'D', 'E']
plot_confusion_matrix(y_test,y_scores, classNames)
#y_test is your ground truth
#y_scores is your predicted probabilities
Just use pandas with gradient coloring:
cm = confusion_matrix(y_true, y_pred)
cm = pd.DataFrame(data=cm, columns = np.unique(y_true), index = np.unique(y_true))
cm = (cm / cm.sum(axis = 1).values.reshape(-1,1)) # to fractions of 1
cm.style.background_gradient().format(precision=2)
By now pandas has nice options for table formatting and decoration.