I am trying to create a confusion matrix in TensorFlow but I am getting a
TypeError: Image data cannot be converted to float.
The images are predicted accurately, but now I want to show the confusion matrix using matplotlib. I tried converting to np.array(), but the error is still the same.
I am following the official documentation for confusion matrix from scikit-learn.
https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print the confusion matrix and draw it on the current pyplot figure.
    With `normalize=True` every row is divided by its sum first.
    """
    if not normalize:
        print('Confusion matrix, without normalization')
    else:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # One tick per class on both axes, labelled with the class names.
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    cell_format = '.2f' if normalize else 'd'
    half_max = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            # Matrix cell (row, col) is drawn at plot position x=col, y=row.
            plt.text(col, row, format(value, cell_format),
                     horizontalalignment="center",
                     color="white" if value > half_max else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
# Scan the six scores in order and record the first one above 0.85.
# The value appended is the score itself, not the class index.
for class_idx in range(6):
    score = result[0][class_idx]
    if score > 0.85:
        predictions.append(score)
        break
# Names passed to plot_confusion_matrix as tick labels for the six classes.
class_names = ['Up', 'Down', 'Left', 'Right', 'Forward', 'Backward']
# label_list contains the filename e.g. hand1.jpg, hand2.jpg....
# Compute confusion matrix
# NOTE(review): this is TensorFlow's confusion_matrix, not scikit-learn's;
# presumably it yields a tensor rather than a NumPy array, which would
# explain the imshow TypeError below - confirm.
cnf_matrix = tf.confusion_matrix(label_list,predictions,num_classes=6)
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plt.figure()
# ERROR HERE
plot_confusion_matrix(cnf_matrix, classes=class_names,title='Confusion matrix, without normalization')
# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,title='Normalized confusion matrix')
# Display both figures.
plt.show()
I have not tested it on my PC. Your description is a little ambiguous to me (the line of the error, etc.), but the main difference between your code and the documentation you linked is confusion_matrix(). Just try the confusion_matrix() of scikit-learn instead of the confusion_matrix() of TensorFlow (the linked example uses the former). In my opinion, that is the easiest way to go.
EDIT:
Make your predictions like this:
# Record the index of the class whose score clears the threshold: the
# confusion matrix needs integer class labels, not the scores themselves.
for i in range(6):
    if result[0][i] > 0.85:
        predictions.append(i)
        # Stop at the first hit so one sample never contributes two labels
        # (a trailing `continue` here would have no effect).
        break
Then your predictions will not be continuous ones. Here your predictions should be integers since you are predicting class labels.
Related
I am trying to plot the confusion matrix, but I have a question, why the indices are flipped in the for loop below where we start with plt.text(j, i) instead of plt.text(i, j)? I assume that 2D confusion matrix has values as follows:
(0,0): true zeros.
(0,1): false ones.
(1,0): false zeros.
(1,1): true ones.
Code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd
from sklearn.metrics import confusion_matrix
import itertools
true = np.array([0, 1, 0])
pred = np.array([0., 0., 0.])
def plot_confusion_matrix(y_true, y_pred, title='', labels=None):
    """Plot the confusion matrix of `y_true` vs `y_pred` with per-cell counts.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels.
        title: Text shown above the matrix.
        labels: Class labels, in the order used for the rows/columns and
            for the tick labels. Defaults to ``[0, 1]``.
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    labels = [0, 1] if labels is None else list(labels)

    # Passing the labels keeps the matrix shape and the tick labels in sync.
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm)
    plt.title(title)
    fig.colorbar(cax)

    # Pin the tick positions before labelling them, instead of relying on
    # the automatic tick locations and padding the labels with ''.
    ticks = list(range(len(labels)))
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')

    fmt = 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Matrix cell (row i, column j) sits at plot coordinates x=j, y=i.
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="black" if cm[i, j] > thresh else "white")
    plt.show()
plot_confusion_matrix(true, pred, title='Confusion Matrix')
The first coordinate of the plot usually is drawn horizontally, while the first coordinate of the matrix usually is represented vertically.
For example, the upper-right square of the plot has coordinates x=1, y=0. It holds the false-positive count, which is stored in cell (0, 1) of the confusion matrix.
To bring them into line with each other, it is necessary to flip the matrix along the main diagonal, i.e. transpose it. This is why you see coordinate transposition when displaying the confusion matrix in the coordinate system of the plot layout.
I have a problem with the confusion matrix when using the code from scikit-learn.
this what i got
As you can see, the first class is cut off.
!!!update!!!
I forced it to work by using these lines:
# Hard-coded limits: x spans -0.5..5.5, and y runs from 5.5 down to -0.5
# so that row 0 stays at the top.
plt.xlim(-0.5, 5.5)
plt.ylim(5.5, -0.5)
and get this
But I would still like to know whether there is another way to do this that is not tied to a specific number of classes.
I have already tried changing the axes size, but it did not work.
if not title:
if normalize:
title = 'Normalized confusion matrix'
else:
title = 'Confusion matrix, without normalization'
# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)
# Only use the labels that appear in the data
classes = list(unique_labels(y_true, y_pred))
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
print(cm)
fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
# We want to show all ticks...
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
# ... and label them with the respective list entries
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
rotation_mode="anchor")
# Loop over data dimensions and create text annotations.
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig.tight_layout()
return ax
plot_confusion_matrix(y, y_pred, classes=[0, 1, 2, 3, 4, 5], normalize=True,
title='Normalized confusion matrix')
I want the plot not to cut off the first and last rows.
You need in this case to set xlim and ylim and here is an automatic way to do so for e.g. 10 classes.
Briefly, you need:
# The limits follow the number of distinct labels in y, so no class count is
# hard-coded; the y limits are given in descending order to keep row 0 on top.
plt.xlim(-0.5, len(np.unique(y))-0.5)
plt.ylim(len(np.unique(y))-0.5, -0.5)
Full example:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
# import some data to play with
iris = datasets.load_iris()
X = iris.data
# Synthetic target: the labels 0..9, each repeated 15 times (150 values).
# It is used in place of iris.target so that the example has 10 classes.
y = np.repeat(np.arange(0,10),15)
# Display names for the 10 classes; indexed by label inside the plot function.
class_names = np.array(['1', '2', '3', '4', '5','6','7','8','9','10'])
# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
classifier = svm.SVC(kernel='linear', C=0.01)
y_pred = classifier.fit(X_train, y_train).predict(X_test)
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print and plot the confusion matrix of `y_true` against `y_pred`.

    Parameters
    ----------
    y_true, y_pred : ground-truth and predicted integer class labels.
    classes : display names, indexed by class label (list or array).
    normalize : if True, each row is divided by its sum before plotting.
    title : plot title; a default depending on `normalize` is used if empty.
    cmap : colormap passed to `imshow`.

    Returns
    -------
    The matplotlib axes the matrix was drawn on.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data.
    # np.asarray lets callers pass a plain list of names as well.
    classes = np.asarray(classes)[unique_labels(y_true, y_pred)]

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows without any true samples stay at 0 instead of becoming NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()

    # Take the limits from the plotted matrix itself rather than from a
    # module-level `y`, so the function works for any number of classes and
    # does not depend on global state. The y limits are descending so that
    # row 0 stays at the top.
    ax.set_xlim(-0.5, cm.shape[1] - 0.5)
    ax.set_ylim(cm.shape[0] - 0.5, -0.5)
    return ax
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names,
title='Confusion matrix, without normalization')
# Plot normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True,
title='Normalized confusion matrix')
plt.show()
I'm working on a classification problem with 20 classes. I'm trying to visualize the results through a confusion matrix using matplotlib.
After computing my confusion matrix, I used the plot_confusion_matrix described here.
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Compute, print and draw the confusion matrix for the given labels.
    Rows are divided by their sum when `normalize=True`.
    Returns the matplotlib axes the matrix was drawn on.
    """
    if not title:
        title = ('Normalized confusion matrix' if normalize
                 else 'Confusion matrix, without normalization')

    # Build the matrix, then keep only the class names present in the data.
    cm = confusion_matrix(y_true, y_pred)
    classes = classes[unique_labels(y_true, y_pred)]

    if not normalize:
        print('Confusion matrix, without normalization')
    else:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)

    n_rows, n_cols = cm.shape[0], cm.shape[1]
    # One labelled tick per row and per column, plus title and axis labels.
    ax.set(xticks=np.arange(n_cols),
           yticks=np.arange(n_rows),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Tilt the x tick labels so that long names do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Write each cell's value on top of its coloured square.
    cell_format = '.2f' if normalize else 'd'
    half_max = cm.max() / 2.
    for row, col in np.ndindex(n_rows, n_cols):
        value = cm[row, col]
        ax.text(col, row, format(value, cell_format),
                ha="center", va="center",
                color="white" if value > half_max else "black")

    fig.tight_layout()
    return ax
Here is what it looks like :
It looks like the problem comes from dealing with too many classes, so a natural solution would be to scale up the plot. But doing that distorts it. Also, how do I choose the correct scale/size?
How do I proceed to make it look better ?
P.S. You can find the confusion matrix as a CSV file here.
Since you did not say that you must use matplotlib strictly, I recommend the seaborn library: it is much easier and simpler, and, if I am not wrong, it is built on top of matplotlib, so you can still change the details with matplotlib. Using seaborn looks like this:
import seaborn as sns
plt.figure(figsize = (10,10)) # This is the size of the figure
heatM = sns.heatmap(cov_vals, vmin = -1, vmax = 1,center = 0, cmap = sns.diverging_palette(20, 220, n = 200), square = True, annot = True) # These are the characteristics of the heatmap
heatM.set_ylim([10,0]) # This is the limit of the y axis (number of features)
And this is the result. Be careful with the limits in heatM.set_ylim([10,0]) (and the same for x): they must match the number of variables that you have.
hope this was useful.
I ended up using seaborn, but I faced a problem: the confusion matrix looked like this. It was actually a bug in the latest version (3.1.1) of matplotlib (see this issue). The solution was to use a prior version (3.1.0 in my case).
I'm working on plotting sklearn classification report and my plot generated is very narrow, and difficult to read the labels. I used the post here to get the plotting code.
Any suggestions on how to stretch this plot out horizontally? Thank you
def _parse_classification_report(cr, with_avg_total=False):
    """Return (row names, rows of [precision, recall, f1]) parsed from report text."""
    lines = cr.split('\n')
    names = []
    rows = []
    for line in lines[2:len(lines) - 3]:
        tokens = line.split()
        # A per-class row reads "<name> <precision> <recall> <f1> <support>".
        # Blank separator lines and shorter rows cannot be plotted as three
        # measures, so they are skipped instead of crashing the parser.
        if len(tokens) < 5:
            continue
        # Counting from the right keeps names that contain spaces intact.
        names.append(' '.join(tokens[:-4]))
        rows.append([float(x) for x in tokens[-4:-1]])

    if with_avg_total:
        # The summary is the last non-empty line; the text usually ends with
        # an empty line, so the final element cannot be used directly.
        summary = next((ln.split() for ln in reversed(lines) if ln.strip()), [])
        if len(summary) >= 5:
            names.append('avg/total')
            # Read the values from the summary row itself, not from the last
            # class row left over from the loop above.
            rows.append([float(x) for x in summary[-4:-1]])
    return names, rows


def plot_classification_report(cr, title='Classification report ', with_avg_total=False, cmap=plt.cm.Blues):
    """Draw a classification report as a heat map of precision/recall/f1.

    Args:
        cr: Report text as produced by `classification_report`.
        title: Title of the plot.
        with_avg_total: If True, append the report's final summary row
            under the label 'avg/total'.
        cmap: Colormap passed to `imshow`.
    """
    classes, plotMat = _parse_classification_report(cr, with_avg_total)

    plt.imshow(plotMat, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    # Three columns (one per measure), one row per parsed class.
    x_tick_marks = np.arange(3)
    y_tick_marks = np.arange(len(classes))
    plt.xticks(x_tick_marks, ['precision', 'recall', 'f1-score'], rotation=45)
    plt.yticks(y_tick_marks, classes)
    plt.ylabel('Classes')
    plt.xlabel('Measures')
plot_classification_report(classification_report(y_test, y_pred))
By default, the axes will have the aspect ratio of the image. You can change that by using the aspect argument to imshow.
Either put it to "auto", to let the image extend to the given space to the axes.
Or, set it to any number, denoting the height over width ratio; number == height/width.
In this case try
plt.imshow(plotMat, interpolation='nearest', cmap=cmap, aspect="auto")
or
plt.imshow(plotMat, interpolation='nearest', cmap=cmap, aspect=len(classes)/12.)
and adapt it to your needs.
I am plotting a confusion matrix for a multiple labelled data, where labels look like:
label1: 1, 0, 0, 0
label2: 0, 1, 0, 0
label3: 0, 0, 1, 0
label4: 0, 0, 0, 1
I am able to classify successfully using the below code. I only need some help to plot confusion matrix.
# Fit the classifier once per label column and store one score column each.
for i in range(4):
    # y_train is rebound on every pass, so after the loop it only holds the
    # last label column (i == 3).
    y_train= y[:,i]
    print('Train subject %d, class %s' % (subject, cols[i]))
    # Train on every `sample`-th row only.
    lr.fit(X_train[::sample,:],y_train[::sample])
    # Keep column 1 of predict_proba as the score for label i
    # (presumably the probability of the positive class - confirm).
    pred[:,i] = lr.predict_proba(X_test)[:,1]
I used the following code to print confusion matrix, but it always return a 2X2 matrix
# NOTE(review): y_train and lr are presumably the single label column and
# model left over from the last loop pass, which would explain the 2x2
# result - confirm.
prediction = lr.predict(X_train)
print(confusion_matrix(y_train, prediction))
I found a function that can plot the confusion matrix which generated from sklearn.
import numpy as np
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    given a sklearn confusion matrix (cm), make a nice plot

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low']

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    # Accept nested lists as well as arrays.
    cm = np.asarray(cm)

    # Accuracy has to come from the raw counts, so compute it before the
    # matrix is normalized.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # Normalize before drawing so that cell colours, cell text and the
        # text-colour threshold all refer to the same values. Rows without
        # samples stay at 0 instead of becoming NaN.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Matrix cell (row i, column j) is drawn at x=j, y=i.
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Lay out after the axis labels exist so that they are taken into account.
    plt.tight_layout()
    plt.show()
It will look like this
This works the best for me :
from sklearn.metrics import multilabel_confusion_matrix
# Distinct labels found in y_test (y_test must provide a .unique() method,
# e.g. a pandas Series - presumably; confirm).
y_unique = y_test.unique()
# Pass those labels explicitly through the `labels` argument.
mcm = multilabel_confusion_matrix(y_test, y_pred, labels = y_unique)
mcm
I see this is still an open issue in sklearn's repository:
https://github.com/scikit-learn/scikit-learn/issues/3452
However there have been some attempts at implementing it. From the same #3452 thread issue:
https://github.com/Magellanea/scikit-learn/commit/514287c1d5dad2f0ab4918dc4da5cf7053fe6734#diff-b04acd877dd793f28ae7be13a999ed88R187
You can check the code proposed in the function and see if that fits your needs.
from sklearn.metrics import multilabel_confusion_matrix
# The class names are listed explicitly through the `labels` argument.
mul_c = multilabel_confusion_matrix(
test_Y,
pred_k,
labels=["benign", "dos","probe","r2l","u2r"])
mul_c
I found an easy solution with sklearn and seaborn libraries.
from sklearn.metrics import confusion_matrix, classification_report
from matplotlib import pyplot as plt
import seaborn as sns
def plot_confusion_matrix(y_test,y_scores, classNames):
    """Print the confusion matrix and report, then save a normalized heat map.

    Args:
        y_test: Ground truth, one row per sample; the arg-max of each row is
            taken as the true class index.
        y_scores: Predicted scores, one row per sample; the arg-max of each
            row is taken as the predicted class index.
        classNames: Class display names, ordered by class index.

    The row-normalized matrix is written to "image2.png".
    """
    # Local import: the import lines that accompany this function do not
    # bring numpy into scope.
    import numpy as np

    y_test = np.argmax(y_test, axis=1)
    y_scores = np.argmax(y_scores, axis=1)
    classes = len(classNames)

    # Pin the label set so the matrix is always classes x classes, even when
    # a class is missing from this batch of samples.
    label_ids = list(range(classes))
    cm = confusion_matrix(y_test, y_scores, labels=label_ids)
    print("**** Confusion Matrix ****")
    print(cm)
    print("**** Classification Report ****")
    print(classification_report(y_test, y_scores, labels=label_ids,
                                target_names=classNames))

    # Divide every row by its total in one vectorized step; rows without
    # samples stay at 0 instead of becoming NaN.
    row_totals = cm.sum(axis=1, keepdims=True)
    con = np.divide(cm, row_totals,
                    out=np.zeros((classes, classes)),
                    where=row_totals != 0)

    plt.figure(figsize=(40,40))
    sns.set(font_scale=3.0) # for label size
    ax = sns.heatmap(con, annot=True, fmt='.2', cmap='Blues',
                     xticklabels=classNames, yticklabels=classNames)
    ax.figure.savefig("image2.png")
classNames = ['A', 'B', 'C', 'D', 'E']
plot_confusion_matrix(y_test,y_scores, classNames)
#y_test is your ground truth
#y_scores is your predicted probabilities
Just use pandas with gradient coloring:
# Use every label seen in either vector, so the frame's index and columns
# always match the matrix shape (y_pred may hold labels absent from y_true).
labels = np.unique(np.concatenate([np.asarray(y_true), np.asarray(y_pred)]))
cm = confusion_matrix(y_true, y_pred, labels=labels)
cm = pd.DataFrame(data=cm, columns=labels, index=labels)
# Row-wise fractions of 1; rows without true samples become 0 instead of NaN.
cm = cm.div(cm.sum(axis=1), axis=0).fillna(0)
cm.style.background_gradient().format(precision=2)
By now pandas has nice options for table formatting and decoration.