Why do we flip coordinates when plotting a confusion matrix? - python

I am trying to plot a confusion matrix, but I have a question: why are the indices flipped in the for loop below, i.e. why do we write plt.text(j, i) instead of plt.text(i, j)? I assume the 2D confusion matrix has values as follows:
(0,0): true zeros.
(0,1): false ones.
(1,0): false zeros.
(1,1): true ones.
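A quick check of that layout (in sklearn, rows are true labels and columns are predicted labels):
from sklearn.metrics import confusion_matrix

# rows = true labels, columns = predicted labels
print(confusion_matrix([0, 0, 1, 1, 1], [0, 1, 1, 1, 0]))
# [[1 1]
#  [1 2]]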
Code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd
from sklearn.metrics import confusion_matrix
import itertools

true = np.array([0, 1, 0])
pred = np.array([0., 0., 0.])

def plot_confusion_matrix(y_true, y_pred, title='', labels=[0, 1]):
    cm = confusion_matrix(y_true, y_pred)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm)
    plt.title(title)
    fig.colorbar(cax)
    ax.set_xticklabels([''] + labels)
    ax.set_yticklabels([''] + labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    fmt = 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="black" if cm[i, j] > thresh else "white")
    plt.show()

plot_confusion_matrix(true, pred, title='Confusion Matrix')

The first coordinate of a plot (x) runs horizontally, while the first index of a matrix (the row) runs vertically.
For example, the upper-right square of the plot has coordinates x=1, y=0. It holds the false positives, which live in cell (0, 1) of the confusion matrix.
To bring the two conventions into line, you effectively flip the matrix along its main diagonal, i.e. transpose it. Passing (j, i) to plt.text is exactly that transposition: the column index j becomes the x coordinate and the row index i becomes the y coordinate.
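Equivalently, you can transpose the matrix once and keep the "natural" index order in the call; a minimal sketch reusing cm, fmt and itertools from the question's code:
cmT = cm.T  # cmT[x, y] == cm[y, x]
for x, y in itertools.product(range(cmT.shape[0]), range(cmT.shape[1])):
    # places exactly the same numbers as plt.text(j, i, cm[i, j]) in the original loop
    plt.text(x, y, format(cmT[x, y], fmt), horizontalalignment="center")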

Related

How to overlay a pcolormesh with binary information in Python

Given a 2D array
172,47,117
192,67,251
195,103,9
211,21,242
The objective is to place markers (e.g., a shape or a line) on top of the 2D image at the positions given by the binary 2D array below:
0,1,0
0,0,0
0,1,0
1,1,0
Specifically, a marker should be placed wherever a cell equals 1.
The expected output is shown below. In this example the marker is a horizontal red line, but it can be any other shape that makes the code more straightforward.
How can I achieve this with any graphical package in Python?
The arrays above can be reproduced with the following:
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(0)
X=np.random.randint(256, size=(4, 3))
arrshape=np.random.randint(2, size=(4, 3))
fig = plt.figure(figsize=(8,6))
plt.pcolormesh(X,cmap="plasma")
plt.title("Plot 2D array")
plt.colorbar()
plt.show()
You can throw in a scatter:
x, y = X.shape
xs, ys = np.ogrid[:x, :y]
# the non-zero coordinates
u = np.argwhere(arrshape)
plt.scatter(ys[:, u[:, 1]].ravel() + 0.5,
            xs[u[:, 0]].ravel() + 0.5,
            marker='x', color='r', s=100)
Output:
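Equivalently, since np.argwhere already returns (row, col) pairs, you can skip the ogrid and scatter them directly; a minimal sketch under the same setup:
u = np.argwhere(arrshape)      # (row, col) pairs of the 1-cells
plt.scatter(u[:, 1] + 0.5,     # column index -> x (cell centre)
            u[:, 0] + 0.5,     # row index    -> y (cell centre)
            marker='x', color='r', s=100)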
One way to do it is with matplotlib:
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(0)
X = np.random.randint(256, size=(4, 3))
arrshape = np.random.randint(2, size=(4, 3))

fig = plt.figure(figsize=(8, 6))
plt.pcolormesh(X, cmap="plasma")
plt.title("Plot 2D array")
plt.colorbar()

markers = [
    [0, 1, 0],
    [0, 0, 0],
    [0, 1, 0],
    [1, 1, 0]]

for y, row in enumerate(markers):
    prev = 0
    for x, v in enumerate(row):
        if v == 1:
            # short segment centred in the marked cell
            plt.plot([x + 0.25, x + 0.75], [y + 0.5, y + 0.5], 'r-', linewidth=5)
            if prev == 1:
                # join up with the previous marked cell in the same row
                plt.plot([x - 0.5, x + 0.5], [y + 0.5, y + 0.5], 'r-', linewidth=5)
        prev = v
plt.show()
Output:
(Note that pcolormesh puts the origin in the lower-left corner, so the marker rows are drawn bottom-up, i.e. upside-down relative to how the array is written.)
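If you would rather have the marker rows appear in the order they are written, you can flip the axis or the data; a minimal sketch of both options, using the figure from above:
# option 1: invert the y-axis so that row 0 is drawn at the top
plt.gca().invert_yaxis()

# option 2: flip the image (and the marker array) vertically before plotting
# plt.pcolormesh(np.flipud(X), cmap="plasma")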

Scikit-learn (sklearn) confusion matrix plot for more than 3 classes

I have a problem with the confusion matrix when I use the plotting code from scikit-learn.
This is what I got:
As you see, the first class is cut off.
Update:
I forced it to work by adding these rows:
plt.xlim(-0.5, 5.5)
plt.ylim(5.5, -0.5)
and got this:
But I still want to know whether there is another way to do it that is not specific to 5 classes.
I already tried to change the axes size, but it didn't work out.
def plot_confusion_matrix(y_true, y_pred, classes, normalize=False, title=None,
                          cmap=plt.cm.Blues):
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = list(unique_labels(y_true, y_pred))
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax

plot_confusion_matrix(y, y_pred, classes=[0, 1, 2, 3, 4, 5], normalize=True,
                      title='Normalized confusion matrix')
I want the boxes in the first and last row not to be cut off.
In this case you need to set xlim and ylim; here is an automatic way to do so, e.g. for 10 classes.
Briefly, you need:
plt.xlim(-0.5, len(np.unique(y))-0.5)
plt.ylim(len(np.unique(y))-0.5, -0.5)
Full example:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = np.repeat(np.arange(0,10),15)
class_names = np.array(['1', '2', '3', '4', '5','6','7','8','9','10'])
# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
classifier = svm.SVC(kernel='linear', C=0.01)
y_pred = classifier.fit(X_train, y_train).predict(X_test)
def plot_confusion_matrix(y_true, y_pred, classes,
normalize=False,
title=None,
cmap=plt.cm.Blues):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
if not title:
if normalize:
title = 'Normalized confusion matrix'
else:
title = 'Confusion matrix, without normalization'
# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)
# Only use the labels that appear in the data
classes = classes[unique_labels(y_true, y_pred)]
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
print(cm)
fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
# We want to show all ticks...
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
# ... and label them with the respective list entries
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
rotation_mode="anchor")
# Loop over data dimensions and create text annotations.
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig.tight_layout()
plt.xlim(-0.5, len(np.unique(y))-0.5)
plt.ylim(len(np.unique(y))-0.5, -0.5)
return ax
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names,
title='Confusion matrix, without normalization')
# Plot normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True,
title='Normalized confusion matrix')
plt.show()
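Note that the plt.xlim/plt.ylim calls above read the global y; if you would rather keep the function self-contained, the limits can be derived from the matrix itself. A minimal sketch using the ax and cm that already exist inside plot_confusion_matrix:
n = cm.shape[0]              # number of classes actually present in cm
ax.set_xlim(-0.5, n - 0.5)
ax.set_ylim(n - 0.5, -0.5)   # reversed so that row 0 stays at the top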

How can I make my confusion matrix plot better?

I'm working on a classification problem with 20 classes. I'm trying to visualize the results through a confusion matrix using matplotlib.
After computing my confusion matrix, I used the plot_confusion_matrix described here.
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax
Here is what it looks like:
It looks like the problem comes from dealing with too many classes, so a natural solution would be scaling up the plot. But doing that distorts it. Also, how do I choose the correct scale/size?
How do I proceed to make it look better?
P.S. You can find the confusion matrix as a CSV file here.
Since you did not say you strictly need matplotlib, I recommend the seaborn library: it is much easier and simpler, and (if I am not mistaken) it is built on top of matplotlib anyway, so you can still tweak anything unusual. Using seaborn:
import seaborn as sns

plt.figure(figsize=(10, 10))  # this is the size of the image
heatM = sns.heatmap(cov_vals, vmin=-1, vmax=1, center=0,
                    cmap=sns.diverging_palette(20, 220, n=200),
                    square=True, annot=True)  # these are the characteristics of the heatmap
heatM.set_ylim([10, 0])  # this is the limit on the y axis (number of rows)
This is the result. Be careful with the limits in heatM.set_ylim([10, 0]) (and for x too): these need to match the number of rows/columns that you have.
Hope this was useful.
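Adapted to a confusion matrix rather than a correlation-style matrix, a minimal sketch (assuming cm from sklearn's confusion_matrix and a list of class names called classes):
import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 10))
ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', square=True,
                 xticklabels=classes, yticklabels=classes)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()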
I ended up using seaborn, but I faced a problem: the confusion matrix looked like this. It was actually a bug in matplotlib 3.1.1 that cut off the top and bottom rows of seaborn heatmaps (see this issue). The solution was to use a prior version (matplotlib 3.1.0 in my case).

Plot aligned x,y 1d histograms from projected 2d histogram

I need to generate an image similar to the one shown in this example:
The difference is that, instead of having the scattered points in two dimensions, I have a two-dimensional histogram generated with numpy's histogram2d and plotted with imshow and gridspec:
How can I project this 2D histogram into a horizontal and a vertical histogram (or curves) so that it looks aligned, like the first image?
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.ticker import NullFormatter

data = ...  # Uploaded to http://pastebin.com/tjLqM9gQ

# Create a meshgrid of coordinates (0,1,...,N) times (0,1,...,N)
y, x = np.mgrid[:len(data[0, :, 0]), :len(data[0, 0, :])]
# duplicating the grids
xcoord, ycoord = np.array([x] * len(data)), np.array([y] * len(data))
# compute histogram with coordinates as x,y
h, xe, ye = np.histogram2d(
    xcoord.ravel(), ycoord.ravel(),
    bins=[len(data[0, 0, :]), len(data[0, :, 0])],
    weights=data.ravel())

# Projected histograms in x and y
hx, hy = h.sum(axis=0), h.sum(axis=1)

# Define size of figure
fig = plt.figure(figsize=(20, 15))
gs = gridspec.GridSpec(10, 12)

# Define the positions of the subplots.
ax0 = plt.subplot(gs[6:10, 5:9])
axx = plt.subplot(gs[5:6, 5:9])
axy = plt.subplot(gs[6:10, 9:10])

ax0.imshow(h, cmap=plt.cm.viridis, interpolation='nearest',
           origin='lower', vmin=0.)

# Remove tick labels
nullfmt = NullFormatter()
axx.xaxis.set_major_formatter(nullfmt)
axx.yaxis.set_major_formatter(nullfmt)
axy.xaxis.set_major_formatter(nullfmt)
axy.yaxis.set_major_formatter(nullfmt)

# Top plot
axx.plot(hx)
axx.set_xlim(ax0.get_xlim())

# Right plot
axy.plot(hy, range(len(hy)))
axy.set_ylim(ax0.get_ylim())

fig.tight_layout()
plt.savefig('del.png')
If you are OK with the marginal distributions all being upright, you could use corner.
E.g.:
import corner
import numpy as np
import pandas as pd

N = 1000

CORNER_KWARGS = dict(
    smooth=0.9,
    label_kwargs=dict(fontsize=30),
    title_kwargs=dict(fontsize=16),
    truth_color="tab:orange",
    quantiles=[0.16, 0.84],
    levels=(1 - np.exp(-0.5), 1 - np.exp(-2), 1 - np.exp(-9 / 2.0)),
    plot_density=False,
    plot_datapoints=False,
    fill_contours=True,
    max_n_ticks=3,
    verbose=False,
    use_math_text=True,
)

def generate_data():
    return pd.DataFrame(dict(
        x=np.random.normal(0, 1, N),
        y=np.random.normal(0, 1, N)
    ))

def main():
    data = generate_data()
    fig = corner.corner(data, **CORNER_KWARGS)
    fig.show()

if __name__ == "__main__":
    main()
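Another option is seaborn's jointplot, which draws a central 2D histogram with aligned marginal histograms in one call; a minimal sketch with synthetic data (not from the original answers; kind="hist" needs seaborn >= 0.11):
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
xvals = rng.normal(0, 1, 1000)   # hypothetical sample data
yvals = rng.normal(0, 1, 1000)

# central 2D histogram with aligned 1D marginal histograms on top and right
sns.jointplot(x=xvals, y=yvals, kind="hist")
plt.show()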

Plot confusion matrix sklearn with multiple labels

I am plotting a confusion matrix for multi-label data, where the labels look like:
label1: 1, 0, 0, 0
label2: 0, 1, 0, 0
label3: 0, 0, 1, 0
label4: 0, 0, 0, 1
I am able to classify successfully using the code below. I only need some help to plot the confusion matrix.
for i in range(4):
    y_train = y[:, i]
    print('Train subject %d, class %s' % (subject, cols[i]))
    lr.fit(X_train[::sample, :], y_train[::sample])
    pred[:, i] = lr.predict_proba(X_test)[:, 1]
I used the following code to print the confusion matrix, but it always returns a 2x2 matrix:
prediction = lr.predict(X_train)
print(confusion_matrix(y_train, prediction))
I found a function that can plot a confusion matrix generated by sklearn.
import numpy as np

def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    given a sklearn confusion matrix (cm), make a nice plot

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low']

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions

    Usage
    -----
    plot_confusion_matrix(cm           = cm,             # confusion matrix created by
                                                         # sklearn.metrics.confusion_matrix
                          normalize    = True,           # show proportions
                          target_names = y_labels_vals,  # list of names of the classes
                          title        = best_estimator_name)  # title of graph

    Citation
    ---------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if normalize:
            plt.text(j, i, "{:0.4f}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        else:
            plt.text(j, i, "{:,}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()
It will look like this
This works best for me:
from sklearn.metrics import multilabel_confusion_matrix
y_unique = y_test.unique()
mcm = multilabel_confusion_matrix(y_test, y_pred, labels = y_unique)
mcm
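multilabel_confusion_matrix returns one 2x2 matrix per label (shape (n_labels, 2, 2)). If you also want to plot them, here is a minimal sketch (assuming mcm and y_unique from above, and scikit-learn >= 0.22 for ConfusionMatrixDisplay):
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

fig, axes = plt.subplots(1, len(y_unique), figsize=(4 * len(y_unique), 4))
for ax, label, cm_label in zip(axes, y_unique, mcm):
    # each 2x2 block is "every other label" vs "this label"
    ConfusionMatrixDisplay(cm_label, display_labels=['other', label]).plot(ax=ax)
    ax.set_title(str(label))
fig.tight_layout()
plt.show()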
I see this is still an open issue in sklearn's repository:
https://github.com/scikit-learn/scikit-learn/issues/3452
However, there have been some attempts at implementing it. From the same issue thread (#3452):
https://github.com/Magellanea/scikit-learn/commit/514287c1d5dad2f0ab4918dc4da5cf7053fe6734#diff-b04acd877dd793f28ae7be13a999ed88R187
You can check the code proposed in the function and see if that fits your needs.
from sklearn.metrics import multilabel_confusion_matrix
mul_c = multilabel_confusion_matrix(
    test_Y,
    pred_k,
    labels=["benign", "dos", "probe", "r2l", "u2r"])
mul_c
I found an easy solution using the sklearn and seaborn libraries.
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
from matplotlib import pyplot as plt
import seaborn as sns

def plot_confusion_matrix(y_test, y_scores, classNames):
    y_test = np.argmax(y_test, axis=1)
    y_scores = np.argmax(y_scores, axis=1)
    classes = len(classNames)
    cm = confusion_matrix(y_test, y_scores)
    print("**** Confusion Matrix ****")
    print(cm)
    print("**** Classification Report ****")
    print(classification_report(y_test, y_scores, target_names=classNames))
    con = np.zeros((classes, classes))
    for x in range(classes):
        for y in range(classes):
            con[x, y] = cm[x, y] / np.sum(cm[x, :])

    plt.figure(figsize=(40, 40))
    sns.set(font_scale=3.0)  # for label size
    df = sns.heatmap(con, annot=True, fmt='.2', cmap='Blues',
                     xticklabels=classNames, yticklabels=classNames)
    df.figure.savefig("image2.png")

classNames = ['A', 'B', 'C', 'D', 'E']
plot_confusion_matrix(y_test, y_scores, classNames)
# y_test is your ground truth
# y_scores is your predicted probabilities
Just use pandas with gradient coloring:
cm = confusion_matrix(y_true, y_pred)
cm = pd.DataFrame(data=cm, columns = np.unique(y_true), index = np.unique(y_true))
cm = (cm / cm.sum(axis = 1).values.reshape(-1,1)) # to fractions of 1
cm.style.background_gradient().format(precision=2)
By now pandas has nice options for table formatting and decoration.
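A self-contained version of that idea, as a sketch with made-up labels (Styler.format(precision=...) needs pandas >= 1.3):
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix

y_true = ['cat', 'dog', 'cat', 'bird', 'dog', 'cat']   # hypothetical labels
y_pred = ['cat', 'cat', 'cat', 'bird', 'dog', 'bird']

labels = np.unique(y_true)
cm = pd.DataFrame(confusion_matrix(y_true, y_pred, labels=labels),
                  index=labels, columns=labels)
cm = cm / cm.sum(axis=1).values.reshape(-1, 1)      # to fractions of 1
cm.style.background_gradient().format(precision=2)  # renders as a coloured table in a notebook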
