Add row-wise accuracy to a seaborn heatmap - python

import seaborn as sb
import numpy as np
from matplotlib import pyplot as plt
A = np.array([[10, 5], [3, 10]], dtype=np.int32)
plt.figure()
sb.heatmap(
A,
square=True,
annot=True,
xticklabels=["False", "Positive"],
yticklabels=["False", "Positive"],
cbar=False,
fmt="2d",
)
plt.title("Example plot")
plt.show()
Shows example of an heatmap. I wish to add accuracy of each row to left side of the image.
The plot should be similar to
Can this be achived?

You can add the following lines to you code between the heatmap call and plt.title(...:
# Compute the values to added to the plot
row_accuracies = [A[i][i] * 100 / A[i].sum() for i in range(A.shape[0])]
# Get axes
ax = fig.axes
# [OPTIONAL] Add ticks on the right side
ax.tick_params(axis='y', which='major', left=True, right=True, labelleft=True, labelright=False)
# Add text where the ticks are (roughly)
for i, acc in enumerate(row_accuracies):
ax.text(ax.get_xlim()[1] * 1.05, ax.get_yticks()[i] * 1.01, f'{acc:.2f}%')
This is the result:

Related

How to customize the location of color bar in Seaborn heatmap?

I have the following code to create a heatmap. However, it creates an overlap of the color bar and the right axis text. The text has no problems, I want it to be in that length.
How can I locate the colorbar on the right/left side of the heatmap with no overlap?
I tried with "pad" parameter in cbar_kws but it didn't help.enter image description here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT=pd.DataFrame(np.random.randn(300,3), columns=list('ABC'))
miniPT=PT.iloc[:,:-1]
SMALL_SIZE = 8
MEDIUM_SIZE = 80
BIGGER_SIZE = 120
plt.rc('font', size=MEDIUM_SIZE) # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE) # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('legend', fontsize=MEDIUM_SIZE) # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title
plt.figure(figsize=(10, miniPT.shape[0]/5.2))
ax =sns.heatmap(miniPT, annot=False, cmap='RdYlGn')
for _, spine in ax.spines.items():
spine.set_visible(True)
# second axis
asset_list=np.asarray(PT['C'])
asset_list=asset_list[::-1]
ax3 = ax.twinx()
ax3.set_ylim([0,ax.get_ylim()[1]])
ax3.set_yticks(ax.get_yticks())
ax3.set_yticklabels(asset_list, fontsize=MEDIUM_SIZE*0.6)
# colorbar
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=MEDIUM_SIZE)
One way to get the overlap automatically adjusted by matplotlib, is to explicitly create subplots: one for the heatmap and another for the colorbar. sns.heatmap's cbar_ax= parameter can be set to point to this subplot. gridspec_kws= is needed to set the relative sizes. At the end, plt.tight_layout() will adjust all the paddings to make everything fit nicely.
The question's code contains some strange settings (e.g. a fontsize of 80 is immense). Also, 300 rows will inevitably lead to overlapping text (the fontsize needs to be so small that non-overlapping text wouldn't be readable). Here is some more simplified example code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT = pd.DataFrame(np.random.randn(100, 3), columns=list('ABC'))
fig, (ax, cbar_ax) = plt.subplots(ncols=2, figsize=(10, len(PT) / 5.2), gridspec_kw={'width_ratios': [10, 1]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=cbar_ax, ax=ax)
for _, spine in ax.spines.items():
spine.set_visible(True)
# second axis
asset_list = np.asarray(PT['C'])
ax3 = ax.twinx()
ax3.set_ylim(ax.get_ylim())
ax3.set_yticks(np.arange(len(PT)))
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
cbar_ax.tick_params(labelsize=80)
plt.tight_layout()
plt.show()
As the plot is quite large, here only the bottom part is pasted, with a link to the full plot.
This is how it would look like with:
fontsize 80 (Note that font sizes are measured in "points per inch", standard 72 points per inch);
figure width of 20 inches (instead of 10);
300 rows
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT = pd.DataFrame(np.random.randn(300, 3), columns=list('ABC'))
fig, (ax, cbar_ax) = plt.subplots(ncols=2, figsize=(20, len(PT) / 5.2), gridspec_kw={'width_ratios': [15, 1]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=cbar_ax, ax=ax)
for _, spine in ax.spines.items():
spine.set_visible(True)
# second axis
asset_list = np.asarray(PT['C'])
ax3 = ax.twinx()
ax3.set_ylim(ax.get_ylim())
ax3.set_yticks(np.arange(len(PT)))
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
cbar_ax.tick_params(labelsize=80)
plt.tight_layout()
plt.show()
My solution was eventually move the colorbar to left side. This is the code and the output:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
PT = pd.DataFrame(np.random.randn(300, 3), columns=list('ABC'))
fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, len(PT) / 5.2), gridspec_kw={'width_ratios': [15, 15]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=ax0, ax=ax1)
for _, spine in ax1.spines.items():
spine.set_visible(True)
# second axis
asset_list = np.asarray(PT['C'])
ax3 = ax1.twinx()
ax3.set_ylim(ax1.get_ylim())
ax3.set_yticks(np.arange(len(PT)))
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
ax0.tick_params(labelsize=80)
plt.tight_layout()
plt.show()

How to label quartiles in matplotlib boxplots?

I have a list of values which I want to plot the distribution for. I'm using a box-plot but it would be nice to add some dotted lines going from the boxplot quartiles to the axis. Also I want just the quartile values displayed on the x ticks.
Here's a rough idea but with values at the end instead of names.
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
vel_arr = np.random.rand(1000,1)
fig = plt.figure(1, figsize=(9, 6))
ax = fig.add_subplot(111)
# Create the boxplot
ax.boxplot(vel_arr,vert=False, manage_ticks=True)
ax.set_xlabel('value')
plt.yticks([1], ['category'])
plt.show()
np.quantile calculates the desired quantiles.
ax.vlines draws vertical lines, for example from the center of the boxplot to y=0. zorder=0 makes sure these lines go behind the boxplot.
ax.set_ylim(0.5, 1.5) resets the ylims. Default, the vlines force the ylims with some extra padding.
ax.set_xticks(quantiles) sets xticks at the position of every quantile.
import numpy as np
import matplotlib.pylab as plt
vel_arr = np.random.rand(50, 1)
fig = plt.figure(1, figsize=(9, 6))
ax = fig.add_subplot(111)
ax.boxplot(vel_arr, vert=False, manage_ticks=True)
ax.set_xlabel('value')
ax.set_yticks([1])
ax.set_yticklabels(['category'])
quantiles = np.quantile(vel_arr, np.array([0.00, 0.25, 0.50, 0.75, 1.00]))
ax.vlines(quantiles, [0] * quantiles.size, [1] * quantiles.size,
color='b', ls=':', lw=0.5, zorder=0)
ax.set_ylim(0.5, 1.5)
ax.set_xticks(quantiles)
plt.show()

Seaborn scatterplot legend showing true values and normalized continuous color

I have a dataframe that I'd like to use to build a scatterplot where different points have different colors:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
dat=pd.DataFrame(np.random.rand(20, 2), columns=['x','y'])
dat['c']=np.random.randint(0,100,20)
dat['c_norm']=(dat['c']-dat['c'].min())/(dat['c'].max()-dat['c'].min())
dat['group']=np.append(np.repeat('high',10), np.repeat('low',10))
As you can see, the column c_norm shows the c column has been normalized between 0 and 1. I would like to show a continuous legend whose color range reflect the normalized values, but labeled using the original c values as label. Say, the minimum (1), the maximum (86), and the median (49). I also want to have differing markers depending on group.
So far I was able to do this:
fig = plt.figure(figsize = (8,8))
ax = fig.add_subplot(1,1,1)
for row in dat.index:
if(dat.loc[row,'group']=='low'):
i_marker='.'
else:
i_marker='x'
ax.scatter(
x=dat.loc[row,'x'],
y=dat.loc[row,'y'],
s=50, alpha=0.5,
marker=i_marker
)
ax.legend(dat['c_norm'], loc='center right', bbox_to_anchor=(1.5, 0.5), ncol=1)
Questions:
- How to generate a continuous legend based on the values?
- How to adapt its ticks to show the original ticks in c, or at least a min, max, and mean or median?
Thanks in advance
Partial answer. Do you actually need to determine your marker colors based on the normed values? See the output of the snippet below.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dat = pd.DataFrame(np.random.rand(20, 2), columns=['x', 'y'])
dat['c'] = np.random.randint(0, 100, 20)
dat['c_norm'] = (dat['c'] - dat['c'].min()) / (dat['c'].max() - dat['c'].min())
dat['group'] = np.append(np.repeat('high', 10), np.repeat('low', 10))
fig, (ax, bx) = plt.subplots(nrows=1, ncols=2, num=0, figsize=(16, 8))
mask = dat['group'] == 'low'
scat = ax.scatter(dat['x'][mask], dat['y'][mask], s=50, c=dat['c'][mask],
marker='s', vmin=np.amin(dat['c']), vmax=np.amax(dat['c']),
cmap='plasma')
ax.scatter(dat['x'][~mask], dat['y'][~mask], s=50, c=dat['c'][~mask],
marker='X', vmin=np.amin(dat['c']), vmax=np.amax(dat['c']),
cmap='plasma')
cbar = fig.colorbar(scat, ax=ax)
scat = bx.scatter(dat['x'][mask], dat['y'][mask], s=50, c=dat['c_norm'][mask],
marker='s', vmin=np.amin(dat['c_norm']),
vmax=np.amax(dat['c_norm']), cmap='plasma')
bx.scatter(dat['x'][~mask], dat['y'][~mask], s=50, c=dat['c_norm'][~mask],
marker='X', vmin=np.amin(dat['c_norm']),
vmax=np.amax(dat['c_norm']), cmap='plasma')
cbar2 = fig.colorbar(scat, ax=bx)
plt.show()
You could definitely modify the second colorbar so that it matches the first one, but is that necessary?

All gridlines below plot line - with twin x-axis [Matplotlib]

I'm using matplotlib to produce a plot where I want to show labels on the right and left y-axis. You will notice by running the code that the grid-lines formed by the right-side y-axis appear on top of the plot line, where the left-side lines appear below. I would like them all to appear below the plot. I've tried zorder and set_axisbelow(True) without success.
Example code below:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
t = np.linspace(0,5)
x = np.exp(-t)*np.sin(2*t)
fig, ax1 = plt.subplots()
ax1.plot(t, x)
ax2 = ax1.twinx()
ax2.plot(t, x, alpha=0.0)
ax1.set_xticks([0,1,2])
ax1.set_yticks([0.1, 0.2])
ax2.set_yticks([0.3, 0.4, 0.5])
ax1.grid(True, color='lightgray')
ax2.grid(True, color='lightgray')
for a in [ax1, ax2]:
a.spines["top"].set_visible(False)
a.spines["right"].set_visible(False)
a.spines["left"].set_visible(False)
a.spines["bottom"].set_visible(False)
ax1.set_axisbelow(True)
ax2.set_axisbelow(True)
plt.savefig('fig.pdf')
plt.show()

Colorbar tick labels as log outputs

I am playing around with histogram2d and I am trying incorporate a color bar logarithmic values.
Here is my current code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import LinearSegmentedColormap
cmap = LinearSegmentedColormap.from_list('mycmap', ['black', 'maroon',
'crimson', 'orange', 'white'])
fig = plt.figure()
ax = fig.add_subplot(111)
H = ax.hist2d(gas_pos[:,0]/0.7, gas_pos[:,1]/0.7, cmap=cmap,
norm=matplotlib.colors.LogNorm(), bins=350, weights=np.log(gas_Temp))
ax.tick_params(axis=u'both', which=u'both',length=0)
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
cb = fig.colorbar(H[3], ax=ax, shrink=0.8, pad=0.01,
orientation="horizontal", label=r'$\log T\ [\mathrm{K}]$')
cb.ax.set_xticklabels([1,2,3,4])
cb.update_ticks()
empty = Rectangle((0,0 ), 0, 0, alpha=0.0)
redshift = fig.legend([empty], [r'$z = 127$'],
loc='upper right', frameon=False, handlelength=0, handletextpad=0)
redshift.get_texts()[0].set_color('white')
#fig.add_artist(redshift)
plt.show()
The weights are values not passed through np.log() and are currently being normalized through LogNorm().
What I am trying to get is to have the colorbar tic labels to be the logarithmic values of what is currently there eg. 10**4 --> 4, 10**6 --> 6, etc.
I have tried changing the formatting and also passing through the logarithmic values of np.log(gas_Temp), but nothing is really working.
The idiomatic thing to do is use a LogFormatterExponent to do the formatting of your colorbar. That's exactly what you need: to display 10**x values as x, or in other words, to display y values as log10(x).
Proof using dummy data:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib.ticker import LogFormatterExponent # <-- one new import here
# generate dummy data
histdata = 10**(np.random.rand(200,200)*4 + 1) # 10^1 -> 10^5
# plot
fig = plt.figure()
ax = fig.add_subplot(111)
ax.tick_params(axis=u'both', which=u'both',length=0)
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
im = plt.imshow(histdata,cmap='viridis',norm=LogNorm())
cb = fig.colorbar(im, ax=ax, shrink=0.8, pad=0.01,
orientation="horizontal", label=r'$\log T\ [\mathrm{K}]$')
# v-- one new line here
cb.formatter = LogFormatterExponent(base=10) # 10 is the default
cb.update_ticks()
Compare the result of your original (left) with the modified version (right):

Categories

Resources