How to add rectangle patches in Python Heatmap? - python

Below code gives me the heatmap output. but I want to add rectangle patches to highlight values in the range of 0.4 to 0.99 and -0.4 to -0.99
plt.figure(figsize=(15,10))
mask = np.triu(np.ones_like(corr, dtype=np.bool))
sns.heatmap(corr,annot=True,fmt=".2f", mask=mask,cmap="YlGnBu");

The heatmap data for the categorical variables was taken from Kaggle's home price data. To add a rectangle, add a rectangle to add_patch(). The coordinates are based on the lower left corner, so specify the x and y of each in tuples, and specify the width and height. We also specify not to fill it.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
fig, ax = plt.subplots(figsize=(18,18))
df_house = pd.read_csv('./data/house_prices_train.csv', index_col=0)
df_house_corr = df_house.corr()
mask = np.triu(np.ones_like(df_house_corr, dtype=np.bool))
sns.heatmap(df_house_corr, annot=True, fmt=".2f", mask=mask, cmap="YlGnBu")
ax.add_patch(
patches.Rectangle(
(5, 6),
1.0,
35.0,
edgecolor='red',
fill=False,
lw=2
) )
plt.show()

Ok so without the data I made the solution with the values of a uniform distribution. Copy-paste your data in the script and it should work as long as they are of NumPy array-like type.
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import seaborn as sns
fig, ax = plt.subplots(figsize=(15, 10))
data_len = 17
uniform_data = np.random.rand(data_len, data_len)
# np.bool is deprecated in Numpy 1.20
mask = np.triu(np.ones_like(uniform_data, dtype=bool))
heatmap = sns.heatmap(uniform_data, annot=True, fmt='.2f', mask=mask, cmap='YlGnBu', ax=ax)
indices_tuple = np.tril_indices(n=data_len, k=-1)
# first array of indices_tuple: indices on column
# second array of indices_tuple: indices on lines
for col_index, line_index in zip(indices_tuple[0], indices_tuple[1]):
if (np.abs(uniform_data[line_index, col_index]) <= 0.99) and (np.abs(uniform_data[line_index, col_index]) >= 0.4):
rect = patches.Rectangle((line_index, col_index), 1, 1, fill=True, facecolor='red', alpha=0.5)
ax.add_patch(rect)
plt.show()
The idea is to get the indices of all the values of the lower triangle to prevent looping through unnecessary values. The latter values are inspected and if the condition is met, a rectangle is drawn at its position.
You get the following result:
If I correctly understood your problem, this script should do the trick.

Related

How to overlay two 2D-histograms in Matplotlib?

I have two datasets (corresponding with the time-positional data of hydrogen atoms and time-positional data of alumina atoms) in the same system.
I want to plot the density of each element by overlaying two hist2d plots using matplotlib.
I am currently doing this by setting an alpha value on the second hist2d:
fig, ax = plt.subplots(figsize=(4, 4))
v = ax.hist2d(x=alx, y=aly,
bins=50, cmap='Reds')
h = ax.hist2d(x=hx, y=hy,
bins=50, cmap='Blues',
alpha=0.7)
ax.set_title('Adsorption over time, {} K'.format(temp))
ax.set_xlabel('picoseconds')
ax.set_ylabel('z-axis')
fig.colorbar(h[3], ax=ax)
fig.savefig(savename, dpi=300)
I do get the plot that I want, however the colors seem washed out due to the alpha value.
Is there a more correct way to do generate such plots?
One way to achieve this would be a to add fading alphas towards lower levels to the existing color maps:
import numpy as np
import matplotlib.pylab as pl
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
# modify existing Reds colormap with a linearly fading alpha
red = pl.cm.Reds # original colormap
fading_red = red(np.arange(red.N)) # extract colors
fading_red[:, -1] = np.linspace(0, 1, red.N) # modify alpha
fading_red = ListedColormap(fading_red) # convert to colormap
# data generation
random_1 = np.random.randn(10000)+1
random_2 = np.random.randn(10000)+1
random_3 = np.random.randn(10000)
random_4 = np.random.randn(10000)
# plot
fig, ax = plt.subplots(1,1)
plt.hist2d(x=random_3, y=random_4, bins=100, cmap="Blues")
plt.hist2d(x=random_1, y=random_2, bins=50, cmap=fading_red)
plt.show()

How to label quartiles in matplotlib boxplots?

I have a list of values which I want to plot the distribution for. I'm using a box-plot but it would be nice to add some dotted lines going from the boxplot quartiles to the axis. Also I want just the quartile values displayed on the x ticks.
Here's a rough idea but with values at the end instead of names.
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
vel_arr = np.random.rand(1000,1)
fig = plt.figure(1, figsize=(9, 6))
ax = fig.add_subplot(111)
# Create the boxplot
ax.boxplot(vel_arr,vert=False, manage_ticks=True)
ax.set_xlabel('value')
plt.yticks([1], ['category'])
plt.show()
np.quantile calculates the desired quantiles.
ax.vlines draws vertical lines, for example from the center of the boxplot to y=0. zorder=0 makes sure these lines go behind the boxplot.
ax.set_ylim(0.5, 1.5) resets the ylims. Default, the vlines force the ylims with some extra padding.
ax.set_xticks(quantiles) sets xticks at the position of every quantile.
import numpy as np
import matplotlib.pylab as plt
vel_arr = np.random.rand(50, 1)
fig = plt.figure(1, figsize=(9, 6))
ax = fig.add_subplot(111)
ax.boxplot(vel_arr, vert=False, manage_ticks=True)
ax.set_xlabel('value')
ax.set_yticks([1])
ax.set_yticklabels(['category'])
quantiles = np.quantile(vel_arr, np.array([0.00, 0.25, 0.50, 0.75, 1.00]))
ax.vlines(quantiles, [0] * quantiles.size, [1] * quantiles.size,
color='b', ls=':', lw=0.5, zorder=0)
ax.set_ylim(0.5, 1.5)
ax.set_xticks(quantiles)
plt.show()

How to annotate only the diagonal elements of a seaborn heatmap

I am using Seaborn heatmap to plot the output of a large confusion matrix. Since the diagonal element represents the correct prediction, they are more important to show the number/correct rate. As the question suggests, how to annotate only the diagonal entries in a heatmap?
I have consulted this website https://seaborn.pydata.org/examples/many_pairwise_correlations.html, but it does not help with how to annotate only the diagonal entries. Hope somebody could help with that. Thank you in advance!
Does this help you in getting what you have in mind? The URL example given by you does not have a diagonal, I had annotated the diagonal below the main diagonal instead. To annotate your confusion matrix diagonal, you can adapt to my code by changing the -1 value in np.diag(..., -1) to 0.
Note the additional parameter fmt='' that I had added in sns.heatmap(...) because my annot matrix elements are strings.
Code
from string import ascii_letters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white")
# Generate a large random dataset
rs = np.random.RandomState(33)
y = rs.normal(size=(100, 26))
d = pd.DataFrame(data=y, columns=list(ascii_letters[26:]))
# Compute the correlation matrix
corr = d.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype='bool')
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Generate the annotation
annot = np.diag(np.diag(corr.values,-1),-1)
annot = np.round(annot,2)
annot = annot.astype('str')
annot[annot=='0.0']=''
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
square=True, linewidths=.5, cbar_kws={"shrink": .5}, annot=annot, fmt='')
plt.show()
Output
In a related question, someone asked how to annotate the diagonal elements with strings. Here is an example:
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
flights = sns.load_dataset('flights')
flights = flights.pivot('year', 'month', 'passengers')
corr_data = np.corrcoef(flights.to_numpy())
up_triang = np.triu(np.ones_like(corr_data)).astype(bool)
ax = sns.heatmap(corr_data, cmap='flare', xticklabels=False, yticklabels=False, square=True,
linecolor='white', linewidths=0.5,
cbar=True, mask=up_triang, cbar_kws={'shrink': 0.6, 'pad': 0.02, 'label': 'correlation'})
ax.invert_xaxis()
for i, label in enumerate(flights.index):
ax.text(i + 0.2, i + 0.5, label, ha='right', va='center')
plt.show()

seaborn heatmap get array of color codes values

I am trying to get the color codes associated with each cell of a heatmap:
import seaborn as sns
import numpy as np
import matplotlib.cm as cm
hm = sns.heatmap(
np.random.randn(10,10),
cmap = cm.coolwarm)
# hm.<some function>[0][0] would return the color code of the cell indexed (0,0)
Because sns.heatmap returns a matplotlib axis object, we can't really use hm directly. But we can use the cmap object itself to return the rgba values of the data. Edit Code has been updated to include normalization of data.
from matplotlib.colors import Normalize
data = np.random.randn(10, 10)
cmap = cm.get_cmap('Greens')
hm = sns.heatmap(data, cmap=cmap)
# Normalize data
norm = Normalize(vmin=data.min(), vmax=data.max())
rgba_values = cmap(norm(data))
All of the colors are now contained in rgba_values. So to get the color of the upper left square in the heatmap you could simply do
In [13]: rgba_values[0,0]
Out[13]: array([ 0. , 0.26666668, 0.10588235, 1. ])
For more, check out Getting individual colors from a color map in matplotlib
Update
To readjust the colormap from using the center and robust keywords in the call to sns.heatmap, you basically just have to redefine vmin and vmax. Looking at the relevant seaborn source code (http://github.com/mwaskom/seaborn/blob/master/seaborn/matrix.py#L202), the below changes to vmin and vmax should do the trick.
data = np.random.randn(10, 10)
center = 2
robust = False
cmap = cm.coolwarm
hm = sns.heatmap(data, cmap=cmap, center=center, robust=robust)
vmin = np.percentile(data, 2) if robust else data.min()
vmax = np.percentile(data, 98) if robust else data.max()
vmin += center
vmax += center
norm = Normalize(vmin=vmin, vmax=vmax)
rgba_values = cmap(norm(data))
Without any knowledge on the input data and arguments of heatmap you can get the colors from the underlying QuadMesh, knowing that the heatmap should be the first and only collection inside the axes that is returned by heatmap.
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
data = np.array([[0,-2],[10,5]])
ax = sns.heatmap(data, center=0, cmap="bwr", robust=False)
im = ax.collections[0]
rgba_values = im.cmap(im.norm(im.get_array()))
Also see this answer. In contrast to AxesImage, QuadMesh returns a list of colors. Hence the above code will give you a 2D array where the columns are the RGBA color channels. If you need a 3D output with the first two dimensions being the same as the input data you would need to reshape
rgba_values = rgba_values.reshape((im._meshHeight, im._meshWidth, 4))

How to plot pandas kde with a horizontal orientation

Pandas offers kind='kde' when plotting. In my setting, I would prefer a kde density. The alternative kind='histogram' offers the orientation option: orientation='horizontal', which is strictly necessary for what I am doing. Unfortunately, orientation is not available for kde.
At least this is what I think that happens because I get a
in set_lineprops
raise TypeError('There is no line property "%s"' % key)
TypeError: There is no line property "orientation"
Is there any straight forward alternative for plotting kde horizontally as easily as it can be done for histogram?
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
plt.ion()
ser = pd.Series(np.random.random(1000))
ax1 = plt.subplot(2,2,1)
ser.plot(ax = ax1, kind = 'hist')
ax2 = plt.subplot(2,2,2)
ser.plot(ax = ax2, kind = 'kde')
ax3 = plt.subplot(2,2,3)
ser.plot(ax = ax3, kind = 'hist', orientation = 'horizontal')
# not working lines below
ax4 = plt.subplot(2,2,4)
ser.plot(ax = ax4, kind = 'kde', orientation = 'horizontal')
Adding previously deleted answer as a community wiki because it's a helpful answer.
pandas.Series.plot.kde does not have an option to change the orientation of the plot.
Use scipy.stats.gaussian_kde to calculate the values, and plot them on a line with matplotlib.axes.Axes.plot.
Alternatively, seaborn.kdeplot is an option.
gaussian_kde is used under the hood by both .plot.kde and sns.kdeplot
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.stats import gaussian_kde
# crate subplots and don't share x and y axis ranges
fig, axes = plt.subplots(2, 2, figsize=(10, 10), sharex=False, sharey=False)
# flatten the axes for easy selection from a 1d array
axes = axes.flat
# create sample data
np.random.seed(2022)
ser = pd.Series(np.random.random(1000)).sort_values()
# plot example plots
ser.plot(ax=axes[0], kind='hist', ec='k')
ser.plot(ax=axes[1], kind='kde')
ser.plot(ax=axes[2], kind='hist', orientation='horizontal', ec='k')
# 1. create kde model
gkde = gaussian_kde(ser)
# 2. create a linspace to match the range over which the kde model is plotted
xmin, xmax = ax2.get_xlim()
x = np.linspace(xmin, xmax, 1000)
# 3. plot the values
axes[3].plot(gkde(x), x)
# Alternatively, use seaborn.kdeplot and skip 1., 2., and 3.
# sns.kdeplot(y=ser, ax=axes[3])

Categories

Resources