How can I mask the lower triangle while hierarchical clustering with seaborn's clustermap?
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Pearson correlation matrix of 10 random series with 200 samples each
samples = np.random.randn(10, 200)
corr = np.corrcoef(samples)

# Non-zero entries flag the lower triangle (diagonal included) as hidden
mask = np.tril(np.ones_like(corr))

# Plain heatmap: the mask hides the lower triangle as expected
fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(corr, mask=mask, cmap="Blues", cbar=False)

# Clustermap with the very same mask: the result is not the expected triangle
sns.clustermap(corr, mask=mask, cmap="Blues", figsize=(6, 6))
plt.show()
Well, the clustermap clusters the values according to similarity. This changes the order of the rows and the columns.
You could create a regular clustermap, and in a second step apply the mask:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

corr = np.corrcoef(np.random.randn(10, 200))

# Step 1: draw the clustermap without a mask
g = sns.clustermap(corr, cmap="Blues", figsize=(6, 6))

# Step 2: take the values of the drawn heatmap, hide their lower triangle
# (diagonal included) and hand the masked values back to the artist
mesh = g.ax_heatmap.collections[0]
mask = np.tril(np.ones_like(corr))
drawn_values = mesh.get_array().reshape(corr.shape)
mesh.set_array(np.ma.array(drawn_values, mask=mask))
plt.show()
Related
Assume we have a heat-map like the one below,
constructed using the following code:
import string
import numpy as np
from matplotlib import pyplot as plt

label = list(string.ascii_uppercase)
n_labels = len(label)

# Three random square matrices; only the first one is plotted
mdata = np.random.randn(3, n_labels, n_labels)
# Keep the strictly lower triangle and zero everything else
data = np.tril(mdata[0, :, :], -1)

fig, ax = plt.subplots()
heatmap = ax.pcolor(data, cmap=plt.cm.Blues)
plt.show()
Is it possible, using Matplotlib, Seaborn or any other package, to render it in an isometric
alignment, as shown below?
With matplotlib's 3D toolkit, and using numpy's triu_indices, you could create a bar plot from the triangular matrix:
import numpy as np
import matplotlib.pyplot as plt

ax = plt.figure().add_subplot(projection='3d')
N = 26
data = np.random.randn(3, N, N)
# Positions of the strictly upper triangle; they are the same for every
# plane, so they are computed once outside the loop
indices = np.triu_indices(N, 1)
for i, (plane, cmap) in enumerate(zip(data, ['Reds', 'Greens', 'Blues'])):
    # Scale this plane's values to [0, 1] for the colormap lookup
    norm = plt.Normalize(plane.min(), plane.max())
    # One layer of bars per plane, placed at depth zs=i along the y direction
    ax.bar(left=indices[0], bottom=indices[1], height=0.9,
           zs=i, zdir='y',
           color=plt.get_cmap(cmap)(norm(plane[indices])))
plt.show()
PS: To have full rectangles, the sub-arrays from np.indices need to be made 1D:
import numpy as np
import matplotlib.pyplot as plt

ax = plt.figure().add_subplot(projection='3d')
N = 26
data = np.random.randn(3, N, N)
# Row/column index grids for every cell; they are the same for every plane,
# so they are computed once outside the loop
indices = np.indices((N, N))
for i, (plane, cmap) in enumerate(zip(data, ['Reds', 'Greens', 'Blues'])):
    # Scale this plane's values to [0, 1] for the colormap lookup
    norm = plt.Normalize(plane.min(), plane.max())
    # The 2D index grids and the normalised values are flattened to 1D
    # before being passed to bar()
    ax.bar(left=indices[0].ravel(), bottom=indices[1].ravel(), height=0.9,
           zs=i, zdir='y',
           color=plt.get_cmap(cmap)(norm(plane).ravel()))
plt.show()
The code below gives me the heatmap output, but I want to add rectangle patches to highlight values in the ranges 0.4 to 0.99 and -0.99 to -0.4.
plt.figure(figsize=(15, 10))
# Hide the upper triangle (diagonal included). The builtin bool replaces the
# np.bool alias, which NumPy deprecated in 1.20 and removed in 1.24.
mask = np.triu(np.ones_like(corr, dtype=bool))
sns.heatmap(corr, annot=True, fmt=".2f", mask=mask, cmap="YlGnBu")
The heatmap data for the categorical variables was taken from Kaggle's home price data. To add a rectangle, create a patches.Rectangle and pass it to add_patch(). The coordinates refer to the rectangle's lower-left corner, so specify x and y as a tuple, followed by the width and the height. We also specify that the rectangle should not be filled.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as patches

fig, ax = plt.subplots(figsize=(18, 18))
df_house = pd.read_csv('./data/house_prices_train.csv', index_col=0)
# Restrict to numeric/bool columns explicitly: pandas >= 2.0 no longer drops
# non-numeric columns silently in corr() and raises on them instead
df_house_corr = df_house.select_dtypes(include=['number', 'bool']).corr()
# Hide the upper triangle (diagonal included); the builtin bool replaces the
# np.bool alias, which NumPy deprecated in 1.20 and removed in 1.24
mask = np.triu(np.ones_like(df_house_corr, dtype=bool))
sns.heatmap(df_house_corr, annot=True, fmt=".2f", mask=mask, cmap="YlGnBu", ax=ax)
# Unfilled red outline: lower-left corner at (x=5, y=6), 1 wide and 35 high
ax.add_patch(
    patches.Rectangle(
        (5, 6),
        1.0,
        35.0,
        edgecolor='red',
        fill=False,
        lw=2,
    )
)
plt.show()
Ok so without the data I made the solution with the values of a uniform distribution. Copy-paste your data in the script and it should work as long as they are of NumPy array-like type.
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import seaborn as sns

fig, ax = plt.subplots(figsize=(15, 10))
data_len = 17
uniform_data = np.random.rand(data_len, data_len)
# np.bool is deprecated in Numpy 1.20
mask = np.triu(np.ones_like(uniform_data, dtype=bool))
heatmap = sns.heatmap(uniform_data, annot=True, fmt='.2f', mask=mask, cmap='YlGnBu', ax=ax)

# Visit only the strictly lower triangle, i.e. the cells the mask leaves visible.
# np.tril_indices returns (row indices, column indices).
row_indices, col_indices = np.tril_indices(n=data_len, k=-1)
for row_index, col_index in zip(row_indices, col_indices):
    # Test the value that is actually displayed in this cell
    if 0.4 <= np.abs(uniform_data[row_index, col_index]) <= 0.99:
        # The patch is anchored at its lower-left corner: x is the column, y is the row
        rect = patches.Rectangle((col_index, row_index), 1, 1, fill=True, facecolor='red', alpha=0.5)
        ax.add_patch(rect)
plt.show()
The idea is to get the indices of all the values of the lower triangle to prevent looping through unnecessary values. The latter values are inspected and if the condition is met, a rectangle is drawn at its position.
You get the following result:
If I correctly understood your problem, this script should do the trick.
I am able to make a histogram in Python, but I am unable to add a density curve. I have seen many code examples that use different ways to add a density curve to a histogram, but I am not sure how to apply them to my code.
I have added density=True, but I still do not get a density curve on the histogram.
df = pd.DataFrame(np.random.randn(100, 4), columns=list('ABCD'))
X = df['A']

# Normalised 10-bin histogram computed with numpy, then drawn as bars
hist, bins = np.histogram(X, bins=10, density=True)
bin_width = bins[1] - bins[0]
width = 0.7 * bin_width
center = (bins[:-1] + bins[1:]) / 2
plt.bar(center, hist, width=width, align='center')
plt.show()
Here is an approach using distplot method of seaborn. Also, mentioned in the comments:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# 100 standard-normal samples in each of the columns A-D
df = pd.DataFrame(np.random.randn(100, 4), columns=list('ABCD'))
X = df['A']

# 20-bin histogram of column A with a kernel density estimate drawn on top
sns.distplot(X, bins=20, hist=True, kde=True)
plt.show()
However, distplot will be removed in a future version of seaborn. Therefore, alternatives are to use histplot and displot.
sns.histplot
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# 100 standard-normal samples in each of the columns A-D
random_values = np.random.randn(100, 4)
df = pd.DataFrame(random_values, columns=list('ABCD'))
X = df['A']

# Axes-level histogram of column A (20 bins) with a KDE curve overlaid
sns.histplot(X, bins=20, kde=True)
plt.show()
sns.displot
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

column_names = list('ABCD')
df = pd.DataFrame(np.random.randn(100, 4), columns=column_names)
X = df['A']

# Figure-level distribution plot of column A (20 bins) with a KDE curve overlaid
sns.displot(X, bins=20, kde=True)
plt.show()
Pandas also has kde plot:
# Normalised 10-bin histogram drawn as bars in the background (zorder=1)
hist, bins = np.histogram(X, bins=10, density=True)
bin_width = bins[1] - bins[0]
width = 0.7 * bin_width
center = (bins[:-1] + bins[1:]) / 2
plt.bar(center, hist, width=width, align='center', zorder=1)

# density plot, drawn above the bars (zorder=2)
df['A'].plot.kde(color='C1', zorder=2)
plt.show()
Output:
I would like to create a plot where dots are overlaid depending on whether or not they are within the 1st-3rd quartiles in seaborn. What function to use?
Something similar to the figure:
The following code creates a Seaborn swarmplot and then recolors the dots depending on their quartile. Looping through the collections created by the swarmplot, the y-data are retrieved. np.percentile calculates the borders of the quartiles and np.digitize calculates the corresponding quartiles. These quartiles can be used to define the color.
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.colors import ListedColormap

sns.set(style="whitegrid")
tips = sns.load_dataset("tips")
# Alternative colormap: cmap = plt.get_cmap('tab10')
cmap = ListedColormap(['gold', 'crimson', 'teal', 'orange'])
ax = sns.swarmplot(x="day", y="total_bill", data=tips)
for col in ax.collections:
    # y coordinates of the dots in this collection
    y = col.get_offsets()[:, 1]
    if len(y) == 0:
        # np.percentile cannot handle an empty collection
        continue
    # Quartile borders of this collection's own data
    perc = np.percentile(y, [25, 50, 75])
    col.set_cmap(cmap)
    # np.digitize yields the quartile number 0..3 of each dot; it is used
    # as the colour value
    col.set_array(np.digitize(y, perc))
plt.show()
The same approach can be used for a stripplot (optionally without jitter) to create a plot similar to the one in the question.
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.colors import ListedColormap

sns.set(style="whitegrid")
N = 200
# Seven categories 'a'..'g' with N points each; every category gets its own
# random mean between 11 and 15 and a standard deviation of 1
x = np.repeat(list('abcdefg'), N)
y = np.random.normal(np.repeat(np.random.uniform(11, 15, 7), N), 1)
cmap = ListedColormap(['grey', 'turquoise', 'grey'])
ax = sns.stripplot(x=x, y=y, jitter=False, alpha=0.2)
for col in ax.collections:
    # Separate name so the input data `y` is not overwritten inside the loop
    dot_y = col.get_offsets()[:, 1]
    if len(dot_y) == 0:
        # np.percentile cannot handle an empty collection
        continue
    # Borders of the inter-quartile range of this collection's own data
    perc = np.percentile(dot_y, [25, 75])
    col.set_cmap(cmap)
    # 0 = below the 1st quartile, 1 = between the quartiles, 2 = above the 3rd
    col.set_array(np.digitize(dot_y, perc))
plt.show()
I am using Seaborn heatmap to plot the output of a large confusion matrix. Since the diagonal element represents the correct prediction, they are more important to show the number/correct rate. As the question suggests, how to annotate only the diagonal entries in a heatmap?
I have consulted this website https://seaborn.pydata.org/examples/many_pairwise_correlations.html, but it does not help with how to annotate only the diagonal entries. Hope somebody could help with that. Thank you in advance!
Does this help you get what you have in mind? The example at the URL you gave does not have a main diagonal, so I annotated the diagonal just below the main diagonal instead. To annotate the diagonal of your confusion matrix, you can adapt my code by changing the -1 value in np.diag(..., -1) to 0.
Note the additional parameter fmt='' that I had added in sns.heatmap(...) because my annot matrix elements are strings.
Code
from string import ascii_letters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="white")

# Generate a large random dataset
rs = np.random.RandomState(33)
y = rs.normal(size=(100, 26))
d = pd.DataFrame(data=y, columns=list(ascii_letters[26:]))

# Compute the correlation matrix
corr = d.corr()

# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype='bool')
mask[np.triu_indices_from(mask)] = True

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)

# Generate the annotation: text only on one diagonal, empty strings elsewhere.
# -1 selects the diagonal just below the main one; use 0 for the main diagonal.
diag_offset = -1
diag_values = np.diag(corr.values, diag_offset)
annot = np.diag(diag_values, diag_offset)
annot = np.round(annot, 2) + 0.0  # "+ 0.0" turns -0.0 into 0.0
annot = annot.astype('str')
# Blank cells by position rather than by comparing against the text '0.0',
# so a diagonal value that rounds to zero keeps its label
on_diagonal = np.diag(np.ones_like(diag_values, dtype=bool), diag_offset)
annot[~on_diagonal] = ''

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5}, annot=annot, fmt='')
plt.show()
Output
In a related question, someone asked how to annotate the diagonal elements with strings. Here is an example:
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np

flights = sns.load_dataset('flights')
# DataFrame.pivot takes keyword-only arguments since pandas 2.0
flights = flights.pivot(index='year', columns='month', values='passengers')
# Correlation between the rows (years) of the pivoted table
corr_data = np.corrcoef(flights.to_numpy())
# Hide the upper triangle, diagonal included
up_triang = np.triu(np.ones_like(corr_data)).astype(bool)
ax = sns.heatmap(corr_data, cmap='flare', xticklabels=False, yticklabels=False, square=True,
                 linecolor='white', linewidths=0.5,
                 cbar=True, mask=up_triang, cbar_kws={'shrink': 0.6, 'pad': 0.02, 'label': 'correlation'})
ax.invert_xaxis()
# Write each row label into the (masked, hence empty) diagonal cell of its row
for i, label in enumerate(flights.index):
    ax.text(i + 0.2, i + 0.5, label, ha='right', va='center')
plt.show()