I tried to visualize my data with seaborn.heatmap.
However, the problem I have is that when I print it out in grayscle, the image is hard to read.
I follow many similar questions but it didn't work.
Is there anyway to add hatches over the cells in seaborn.heatmap?
My code is as below:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
df = pd.read_csv("file.csv")
sns.heatmap(df, annot=False, fmt='.0f', square=True,
cmap="coolwarm", linewidths=1, cbar=False)
plt.show()
You could create a loop, dividing the values into e.g. 4 groups and assign a hatch pattern to each of them via pcolor applied to the subset.
Here is an example starting from random test data:
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
column_names = [f'{c:.2f}' for c in np.arange(0, 1.5001, 0.05)]
row_names = ['Alkaid', 'Mizar', 'Alioth', 'Megrez', 'Phecda', 'Merak', 'Dubhe']
df = pd.DataFrame(np.random.normal(0.3, 1, (len(row_names), len(column_names))).cumsum(axis=1) + 5,
columns=column_names, index=row_names)
values = df.values
vmin = values.min()
vmax = values.max()
patterns = ['', 'oo', '////', 'XXX']
bounds = np.linspace(vmin, vmax, len(patterns) + 1)
bounds[-1] += 1
sns.set_style('white')
fig, ax = plt.subplots(figsize=(12, 5))
sns.heatmap(data=df, linewidths=1, square=True, cmap='coolwarm', linecolor='white', cbar=False, ax=ax)
x = np.arange(df.shape[1] + 1)
y = np.arange(df.shape[0] + 1)
handles = []
norm = plt.Normalize(vmin, vmax)
cmap = plt.get_cmap('coolwarm')
for pattern, b0, b1 in zip(patterns, bounds[:-1], bounds[1:]):
ax.pcolor(x, y, np.where((values >= b0) & (values < b1), values, np.nan), cmap=cmap, norm=norm,
hatch=pattern, ec='black', lw=1)
handles.append(plt.Rectangle((0, 0), 0, 0, color=cmap(norm((b0 + b1) / 2)), ec='black',
hatch=pattern, label=f'{b0:5.2f}-{b1:5.2f}'))
ax.hlines(y, 0, x.max(), color='w', lw=2)
ax.vlines(x, 0, y.max(), color='w', lw=2)
ax.legend(handles=handles, bbox_to_anchor=(1.01, 1.02), loc='upper left',
handlelength=2, handleheight=2, frameon=False)
plt.tight_layout()
plt.show()
I have the following code that generates a Heatmap in Pandas:
def create_cohort(cohort_size,retention_matrix,titulo):
print(f"{titulo}\n")
with sns.axes_style("white"):
fig, ax = plt.subplots(1, 2, figsize=(12, 8), sharey=True, gridspec_kw={'width_ratios': [1, 11]})
# retention matrix
sns.heatmap(retention_matrix,
mask=retention_matrix.isnull(),
annot=True,
fmt='.0%',
cmap='Purples',
ax=ax[1])
ax[1].set_title(f'Cohort: {titulo}', fontsize=16)
ax[1].set(xlabel='Meses',
ylabel='')
# cohort size
cohort_size_df = pd.DataFrame(cohort_size).rename(columns={0: 'Tamanho da cohort'})
white_cmap = mcolors.ListedColormap(['white'])
sns.heatmap(cohort_size_df,
annot=True,
cbar=False,
fmt='.0f',
cmap=white_cmap,
ax=ax[0])
fig.tight_layout()
return
This is a example of graph:
I would like to change the format of the most left table to add '$' before the number. I know I have to change the fmt='.0f', but I do not know how. I also could not find documentation on the values I can pass in the fmt parameter. Can someone also explain to me how this works? What values can I use?
Instead of just setting annot=True, a list of strings, with the same shape as the dataframe can be provided:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.ticker import PercentFormatter
import seaborn as sns
import pandas as pd
N = 12
cohort_size = pd.DataFrame({0: np.random.randint(20000, 50000, N)}, index=[f'2021:{i:02d}' for i in range(1, 13)])
retention_matrix = np.random.rand(N, N)
retention_matrix[:, 0] = 1
retention_matrix = np.where(np.tril(retention_matrix)[::-1], retention_matrix, np.NaN)
with sns.axes_style("white"):
fig, ax = plt.subplots(1, 2, figsize=(12, 8), sharey=True, gridspec_kw={'width_ratios': [1, 11]})
sns.heatmap(retention_matrix,
annot=True,
fmt='.0%',
cmap='Purples',
cbar_kws={'format': PercentFormatter(1)},
ax=ax[1])
ax[1].set(xlabel='Meses', ylabel='')
cohort_size_df = pd.DataFrame(cohort_size).rename(columns={0: 'Tamanho da cohort'})
labels = [[f'$ {s:,d}'] for s in cohort_size_df.iloc[:, 0]]
sns.heatmap(cohort_size_df,
annot=labels,
cbar=False,
fmt='',
cmap=ListedColormap(['white']),
ax=ax[0])
ax[0].tick_params(axis='y', labelrotation=0)
fig.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import numpy as np
time = np.linspace(-1,5, 100)
fig, (ax1, ax2, ax3) = plt.subplots(3,1, sharex=True, figsize=(5,5))
ax1.plot([-1,0,0,5], [1,1,0,0], color='navy')
ax1.set_ylim(-0.1,1.2)
ax2.plot(time, np.clip(np.exp(-time),0,1), color='darkolivegreen')
ax2.set_ylim(-0.1,1.2)
ax3.plot(time, np.clip(np.exp(-time),0,0.4), color='darksalmon')
ax3.set_ylim(-0.1,0.8)
I have made three subplots and arranged them vertically (x-axis shared). Is there a convenient way to add two vertical lines x=0 and x=-log(0.4) that go through all three subplots?
You can use axvline():
import matplotlib.pyplot as plt
import numpy as np
time = np.linspace(-1.5, 100)
fig, (ax1, ax2, ax3) = plt.subplots(3,1, sharex=True, figsize=(5,5))
ax1.plot([-1,0,0,5], [1,1,0,0], color='navy')
ax1.set_ylim(-0.1,1.2)
ax2.plot(time, np.clip(np.exp(-time),0,1), color='darkolivegreen')
ax2.set_ylim(-0.1,1.2)
ax3.plot(time, np.clip(np.exp(-time),0,0.4), color='darksalmon')
ax3.set_ylim(-0.1,0.8)
# add vertical lines to all subplots
for ax in [ax1, ax2, ax3]:
ax.axvline(20, c='r')
This gives:
If you want the vertical line to go through the whole figure, you need to set up an additional subplot as big as the three subplots taken together and make its background transparent. Then you can draw the line in the background subplot:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.gridspec import GridSpec
time = np.linspace(-1.5, 100)
fig = plt.figure()
gs = fig.add_gridspec(3, 2)
ax1 = fig.add_subplot(gs[0, :])
ax1.plot([-1,0,0,5], [1,1,0,0], color='navy')
ax1.set_ylim(-0.1,1.2)
ax2 = fig.add_subplot(gs[1, :], sharex = ax1)
ax2.plot(time, np.clip(np.exp(-time),0,1), color='darkolivegreen')
ax2.set_ylim(-0.1,1.2)
ax3 = fig.add_subplot(gs[2, :], sharex = ax1)
ax3.plot(time, np.clip(np.exp(-time),0,0.4), color='darksalmon')
ax3.set_ylim(-0.1,0.8)
# background axes object for plotting the vertical line
ax = fig.add_subplot(gs[:, :], sharex = ax1)
# set background color to transparent and turn off the frame
ax.patch.set_alpha(0)
ax.axis("off")
# plot the vertical line
ax.axvline(20, c='r')
plt.show()
This gives:
I have a plot with two y-axes, using twinx(). I also give labels to the lines, and want to show them with legend(), but I only succeed to get the labels of one axis in the legend:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('mathtext', default='regular')
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(time, Swdown, '-', label = 'Swdown')
ax.plot(time, Rn, '-', label = 'Rn')
ax2 = ax.twinx()
ax2.plot(time, temp, '-r', label = 'temp')
ax.legend(loc=0)
ax.grid()
ax.set_xlabel("Time (h)")
ax.set_ylabel(r"Radiation ($MJ\,m^{-2}\,d^{-1}$)")
ax2.set_ylabel(r"Temperature ($^\circ$C)")
ax2.set_ylim(0, 35)
ax.set_ylim(-20,100)
plt.show()
So I only get the labels of the first axis in the legend, and not the label 'temp' of the second axis. How could I add this third label to the legend?
You can easily add a second legend by adding the line:
ax2.legend(loc=0)
You'll get this:
But if you want all labels on one legend then you should do something like this:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('mathtext', default='regular')
time = np.arange(10)
temp = np.random.random(10)*30
Swdown = np.random.random(10)*100-10
Rn = np.random.random(10)*100-10
fig = plt.figure()
ax = fig.add_subplot(111)
lns1 = ax.plot(time, Swdown, '-', label = 'Swdown')
lns2 = ax.plot(time, Rn, '-', label = 'Rn')
ax2 = ax.twinx()
lns3 = ax2.plot(time, temp, '-r', label = 'temp')
# added these three lines
lns = lns1+lns2+lns3
labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc=0)
ax.grid()
ax.set_xlabel("Time (h)")
ax.set_ylabel(r"Radiation ($MJ\,m^{-2}\,d^{-1}$)")
ax2.set_ylabel(r"Temperature ($^\circ$C)")
ax2.set_ylim(0, 35)
ax.set_ylim(-20,100)
plt.show()
Which will give you this:
I'm not sure if this functionality is new, but you can also use the get_legend_handles_labels() method rather than keeping track of lines and labels yourself:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('mathtext', default='regular')
pi = np.pi
# fake data
time = np.linspace (0, 25, 50)
temp = 50 / np.sqrt (2 * pi * 3**2) \
* np.exp (-((time - 13)**2 / (3**2))**2) + 15
Swdown = 400 / np.sqrt (2 * pi * 3**2) * np.exp (-((time - 13)**2 / (3**2))**2)
Rn = Swdown - 10
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(time, Swdown, '-', label = 'Swdown')
ax.plot(time, Rn, '-', label = 'Rn')
ax2 = ax.twinx()
ax2.plot(time, temp, '-r', label = 'temp')
# ask matplotlib for the plotted objects and their labels
lines, labels = ax.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(lines + lines2, labels + labels2, loc=0)
ax.grid()
ax.set_xlabel("Time (h)")
ax.set_ylabel(r"Radiation ($MJ\,m^{-2}\,d^{-1}$)")
ax2.set_ylabel(r"Temperature ($^\circ$C)")
ax2.set_ylim(0, 35)
ax.set_ylim(-20,100)
plt.show()
From matplotlib version 2.1 onwards, you may use a figure legend. Instead of ax.legend(), which produces a legend with the handles from the axes ax, one can create a figure legend
fig.legend(loc="upper right")
which will gather all handles from all subplots in the figure. Since it is a figure legend, it will be placed at the corner of the figure, and the loc argument is relative to the figure.
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0,10)
y = np.linspace(0,10)
z = np.sin(x/3)**2*98
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x,y, '-', label = 'Quantity 1')
ax2 = ax.twinx()
ax2.plot(x,z, '-r', label = 'Quantity 2')
fig.legend(loc="upper right")
ax.set_xlabel("x [units]")
ax.set_ylabel(r"Quantity 1")
ax2.set_ylabel(r"Quantity 2")
plt.show()
In order to place the legend back into the axes, one would supply a bbox_to_anchor and a bbox_transform. The latter would be the axes transform of the axes the legend should reside in. The former may be the coordinates of the edge defined by loc given in axes coordinates.
fig.legend(loc="upper right", bbox_to_anchor=(1,1), bbox_transform=ax.transAxes)
You can easily get what you want by adding the line in ax:
ax.plot([], [], '-r', label = 'temp')
or
ax.plot(np.nan, '-r', label = 'temp')
This would plot nothing but add a label to legend of ax.
I think this is a much easier way.
It's not necessary to track lines automatically when you have only a few lines in the second axes, as fixing by hand like above would be quite easy. Anyway, it depends on what you need.
The whole code is as below:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('mathtext', default='regular')
time = np.arange(22.)
temp = 20*np.random.rand(22)
Swdown = 10*np.random.randn(22)+40
Rn = 40*np.random.rand(22)
fig = plt.figure()
ax = fig.add_subplot(111)
ax2 = ax.twinx()
#---------- look at below -----------
ax.plot(time, Swdown, '-', label = 'Swdown')
ax.plot(time, Rn, '-', label = 'Rn')
ax2.plot(time, temp, '-r') # The true line in ax2
ax.plot(np.nan, '-r', label = 'temp') # Make an agent in ax
ax.legend(loc=0)
#---------------done-----------------
ax.grid()
ax.set_xlabel("Time (h)")
ax.set_ylabel(r"Radiation ($MJ\,m^{-2}\,d^{-1}$)")
ax2.set_ylabel(r"Temperature ($^\circ$C)")
ax2.set_ylim(0, 35)
ax.set_ylim(-20,100)
plt.show()
The plot is as below:
Update: add a better version:
ax.plot(np.nan, '-r', label = 'temp')
This will do nothing while plot(0, 0) may change the axis range.
One extra example for scatter
ax.scatter([], [], s=100, label = 'temp') # Make an agent in ax
ax2.scatter(time, temp, s=10) # The true scatter in ax2
ax.legend(loc=1, framealpha=1)
Preparation
import numpy as np
from matplotlib import pyplot as plt
fig, ax1 = plt.subplots( figsize=(15,6) )
Y1, Y2 = np.random.random((2,100))
ax2 = ax1.twinx()
Content
I'm surprised it did not show up so far but the simplest way is to either collect them manually into one of the axes objs (that lie on top of each other)
l1 = ax1.plot( range(len(Y1)), Y1, label='Label 1' )
l2 = ax2.plot( range(len(Y2)), Y2, label='Label 2', color='orange' )
ax1.legend( handles=l1+l2 )
or have them collected automatically into the surrounding figure by fig.legend() and fiddle around with the the bbox_to_anchor parameter:
ax1.plot( range(len(Y1)), Y1, label='Label 1' )
ax2.plot( range(len(Y2)), Y2, label='Label 2', color='orange' )
fig.legend( bbox_to_anchor=(.97, .97) )
Finalization
fig.tight_layout()
fig.savefig('stackoverflow.png', bbox_inches='tight')
A quick hack that may suit your needs..
Take off the frame of the box and manually position the two legends next to each other. Something like this..
ax1.legend(loc = (.75,.1), frameon = False)
ax2.legend( loc = (.75, .05), frameon = False)
Where the loc tuple is left-to-right and bottom-to-top percentages that represent the location in the chart.
I found an following official matplotlib example that uses host_subplot to display multiple y-axes and all the different labels in one legend. No workaround necessary. Best solution I found so far.
http://matplotlib.org/examples/axes_grid/demo_parasite_axes2.html
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
host = host_subplot(111, axes_class=AA.Axes)
plt.subplots_adjust(right=0.75)
par1 = host.twinx()
par2 = host.twinx()
offset = 60
new_fixed_axis = par2.get_grid_helper().new_fixed_axis
par2.axis["right"] = new_fixed_axis(loc="right",
axes=par2,
offset=(offset, 0))
par2.axis["right"].toggle(all=True)
host.set_xlim(0, 2)
host.set_ylim(0, 2)
host.set_xlabel("Distance")
host.set_ylabel("Density")
par1.set_ylabel("Temperature")
par2.set_ylabel("Velocity")
p1, = host.plot([0, 1, 2], [0, 1, 2], label="Density")
p2, = par1.plot([0, 1, 2], [0, 3, 2], label="Temperature")
p3, = par2.plot([0, 1, 2], [50, 30, 15], label="Velocity")
par1.set_ylim(0, 4)
par2.set_ylim(1, 65)
host.legend()
plt.draw()
plt.show()
If you are using Seaborn you can do this:
g = sns.barplot('arguments blah blah')
g2 = sns.lineplot('arguments blah blah')
h1,l1 = g.get_legend_handles_labels()
h2,l2 = g2.get_legend_handles_labels()
#Merging two legends
g.legend(h1+h2, l1+l2, title_fontsize='10')
#removes the second legend
g2.get_legend().remove()
As provided in the example from matplotlib.org, a clean way to implement a single legend from multiple axes is with plot handles:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
fig.subplots_adjust(right=0.75)
twin1 = ax.twinx()
twin2 = ax.twinx()
# Offset the right spine of twin2. The ticks and label have already been
# placed on the right by twinx above.
twin2.spines.right.set_position(("axes", 1.2))
p1, = ax.plot([0, 1, 2], [0, 1, 2], "b-", label="Density")
p2, = twin1.plot([0, 1, 2], [0, 3, 2], "r-", label="Temperature")
p3, = twin2.plot([0, 1, 2], [50, 30, 15], "g-", label="Velocity")
ax.set_xlim(0, 2)
ax.set_ylim(0, 2)
twin1.set_ylim(0, 4)
twin2.set_ylim(1, 65)
ax.set_xlabel("Distance")
ax.set_ylabel("Density")
twin1.set_ylabel("Temperature")
twin2.set_ylabel("Velocity")
ax.yaxis.label.set_color(p1.get_color())
twin1.yaxis.label.set_color(p2.get_color())
twin2.yaxis.label.set_color(p3.get_color())
tkw = dict(size=4, width=1.5)
ax.tick_params(axis='y', colors=p1.get_color(), **tkw)
twin1.tick_params(axis='y', colors=p2.get_color(), **tkw)
twin2.tick_params(axis='y', colors=p3.get_color(), **tkw)
ax.tick_params(axis='x', **tkw)
ax.legend(handles=[p1, p2, p3])
plt.show()
Here is another way to do this:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('mathtext', default='regular')
fig = plt.figure()
ax = fig.add_subplot(111)
pl_1, = ax.plot(time, Swdown, '-')
label_1 = 'Swdown'
pl_2, = ax.plot(time, Rn, '-')
label_2 = 'Rn'
ax2 = ax.twinx()
pl_3, = ax2.plot(time, temp, '-r')
label_3 = 'temp'
ax.legend([pl[enter image description here][1]_1, pl_2, pl_3], [label_1, label_2, label_3], loc=0)
ax.grid()
ax.set_xlabel("Time (h)")
ax.set_ylabel(r"Radiation ($MJ\,m^{-2}\,d^{-1}$)")
ax2.set_ylabel(r"Temperature ($^\circ$C)")
ax2.set_ylim(0, 35)
ax.set_ylim(-20,100)
plt.show()
enter image description here
The solutions proposed so far have one or two inconvenients:
Handles needs to be collected individually when plotting, e.g. lns1 = ax.plot(time, Swdown, '-', label = 'Swdown'). There is a risk of forgetting handles when updating the code.
Legend is drawn for the whole figure, not by subplot, which is likely a no-go if you have multiple subplots.
This new solution takes advantage of Axes.get_legend_handles_labels() to collect existing handles and labels for the main axis and for the twin axis.
Collecting handles and labels automatically
This numpy operation will scan all axes which share the same subplot area than ax, including ax and return merged handles and labels:
hl = np.hstack([axis.get_legend_handles_labels()
for axis in ax.figure.axes
if axis.bbox.bounds == ax.bbox.bounds])
It can be used to feed legend() arguments this way:
import numpy as np
import matplotlib.pyplot as plt
t = np.arange(1, 200)
signals = [np.exp(-t/20) * np.cos(t*k) for k in (1, 2)]
fig, axes = plt.subplots(nrows=2, figsize=(10, 3), layout='constrained')
axes = axes.flatten()
for i, (ax, signal) in enumerate(zip(axes, signals)):
# Plot as usual, no change to the code
ax.plot(t, signal, label=f'plotted on axes[{i}]', c='C0', lw=9, alpha=0.3)
ax2 = ax.twinx()
ax2.plot(t, signal, label=f'plotted on axes[{i}].twinx()', c='C1')
# The only specificity of the code is when plotting the legend
h, l = np.hstack([axis.get_legend_handles_labels()
for axis in ax.figure.axes
if axis.bbox.bounds == ax.bbox.bounds]).tolist()
ax2.legend(handles=h, labels=l, loc='upper right')