How to add a dollar sign to a seaborn heatmap - python

I have the following code that generates a heatmap with seaborn from pandas data:
def create_cohort(cohort_size, retention_matrix, titulo):
    """Print the title and draw a two-panel cohort chart.

    The narrow left panel lists ``cohort_size`` as plain numbers on a white
    background; the wide right panel shows ``retention_matrix`` as a
    percentage heatmap with null cells masked out. Both panels share the
    y axis. Nothing is returned.
    """
    print(f"{titulo}\n")
    panel_widths = {'width_ratios': [1, 11]}
    with sns.axes_style("white"):
        fig, ax = plt.subplots(1, 2, figsize=(12, 8), sharey=True,
                               gridspec_kw=panel_widths)
        size_ax, retention_ax = ax

        # Right panel: retention matrix, annotated as whole percentages.
        sns.heatmap(
            retention_matrix,
            mask=retention_matrix.isnull(),
            annot=True,
            fmt='.0%',
            cmap='Purples',
            ax=retention_ax,
        )
        retention_ax.set_title(f'Cohort: {titulo}', fontsize=16)
        retention_ax.set(xlabel='Meses', ylabel='')

        # Left panel: cohort size, drawn with an all-white colormap so that
        # only the annotations are visible.
        size_frame = pd.DataFrame(cohort_size).rename(
            columns={0: 'Tamanho da cohort'})
        sns.heatmap(
            size_frame,
            annot=True,
            cbar=False,
            fmt='.0f',
            cmap=mcolors.ListedColormap(['white']),
            ax=size_ax,
        )
        fig.tight_layout()
This is an example of the resulting graph:
I would like to change the format of the leftmost table to add '$' before each number. I know I have to change fmt='.0f', but I do not know how. I also could not find documentation on the values I can pass to the fmt parameter. Can someone explain how this works, and which values I can use?

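The fmt parameter is a Python format specification that seaborn applies to every annotated value (for example '.0f', '.2f', '.0%' or ',d'); the format specification mini-language has no option for a currency prefix. So, instead of just setting annot=True, provide a list of already-formatted strings with the same shape as the dataframe, and set fmt='' so that the strings are used as they are: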
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.ticker import PercentFormatter
import numpy as np  # required: the test data below is generated with np.random
import pandas as pd
import seaborn as sns

# Random test data: N monthly cohorts with a size and a retention matrix.
N = 12
cohort_size = pd.DataFrame({0: np.random.randint(20000, 50000, N)},
                           index=[f'2021:{i:02d}' for i in range(1, N + 1)])
retention_matrix = np.random.rand(N, N)
retention_matrix[:, 0] = 1
# Keep only the cells selected by the vertically flipped lower triangle;
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
retention_matrix = np.where(np.tril(retention_matrix)[::-1], retention_matrix, np.nan)

with sns.axes_style("white"):
    fig, ax = plt.subplots(1, 2, figsize=(12, 8), sharey=True,
                           gridspec_kw={'width_ratios': [1, 11]})

    # Right panel: retention as percentages, with a percent colorbar.
    sns.heatmap(retention_matrix,
                annot=True,
                fmt='.0%',
                cmap='Purples',
                cbar_kws={'format': PercentFormatter(1)},
                ax=ax[1])
    ax[1].set(xlabel='Meses', ylabel='')

    # Left panel: cohort sizes. The annotations are pre-formatted strings
    # (one single-element row per cohort), so fmt must be empty.
    cohort_size_df = pd.DataFrame(cohort_size).rename(columns={0: 'Tamanho da cohort'})
    labels = [[f'$ {s:,d}'] for s in cohort_size_df.iloc[:, 0]]
    sns.heatmap(cohort_size_df,
                annot=labels,
                cbar=False,
                fmt='',
                cmap=ListedColormap(['white']),
                ax=ax[0])
    ax[0].tick_params(axis='y', labelrotation=0)
    fig.tight_layout()
plt.show()

Related

how to add hatches to cells in seaborn.heatmap

I tried to visualize my data with seaborn.heatmap.
However, the problem I have is that when I print it out in grayscale, the image is hard to read.
I followed the answers to many similar questions, but none of them worked.
Is there any way to add hatches over the cells in seaborn.heatmap?
My code is as below:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
# Load the data to plot; the contents of "file.csv" are not shown here.
df = pd.read_csv("file.csv")
# Square cells in the "coolwarm" colormap, separated by lines of width 1,
# with neither cell annotations (annot=False) nor a colorbar (cbar=False).
sns.heatmap(df, annot=False, fmt='.0f', square=True,
cmap="coolwarm", linewidths=1, cbar=False)
plt.show()
You could create a loop, dividing the values into e.g. 4 groups and assign a hatch pattern to each of them via pcolor applied to the subset.
Here is an example starting from random test data:
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Random test data: one cumulative random walk per star name.
column_names = [f'{c:.2f}' for c in np.arange(0, 1.5001, 0.05)]
row_names = ['Alkaid', 'Mizar', 'Alioth', 'Megrez', 'Phecda', 'Merak', 'Dubhe']
walk = np.random.normal(0.3, 1, (len(row_names), len(column_names))).cumsum(axis=1) + 5
df = pd.DataFrame(walk, columns=column_names, index=row_names)

values = df.values
vmin, vmax = values.min(), values.max()

# One hatch pattern per value band; the bands split [vmin, vmax] evenly.
patterns = ['', 'oo', '////', 'XXX']
bounds = np.linspace(vmin, vmax, len(patterns) + 1)
# Widen the last band so that the maximum passes the strict "< upper" test.
bounds[-1] += 1

# The same colormap and normalization are used for the cells and the legend.
norm = plt.Normalize(vmin, vmax)
cmap = plt.get_cmap('coolwarm')

sns.set_style('white')
fig, ax = plt.subplots(figsize=(12, 5))
sns.heatmap(data=df, linewidths=1, square=True, cmap='coolwarm', linecolor='white', cbar=False, ax=ax)

# Cell edges in heatmap coordinates (one more edge than there are cells).
x_edges = np.arange(df.shape[1] + 1)
y_edges = np.arange(df.shape[0] + 1)

legend_handles = []
for pattern, lower, upper in zip(patterns, bounds[:-1], bounds[1:]):
    # Redraw only the cells of this band, hatched; all others become NaN.
    in_band = (values >= lower) & (values < upper)
    ax.pcolor(x_edges, y_edges, np.where(in_band, values, np.nan), cmap=cmap, norm=norm,
              hatch=pattern, ec='black', lw=1)
    # Zero-size rectangle used purely as a legend entry for this band.
    swatch = plt.Rectangle((0, 0), 0, 0, color=cmap(norm((lower + upper) / 2)), ec='black',
                           hatch=pattern, label=f'{lower:5.2f}-{upper:5.2f}')
    legend_handles.append(swatch)

# White grid lines drawn over the black cell edges added by pcolor.
ax.hlines(y_edges, 0, x_edges.max(), color='w', lw=2)
ax.vlines(x_edges, 0, y_edges.max(), color='w', lw=2)
ax.legend(handles=legend_handles, bbox_to_anchor=(1.01, 1.02), loc='upper left',
          handlelength=2, handleheight=2, frameon=False)
plt.tight_layout()
plt.show()

Plotting multiple seaborn heatmaps with individual color bar

Is it possible to plot multiple seaborn heatmaps into a single figure, with a shared yticklabel, and individual color bars, like the figure below?
What I can do is to plot the heatmaps individually, using the following code:
# Figure 1: heatmap of `df` (defined outside this snippet) with a horizontal
# colorbar labelled "Pathway completeness".
plt.figure()
sns.set()
comp = sns.heatmap(df, cmap="coolwarm", linewidths=.5, xticklabels=True, yticklabels=True, cbar_kws={"orientation": "horizontal", "label": "Pathway completeness", "pad": 0.004})
comp.set_xticklabels(comp.get_xticklabels(), rotation=-90)
comp.xaxis.tick_top() # x axis on top
comp.xaxis.set_label_position('top')
# Replace the colorbar ticks with three fixed percentage labels.
cbar = comp.collections[0].colorbar
cbar.set_ticks([0, 50, 100])
cbar.set_ticklabels(['0%', '50%', '100%'])
figure = comp.get_figure()
figure.savefig("hetmap16.png", format='png', bbox_inches='tight')
# Figure 2 (figure 3 is the same, but with a different database).
# NOTE(review): the variable is called fvaMax but the colorbar label says
# "Minimum average flux" -- confirm which one is intended.
plt.figure()
sns.set()
df = pd.DataFrame(heatMapFvaMinDictP)
fvaMax = sns.heatmap(df, cmap="rocket_r", linewidths=.5, xticklabels=True, cbar_kws={"orientation": "horizontal", "label": "Minimum average flux", "pad": 0.004})
fvaMax.set_xticklabels(fvaMax.get_xticklabels(), rotation=-90)
fvaMax.xaxis.tick_top() # x axis on top
fvaMax.xaxis.set_label_position('top')
# Hide the y tick labels on this figure.
fvaMax.tick_params(axis='y', labelleft=False)
figure = fvaMax.get_figure()
figure.savefig("fva1.png", format='png', bbox_inches='tight')
Seaborn builds upon matplotlib, which can be used for further customizing plots. plt.subplots(ncols=3, sharey=True, ...) creates three subplots with a shared y-axis. Adding ax=ax1 to sns.heatmap(..., ax=...) creates the heatmap on the desired subplot. Note that the return value of sns.heatmap is again that same ax.
The following code shows an example. vmin and vmax are explicitly set for the first heatmap to make sure that both values will appear in the colorbar (the default colorbar runs between the minimum and maximum of the encountered values).
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


def _move_xticks_to_top(axes):
    """Rotate the x tick labels and put ticks and axis label above the plot."""
    axes.set_xticklabels(axes.get_xticklabels(), rotation=-90)
    axes.xaxis.tick_top()  # x axis on top
    axes.xaxis.set_label_position('top')


sns.set()
fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, sharey=True, figsize=(20, 8))

# Random test data: N rows labelled with random 40-character strings.
N = 20
labels = [''.join(np.random.choice(list('abcdefghi '), 40)) for _ in range(N)]

# First heatmap: vmin/vmax are fixed so the colorbar spans the full 0-100
# range that the percentage tick labels below refer to.
df = pd.DataFrame({'column 1': np.random.uniform(0, 100, N), 'column 2': np.random.uniform(0, 100, N)},
                  index=labels)
sns.heatmap(df, cmap="coolwarm", linewidths=.5, xticklabels=True, yticklabels=True, ax=ax1, vmin=0, vmax=100,
            cbar_kws={"orientation": "horizontal", "label": "Pathway completeness", "pad": 0.004})
_move_xticks_to_top(ax1)
cbar = ax1.collections[0].colorbar
cbar.set_ticks([0, 50, 100])
cbar.set_ticklabels(['0%', '50%', '100%'])

# Second and third heatmaps: same layout, different value ranges.
for ax in (ax2, ax3):
    max_value = 10 if ax is ax2 else 1000
    df = pd.DataFrame({'column 1': np.random.uniform(0, max_value, N), 'column 2': np.random.uniform(0, max_value, N)},
                      index=labels)
    # NOTE(review): both branches of this conditional yield "Minimum";
    # one of them was presumably meant to differ -- confirm.
    flux_label = ("Minimum" if ax is ax2 else "Minimum") + " average flux"
    sns.heatmap(df, cmap="rocket_r", linewidths=.5, xticklabels=True, ax=ax,
                cbar_kws={"orientation": "horizontal", "pad": 0.004,
                          "label": flux_label})
    _move_xticks_to_top(ax)

plt.tight_layout()
fig.savefig("subplots.png", format='png', bbox_inches='tight')
plt.show()
You can concatenate the two dataframes and use FacetGrid with FacetGrid.map_dataframe; you might need to adjust the aesthetics a bit. I don't have your data, so I try it with example data:
import pandas as pd
import numpy as np
import seaborn as sns
# Fixed seed so the example data is reproducible.
np.random.seed(111)
# Both frames use the same 15 row labels: row_variable1 ... row_variable15.
row_labels = [f'row_variable{i}' for i in range(1, 16)]
df1 = pd.DataFrame({'A': np.random.randn(15), 'B': np.random.randn(15)},
                   index=row_labels)
df2 = pd.DataFrame({'A': np.random.randn(15), 'B': np.random.randn(15)},
                   index=row_labels)
We annotate the dataframes with a column indicating the database, as in your case, and also set up a dictionary with the color scheme for each dataframe:
# Tag every row with the name of the database it came from, then stack the
# two frames so that FacetGrid can split on that column.
for frame, tag in ((df1, "database1"), (df2, "database2")):
    frame['database'] = tag
dat = pd.concat([df1, df2])
# Colormap to use for each database.
cdict = dict(database1='rocket_r', database2='coolwarm')
And define a function to draw the heatmap:
def heat(data, color):
    """Draw the heatmap for one facet.

    ``data`` is the subset of rows for a single database; its 'A' and 'B'
    columns are plotted with the colormap registered for that database in
    ``cdict``, using a horizontal colorbar. ``color`` is not used; it is
    kept so the function can be passed to FacetGrid.map_dataframe
    (presumably supplied by seaborn as a keyword -- confirm).
    """
    # .iloc[0] reads the first row by position. The rows are labelled with
    # strings, so data['database'][0] would only work through pandas'
    # deprecated positional fallback for integer keys.
    database = data['database'].iloc[0]
    sns.heatmap(data[['A', 'B']], cmap=cdict[database],
                cbar_kws={"orientation": "horizontal"})
Then facet:
# One facet (column) per distinct value of the 'database' column.
fg = sns.FacetGrid(data=dat, col='database',aspect=0.7,height=4)
# Call heat() for each facet.
fg.map_dataframe(heat)

Plot matplotlib histogram legend on separate figure

Given a line plot obtained with ax.plot(), I have the following handy code to plot the legend on a separate figure:
# Source plot: two labelled lines with different line styles.
fig, ax = plt.subplots()
ax.plot([0, 2], label='a', linestyle='--')
ax.plot([-1, 1], label='b', linestyle='dotted')
# `fig` is rebound to a second, separate figure that only holds the legend,
# built from the line artists and labels of the first axes.
fig = plt.figure(figsize=(30, 4), constrained_layout=True)
fig.legend(ax.lines, [l.get_label() for l in ax.lines],
loc="upper center")
However, if I do this with a histogram (using ax.hist()), ax.lines is empty and I cannot get the labels and styles used. Is it still possible to do that?
import matplotlib.pyplot as plt
import numpy as np

fig, ax = plt.subplots()
x = np.random.randn(1000, 3)
colors = ['red', 'tan', 'lime']

# Only the third return value of ax.hist (the drawn patches) is needed here.
*_, patches = ax.hist(x, 10, color=colors, label=colors)

# Use the first bar of each group as that group's legend handle.
legend_handles = [group[0] for group in patches]
legend_labels = [handle.get_label() for handle in legend_handles]

# `fig` is rebound to a separate figure that only holds the legend.
fig = plt.figure(figsize=(30, 4), constrained_layout=True)
fig.legend(legend_handles, legend_labels, loc="upper center")
UPDATE as per comment: you can achieve the same using ax.patches instead of patches returned by ax.hist like so:
import matplotlib.pyplot as plt
import numpy as np

fig, ax = plt.subplots()
x = np.random.randn(1000, 3)
colors = ['red', 'tan', 'lime']
n_bins = 10
ax.hist(x, n_bins, color=colors, label=colors)

# Take every n_bins-th patch from the axes as a legend handle, i.e. one bar
# per histogram (assuming ax.patches stores each histogram's n_bins bars
# contiguously -- confirm).
first_bars = ax.patches[::n_bins]

# `fig` is rebound to a separate figure that only holds the legend.
fig = plt.figure(figsize=(3, 4), constrained_layout=True)
fig.legend(first_bars, [bar.get_label() for bar in first_bars], loc="upper center")

Highlighting maximum value in a column on a seaborn heatmap

I have a seaborn.heatmap plotted from a DataFrame:
import seaborn as sns
import matplotlib.pyplot as plt
# Figure with a white background and a black edge color.
fig = plt.figure(facecolor='w', edgecolor='k')
# Annotated heatmap of collected_data_frame (defined outside this snippet),
# with the color scale capped at 1.0 and no colorbar.
sns.heatmap(collected_data_frame, annot=True, vmax=1.0, cmap='Blues', cbar=False, fmt='.4g')
I would like to create some sort of highlight for a maximum value in each column - it could be a red box around that value, or a red dot plotted next to that value, or the cell could be colored red instead of using Blues. Ideally I'm expecting something like this:
I got the highlight working for DataFrame printing in Jupyter Notebook using tips from this answer:
How can I achieve a similar thing but on a heatmap?
I customized the heatmap example from the official seaborn reference, using answers found on this site. The idea is to add a patch on top of the existing graph. Here a frame is drawn around the maximum value, but its position is set manually.
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
sns.set()
# Load the example flights dataset and convert to long-form
flights_long = sns.load_dataset("flights")
# Months become rows and years become columns. The arguments are passed by
# keyword because pandas 2.0 no longer accepts them positionally in pivot().
flights = flights_long.pivot(index="month", columns="year", values="passengers")
# Draw a heatmap with the numeric values in each cell
f, ax = plt.subplots(figsize=(9, 6))
ax = sns.heatmap(flights, annot=True, fmt="d", linewidths=.5, ax=ax)
# Manually placed frame: anchored at x=10, y=6 and spanning 2 x 2 cells.
ax.add_patch(Rectangle((10,6),2,2, fill=False, edgecolor='blue', lw=3))
To locate the maximum value programmatically:
# Column (year) that holds the overall maximum. flights.max() gives the
# maximum of every column; idxmax() then picks the column label of the
# largest one. (max(flights) would only return the largest column label.)
max_year = flights.max().idxmax()              # 1960
# Position of that column among the columns: the x coordinate of the cell.
col_pos = flights.columns.get_loc(max_year)    # 11
# Row (month) that holds the maximum within that column.
max_month = flights[max_year].idxmax()         # 'July'
# Position of that row in the index: the y coordinate of the cell.
row_pos = flights.index.get_loc(max_month)     # 6
# The rectangle is anchored at (column position, row position); passing the
# year label itself as x would place the frame far outside the heatmap.
ax.add_patch(Rectangle((col_pos, row_pos), 1, 1, fill=False, edgecolor='blue', lw=3))
Complete solution based on the answer of @r-beginners:
Generate DataFrame:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn
# Sample data: 5 rows of 12 values each, all between 0 and 1.
arr = np.array([[0.9336719 , 0.90119269, 0.90791181, 0.3112451 , 0.56715989,
0.83339874, 0.14571595, 0.36505745, 0.89847367, 0.95317909,
0.16396293, 0.63463356],
[0.93282304, 0.90605976, 0.91276066, 0.30288519, 0.56366228,
0.83032344, 0.14633036, 0.36081791, 0.9041638 , 0.95268572,
0.16803188, 0.63459491],
[0.15215358, 0.4311569 , 0.32324376, 0.51620611, 0.69872915,
0.08811177, 0.80087247, 0.234593 , 0.47973905, 0.21688613,
0.2738223 , 0.38322856],
[0.90406056, 0.89632902, 0.92220635, 0.3022458 , 0.58843012,
0.78159595, 0.17089609, 0.33443782, 0.89997103, 0.93128579,
0.15942313, 0.62644379],
[0.93868063, 0.45617598, 0.17708323, 0.81828266, 0.72986428,
0.82543775, 0.41530088, 0.2604382 , 0.33132295, 0.94686745,
0.05607774, 0.54141198]])
# Column labels are the strings '0' .. '11'; row labels are 'C1' .. 'C5'.
columns_text = [str(num) for num in range(0,12)]
index_text = ['C1', 'C2', 'C3', 'C4', 'C5']
arr_data_frame = pd.DataFrame(arr, columns=columns_text, index=index_text)
Highlighting maximum in a column:
fig, ax = plt.subplots(figsize=(15, 3), facecolor='w', edgecolor='k')
ax = seaborn.heatmap(arr_data_frame, annot=True, vmax=1.0, vmin=0, cmap='Blues',
                     cbar=False, fmt='.4g', ax=ax)

# For every column, the row label at which that column reaches its maximum.
column_max = arr_data_frame.idxmax(axis=0)
for col_pos, column_name in enumerate(columns_text):
    row_pos = arr_data_frame.index.get_loc(column_max[column_name])
    # Red frame around the cell at (column position, row position).
    frame = Rectangle((col_pos, row_pos), 1, 1, fill=False, edgecolor='red', lw=3)
    ax.add_patch(frame)

plt.savefig('max_column_heatmap.png', dpi = 500, bbox_inches='tight')
Highlighting maximum in a row:
fig, ax = plt.subplots(figsize=(15, 3), facecolor='w', edgecolor='k')
ax = seaborn.heatmap(arr_data_frame, annot=True, vmax=1.0, vmin=0, cmap='Blues',
                     cbar=False, fmt='.4g', ax=ax)

# For every row, the column label at which that row reaches its maximum.
row_max = arr_data_frame.idxmax(axis=1)
for row_pos, row_name in enumerate(index_text):
    col_pos = arr_data_frame.columns.get_loc(row_max[row_name])
    # Red frame around the cell at (column position, row position).
    frame = Rectangle((col_pos, row_pos), 1, 1, fill=False, edgecolor='red', lw=3)
    ax.add_patch(frame)

plt.savefig('max_row_heatmap.png', dpi = 500, bbox_inches='tight')

Adding minor ticks to pandas plot

I have the following code:
from pandas_datareader import data as web
import matplotlib.pyplot as plt
# Two stacked subplots, one per ticker.
fig, (ax1, ax2) = plt.subplots(2, 1)
# Download the data for the two tickers from the 'yahoo' source.
df = web.DataReader('F', 'yahoo')
df2 = web.DataReader('Fb', 'yahoo')
# Top panel: all columns of df; bottom panel: only the 'Close' column of df2.
ax = df.plot(figsize=(35,15), ax=ax1)
df2.plot(y = 'Close', figsize=(35,15), ax=ax2)
plt.show()
This produces the chart which looks like this:
How can I change the minor ticks in a pandas plot so that it produces an x axis which looks like this:
Check this code:
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import matplotlib.dates as md

fig, (ax1, ax2) = plt.subplots(2, 1)
df = web.DataReader('F', 'yahoo')
df2 = web.DataReader('Fb', 'yahoo')
ax = df.plot(figsize=(35, 15), ax=ax1)
df2.plot(y='Close', figsize=(35, 15), ax=ax2)

# Apply the same date ticks to both panels.
for ax in (ax1, ax2):
    date_axis = ax.xaxis
    # Major ticks on months 1 and 7, labelled with month abbreviation and year.
    date_axis.set_major_locator(md.MonthLocator(bymonth=range(1, 13, 6)))
    date_axis.set_major_formatter(md.DateFormatter('%b\n%Y'))
    # Minor ticks on every month.
    date_axis.set_minor_locator(md.MonthLocator())
    # Keep the major tick labels horizontal.
    plt.setp(date_axis.get_majorticklabels(), rotation=0)

plt.show()
You can manage the x ticks with the ax.xaxis methods. The above code produces this plot:

Categories

Resources