import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Grouped bar chart: one group of four bars (one per age bracket) per film.
# `star_wars` (survey DataFrame) and `titles` (film names) are defined
# outside this snippet.
# The first eight aggregated columns are skipped; presumably the remaining
# six are the per-film ranking columns (np.arange(6) below relies on that).
rankings_by_age = star_wars.groupby("Age").agg(np.mean).iloc[:, 8:]
age_first = rankings_by_age.iloc[0, :].values
age_second = rankings_by_age.iloc[1, :].values
age_third = rankings_by_age.iloc[2, :].values
age_fourth = rankings_by_age.iloc[3, :].values

fig, ax = plt.subplots(figsize=(12, 9))
ind = np.arange(6)  # left-most bar position of every group
width = 0.2         # width of a single bar
rects_1 = ax.bar(ind, age_first, width,
                 color=(114/255, 158/255, 206/255), alpha=.8)
rects_2 = ax.bar(ind + width, age_second, width,
                 color=(255/255, 158/255, 74/255), alpha=.8)
rects_3 = ax.bar(ind + 2*width, age_third, width,
                 color=(103/255, 191/255, 92/255), alpha=.8)
rects_4 = ax.bar(ind + 3*width, age_fourth, width,
                 color=(237/255, 102/255, 93/255), alpha=.8)

ax.set_title("Star Wars Film Rankings by Age")
ax.set_ylabel("Ranking")
# Centre each tick under its group of four bars rather than under the
# first bar only.
ax.set_xticks(ind + 1.5 * width)
ax.set_xticklabels(titles, rotation=45)
# tick_params expects booleans; the legacy 'off' strings are truthy and
# would leave the tick marks switched on.
ax.tick_params(top=False, right=False, left=False, bottom=False)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.legend((rects_1[0], rects_2[0], rects_3[0], rects_4[0]),
          ('18-29', '30-44', '45-60', '> 60'), title="Age")
plt.show()
I want to replicate this plot using seaborn, but I am not sure how to go about plotting multiple bars for each category. I understand how to do it using one age group at a time, but getting more than one bar per age group seems tricky. Any help would be appreciated.
Quoting the seaborn bar plot documentation, you can use the hue argument to determine which column of the dataframe the bars should be grouped by.
# `seaborn.apionly` was deprecated and later removed; importing seaborn
# directly no longer changes matplotlib's global style on its own.
import seaborn as sns
import matplotlib.pyplot as plt

# `hue` splits every x category ("day") into one bar per "sex" value.
df = sns.load_dataset("tips")
ax = sns.barplot(data=df, x="day", y="total_bill", hue="sex")
plt.show()
Related
I'm trying to make a heatmap with extensive y-axis descriptions.
I would like to know if there is any way to have a second and a third layer of y tick labels.
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Heatmap of 24 rows (6 groups x 4 sub-rows) by 5 columns, with a white
# separator line between consecutive groups.
fig, ax = plt.subplots(figsize=(20, 25))
sns.set(style="darkgrid")
colName = [r'A', r'B', r'C', r'D', r'E']
colTitile = 'Test'
rowName = [r'a', r'b', r'c', r'd']
rowsName = [r'Vegetables', r'Fruits', r'Meats', r'Cheese',
            r'Candy', r'Other']
rowTitile = 'Groups'
# 24 x 5 cells need 120 values; arange(100) cannot be reshaped to (24, 5).
heatmapdata = np.arange(120).reshape(24, 5)
sns.heatmap(heatmapdata,
            cmap='turbo',
            cbar=True,
            vmin=0,
            vmax=100,
            ax=ax,
            xticklabels=colName,
            # One label per row: repeat the four sub-row names per group.
            yticklabels=np.tile(rowName, len(rowsName)))
# Separator every four rows, i.e. at each group boundary.
for x in np.arange(0, len(ax.get_yticks()), 4):
    ax.axhline(x, color='white', lw=2)
Is there any way to do this? Which function should I use?
Thanks!
The row labels can be set through the heatmap's own arguments, but beyond that I think annotations are the only way to handle this. The second-level group names are added with the annotate function, with coordinates given relative to the axes. The axis title is added with the text function, also using axes-relative coordinates.
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Heatmap with two levels of row labels: the per-row tick labels (a-d,
# repeated) and a group name written to the left of every block of rows.
fig, ax = plt.subplots(figsize=(10, 10))
sns.set(style="darkgrid")
colName = [r'A', r'B', r'C', r'D', r'E']
colTitile = 'Test'
rowName = [r'a', r'b', r'c', r'd']
rowsName = [r'Vegetables', r'Fruits', r'Meats', r'Cheese',
            r'Candy', r'Other']
rowTitle = 'Groups'
rows_per_group = len(rowName)
n_rows = len(rowsName) * rows_per_group
heatmapdata = np.arange(n_rows * len(colName)).reshape(n_rows, len(colName))
sns.heatmap(heatmapdata,
            cmap='turbo',
            cbar=True,
            vmin=0,
            vmax=100,
            ax=ax,
            xticklabels=colName,
            yticklabels=np.tile(rowName, len(rowsName)))
# White separator at every group boundary (the y axis is inverted, so
# get_ylim()[0] is the number of rows).
for y in np.arange(0, ax.get_ylim()[0], rows_per_group):
    ax.axhline(y, color='white', lw=2)
# Group names: x is a fraction of the axes width (left of the tick labels),
# y is the data coordinate of the group's middle row. Anchoring y to the
# data keeps the labels aligned for any figure size, unlike fixed offsets
# in points.
for idx, g in enumerate(rowsName):
    ax.annotate(g,
                xy=(-0.22, idx * rows_per_group + rows_per_group / 2),
                xycoords=('axes fraction', 'data'),
                va='center', size=14)
# Title of the grouped axis, rotated and centred along the left side.
ax.text(x=-0.3, y=0.5, s=rowTitle, ha='center', transform=ax.transAxes,
        rotation=90, fontsize=16)
plt.show()
I have the following code to create a heatmap. However, the color bar overlaps the text on the right axis. The text itself is fine; I want to keep it at that length.
How can I place the colorbar on the right/left side of the heatmap with no overlap?
I tried the "pad" parameter in cbar_kws, but it didn't help.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of columns A and B with the values of column C written as tick
# labels on a secondary (right-hand) y axis. The colorbar is left in its
# default position, which is what makes it collide with those labels.
PT = pd.DataFrame(np.random.randn(300, 3), columns=list('ABC'))
miniPT = PT.iloc[:, :-1]
SMALL_SIZE = 8
MEDIUM_SIZE = 80
BIGGER_SIZE = 120
plt.rc('font', size=MEDIUM_SIZE)  # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE)  # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)  # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)  # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)  # fontsize of the tick labels
plt.rc('legend', fontsize=MEDIUM_SIZE)  # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title
plt.figure(figsize=(10, miniPT.shape[0] / 5.2))
ax = sns.heatmap(miniPT, annot=False, cmap='RdYlGn')
for _, spine in ax.spines.items():
    spine.set_visible(True)
# second axis
asset_list = np.asarray(PT['C'])
ax3 = ax.twinx()
# Copy the heatmap's (inverted) limits so row i sits at the same height on
# both axes; [0, ax.get_ylim()[1]] would collapse to [0, 0] here.
ax3.set_ylim(ax.get_ylim())
# One tick per row, at the row centre, so the number of ticks always
# matches the number of labels.
ax3.set_yticks(np.arange(len(PT)) + 0.5)
ax3.set_yticklabels(asset_list, fontsize=MEDIUM_SIZE*0.6)
# colorbar
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=MEDIUM_SIZE)
One way to get the overlap automatically adjusted by matplotlib is to explicitly create subplots: one for the heatmap and another for the colorbar. sns.heatmap's cbar_ax= parameter can be set to point to this subplot. gridspec_kw= is needed to set the relative sizes. At the end, plt.tight_layout() will adjust all the paddings to make everything fit nicely.
The question's code contains some strange settings (e.g. a fontsize of 80 is immense). Also, 300 rows will inevitably lead to overlapping text (the fontsize needs to be so small that non-overlapping text wouldn't be readable). Here is some more simplified example code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap and colorbar get their own subplots (10:1 width ratio) so that
# tight_layout() can make room for the long right-hand tick labels.
PT = pd.DataFrame(np.random.randn(100, 3), columns=list('ABC'))
fig, (ax, cbar_ax) = plt.subplots(ncols=2, figsize=(10, len(PT) / 5.2),
                                  gridspec_kw={'width_ratios': [10, 1]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=cbar_ax, ax=ax)
for _, spine in ax.spines.items():
    spine.set_visible(True)
# second axis: column C as tick labels on the right-hand side
asset_list = np.asarray(PT['C'])
ax3 = ax.twinx()
ax3.set_ylim(ax.get_ylim())
# Heatmap row i spans y = i .. i + 1, so the label belongs at i + 0.5;
# integer positions would put it on the border between two rows.
ax3.set_yticks(np.arange(len(PT)) + 0.5)
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
cbar_ax.tick_params(labelsize=80)
plt.tight_layout()
plt.show()
As the plot is quite large, here only the bottom part is pasted, with a link to the full plot.
This is how it would look with:
fontsize 80 (note that font sizes are measured in points, with 72 points per inch);
figure width of 20 inches (instead of 10);
300 rows
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Same layout with 300 rows and a 20-inch-wide figure: the heatmap and
# the colorbar live in separate subplots (15:1 width ratio).
PT = pd.DataFrame(np.random.randn(300, 3), columns=list('ABC'))
fig, (ax, cbar_ax) = plt.subplots(ncols=2, figsize=(20, len(PT) / 5.2),
                                  gridspec_kw={'width_ratios': [15, 1]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=cbar_ax, ax=ax)
for _, spine in ax.spines.items():
    spine.set_visible(True)
# second axis: column C as tick labels on the right-hand side
asset_list = np.asarray(PT['C'])
ax3 = ax.twinx()
ax3.set_ylim(ax.get_ylim())
# Heatmap row i spans y = i .. i + 1, so the label belongs at i + 0.5;
# integer positions would put it on the border between two rows.
ax3.set_yticks(np.arange(len(PT)) + 0.5)
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
cbar_ax.tick_params(labelsize=80)
plt.tight_layout()
plt.show()
My solution was eventually to move the colorbar to the left side. This is the code and the output:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Colorbar in the left subplot (ax0), heatmap in the right one (ax1), so
# the colorbar can never collide with the right-hand tick labels.
PT = pd.DataFrame(np.random.randn(300, 3), columns=list('ABC'))
fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, len(PT) / 5.2),
                               gridspec_kw={'width_ratios': [15, 15]})
sns.heatmap(PT.iloc[:, :-1], annot=False, cmap='RdYlGn', cbar_ax=ax0, ax=ax1)
for _, spine in ax1.spines.items():
    spine.set_visible(True)
# second axis: column C as tick labels on the right-hand side
asset_list = np.asarray(PT['C'])
ax3 = ax1.twinx()
ax3.set_ylim(ax1.get_ylim())
# Heatmap row i spans y = i .. i + 1, so the label belongs at i + 0.5;
# integer positions would put it on the border between two rows.
ax3.set_yticks(np.arange(len(PT)) + 0.5)
ax3.set_yticklabels(asset_list, fontsize=80)
# colorbar
ax0.tick_params(labelsize=80)
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# First 15 points are drawn as a solid blue trend line, the last 5 as a
# dashed red forecast line, with a horizontal threshold at y = 0.5.
df = pd.DataFrame({"y": np.random.rand(20)})
ax = df.iloc[:15, :].plot(ls="-", color="b")
df.iloc[15:, :].plot(ls="--", color="r", ax=ax)
# Draw the threshold on the data axes themselves. The unused twin axis
# was dropped because it became the current axes, so plt.axhline landed
# on a y scale unrelated to the data. `animated=True` was dropped too: it
# excludes the artist from a normal draw, which hides the line.
ax.axhline(y=0.5, linestyle="--", label="False Alaram")
plt.show()
So, the first 15 points are the trend and the last 5 are predictions.
I want different background colors for the trend and the prediction.
Also, how can I add the text "Historic" and "Forecast" to the graph?
I believe you're looking for fill_between:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Trend (first 15 points) and forecast (last 5 points) drawn as lines,
# each with the area under it shaded in the line's own colour.
df = pd.DataFrame({"y": np.random.rand(20)})
fig, ax = plt.subplots(figsize=(8, 6))
history = df.iloc[:15]
forecast = df.iloc[15:]

history.plot(ls="-", color="b", ax=ax)
ax.fill_between(history.index.tolist(), history.y.tolist(), alpha=.25, color='b')
forecast.plot(ls="--", color="r", ax=ax)
# No `animated=True` here: animated artists are skipped by a normal draw,
# so the threshold would show up in the legend but not in the plot.
ax.axhline(y=0.5, linestyle="--", label="False Alaram")
ax.fill_between(forecast.index.tolist(), forecast.y.tolist(), alpha=.25, color='r')
ax.legend()
plt.show()
I have a seaborn.heatmap plotted from a DataFrame:
import matplotlib.pyplot as plt
import seaborn as sns

# Annotated heatmap of `collected_data_frame` (defined outside this
# snippet) on a white figure, without a colorbar.
fig, ax = plt.subplots(facecolor='w', edgecolor='k')
sns.heatmap(
    collected_data_frame,
    annot=True,
    fmt='.4g',
    cmap='Blues',
    vmax=1.0,
    cbar=False,
    ax=ax,
)
I would like to create some sort of highlight for a maximum value in each column - it could be a red box around that value, or a red dot plotted next to that value, or the cell could be colored red instead of using Blues. Ideally I'm expecting something like this:
I got the highlight working for DataFrame printing in Jupyter Notebook using tips from this answer:
How can I achieve a similar thing but on a heatmap?
I customized the heatmap example from the official reference, drawing on answers from this site. The approach is to add a patch to an existing graph. I added a frame around the maximum value, but its position is set manually.
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
sns.set()
# Load the example flights dataset and convert to long-form
flights_long = sns.load_dataset("flights")
# DataFrame.pivot takes keyword arguments only in pandas >= 2.0.
flights = flights_long.pivot(index="month", columns="year", values="passengers")
# Draw a heatmap with the numeric values in each cell
f, ax = plt.subplots(figsize=(9, 6))
ax = sns.heatmap(flights, annot=True, fmt="d", linewidths=.5, ax=ax)
# Manually placed frame: lower-left corner at column 10 / row 6, covering
# 2 x 2 cells (heatmap cells are one data unit wide and high).
ax.add_patch(Rectangle((10, 6), 2, 2, fill=False, edgecolor='blue', lw=3))
max value:
# Locate the cell holding the largest value and frame it.
# Column label (year) whose maximum is the largest overall. max(flights)
# would only return the largest column *label*.
ymax = flights.max().idxmax()
# -> 1960
# Position of that column on the heatmap's x axis.
col_pos = flights.columns.get_loc(ymax)
# -> 11
# Row label (month) of the maximum inside that column.
xmax = flights[ymax].idxmax()
# -> 'July'
# Position of that row on the heatmap's y axis.
xpos = flights.index.get_loc(xmax)
# -> 6
# Rectangle wants cell positions, not labels: x = column position,
# y = row position. Passing the label 1960 as x would draw the frame far
# outside the heatmap.
ax.add_patch(Rectangle((col_pos, xpos), 1, 1, fill=False, edgecolor='blue', lw=3))
Complete solution based on the answer of @r-beginners:
Generate DataFrame:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn
# Sample data: five rows (C1..C5) by twelve columns labelled '0'..'11'.
_rows = [
    [0.9336719, 0.90119269, 0.90791181, 0.3112451, 0.56715989, 0.83339874,
     0.14571595, 0.36505745, 0.89847367, 0.95317909, 0.16396293, 0.63463356],
    [0.93282304, 0.90605976, 0.91276066, 0.30288519, 0.56366228, 0.83032344,
     0.14633036, 0.36081791, 0.9041638, 0.95268572, 0.16803188, 0.63459491],
    [0.15215358, 0.4311569, 0.32324376, 0.51620611, 0.69872915, 0.08811177,
     0.80087247, 0.234593, 0.47973905, 0.21688613, 0.2738223, 0.38322856],
    [0.90406056, 0.89632902, 0.92220635, 0.3022458, 0.58843012, 0.78159595,
     0.17089609, 0.33443782, 0.89997103, 0.93128579, 0.15942313, 0.62644379],
    [0.93868063, 0.45617598, 0.17708323, 0.81828266, 0.72986428, 0.82543775,
     0.41530088, 0.2604382, 0.33132295, 0.94686745, 0.05607774, 0.54141198],
]
arr = np.array(_rows)
# Column labels are the stringified positions 0..11; rows are C1..C5.
columns_text = list(map(str, range(12)))
index_text = ['C' + str(n) for n in range(1, 6)]
arr_data_frame = pd.DataFrame(data=arr, index=index_text, columns=columns_text)
Highlighting maximum in a column:
# Annotated heatmap with a red frame around the largest value of every
# column, saved to 'max_column_heatmap.png'.
fig, ax = plt.subplots(figsize=(15, 3), facecolor='w', edgecolor='k')
ax = seaborn.heatmap(
    arr_data_frame,
    annot=True,
    fmt='.4g',
    cmap='Blues',
    vmin=0,
    vmax=1.0,
    cbar=False,
    ax=ax,
)
# For each column label: the row label at which the column peaks.
peak_row_by_column = arr_data_frame.idxmax(axis=0)
for x_cell, column_label in enumerate(columns_text):
    y_cell = arr_data_frame.index.get_loc(peak_row_by_column[column_label])
    # A cell occupies a 1 x 1 square whose corner is (column, row).
    frame = Rectangle((x_cell, y_cell), 1, 1, fill=False, edgecolor='red', lw=3)
    ax.add_patch(frame)
plt.savefig('max_column_heatmap.png', dpi=500, bbox_inches='tight')
Highlighting maximum in a row:
# Annotated heatmap with a red frame around the largest value of every
# row, saved to 'max_row_heatmap.png'.
fig, ax = plt.subplots(figsize=(15, 3), facecolor='w', edgecolor='k')
ax = seaborn.heatmap(
    arr_data_frame,
    annot=True,
    fmt='.4g',
    cmap='Blues',
    vmin=0,
    vmax=1.0,
    cbar=False,
    ax=ax,
)
# For each row label: the column label at which the row peaks.
peak_column_by_row = arr_data_frame.idxmax(axis=1)
for y_cell, row_label in enumerate(index_text):
    x_cell = arr_data_frame.columns.get_loc(peak_column_by_row[row_label])
    # A cell occupies a 1 x 1 square whose corner is (column, row).
    frame = Rectangle((x_cell, y_cell), 1, 1, fill=False, edgecolor='red', lw=3)
    ax.add_patch(frame)
plt.savefig('max_row_heatmap.png', dpi=500, bbox_inches='tight')
I would like to display the following dataframe in barchart but with double y axis, I want to show areas columns on left side and prices columns on right side:
area1 area2 price1 price2
level
first 263.16 906.58 10443.32 35101.88
second 6879.83 14343.03 2077.79 4415.53
third 31942.75 60864.24 922.87 1774.47
I tried with code below, it works but only display left side.
import matplotlib.pyplot as plt

# Bar chart of every column of `df` (defined outside this snippet) on one
# shared y axis, with rotated tick labels rendered in the SimHei font.
ax = df.plot.bar()
plt.xticks(rotation=45, fontproperties="SimHei")
ax.set_xlabel("")
ax.legend()
Thank you.
If I understood you correctly, one way could be this, but you have to "play" a bit with the values of width and position of the ticks:
import pandas as pd
import matplotlib.pyplot as plt

# Area columns on the left y axis, price columns on the right (twin) y
# axis. `df` is the DataFrame from the question. Each series gets its own
# `position` so the four bars of a level do not overlap.
fig = plt.figure(figsize=(12, 5))
ax = fig.add_subplot(111)
ax2 = ax.twinx()
width = 0.1

# (column, colour, target axes); the list index doubles as the bar position.
bar_specs = [
    ('area1', 'red', ax),
    ('area2', 'orange', ax),
    ('price1', 'blue', ax2),
    ('price2', 'green', ax2),
]
for slot, (column, colour, target) in enumerate(bar_specs):
    df[column].plot(kind='bar', color=colour, ax=target, width=width, position=slot)

ax.set_ylabel('Area')
ax2.set_ylabel('Price')
# One legend per axis, placed side by side.
ax.legend(["Area1", "Area2"], bbox_to_anchor=(0.8, 1.0))
ax2.legend(["Price1", "Price2"], bbox_to_anchor=(0.9, 1.0))
plt.show()
Another way is this:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Same chart built with plain matplotlib bars: area columns on the left y
# axis, price columns on the right (twin) y axis. `df` is the DataFrame
# from the question.
fig, ax = plt.subplots(figsize=(10, 5))
ax2 = ax.twinx()
# Optional: ax.set_xticklabels(ax.get_xticklabels(), rotation=45) rotates
# the tick labels by 45 degrees.
width = 0.1
ind = np.arange(len(df))

# Four bars per level, each shifted right by one bar width.
ax.bar(ind, df.area1, width, color='red', label='area1')
ax.bar(ind + width, df.area2, width, color='orange', label='area2')
ax2.bar(ind + 2*width, df.price1, width, color='blue', label='price1')
ax2.bar(ind + 3*width, df.price2, width, color='green', label='price2')

ax.set_ylabel('Area')
ax2.set_ylabel('Price')
ax.set_xlabel('Level')
# Ticks sit in the middle of each group of four bars.
ax.set_xticks(ind + 1.5*width)
ax.set_xticklabels(df.index)
ax.set_xlim([2*width - 1, len(df)])

# One legend per axis, stacked at the upper right.
ax.legend(["Area1", "Area2"], bbox_to_anchor=(1, 1))
ax2.legend(["Price1", "Price2"], bbox_to_anchor=(1, 0.87))
plt.show()