Put labels in the center of each bar - python

I've created a bar plot with matplotlib pyplot library, using the following code:
fig = plt.figure(figsize=(15, 15), facecolor='white')
ax_left = fig.add_subplot(221)
ax_left.grid()
ax_left.bar(
[ix for ix in range(len(resp_tx.keys()))],
resp_tx.values,
#align='center',
tick_label=resp_tx.keys(),
color='#A6C307',
edgecolor='white',
)
ax_left.tick_params(axis='x', labelsize=10)
for label in ax_left.get_xticklabels():
label.set_rotation(45)
Where resp_tx is a pandas Series that looks like this:
APPROVED 90
ABANDONED_TRANSACTION 38
INTERNAL_PAYMENT_PROVIDER_ERROR 25
CANCELLED_TRANSACTION_MERCHANT 24
ENTITY_DECLINED 6
CANCELLED_TRANSACTION_PAYER 2
Name: resp_tx, dtype: int64
This is the result but I'm not able to put the labels in the right place. How can I put the tick labels in the center of the bar?

I resolved the issue with the following modifications:
resp_tx = self.tx_per_account[account].resp_tx.value_counts()
ax_left = fig.add_subplot(221)
ax_left.grid()
ax_left.bar(
[ix for ix in range(len(resp_tx.keys()))],
resp_tx.values,
color='#A6C307',
edgecolor='white',
)
ax_left.set_xticks([ix+0.4 for ix in range(len(resp_tx.keys()))])
ax_left.set_xticklabels(resp_tx.keys(), rotation=40, ha='right')
ax_left.set_ylim(top=(max(resp_tx.values)+max(resp_tx.values)*0.05))
ax_left.set_xlim(left=-0.2)
#ax_left.spines['top'].set_visible(False)
ax_left.spines['right'].set_visible(False)
#ax_left.spines['bottom'].set_visible(False)
ax_left.spines['left'].set_visible(False)

Related

How to get two legends using pandas plot, one for the colors of the stacked bars and one for the hatches of the bars?

I have been trying to understand the answer of this post in order to populate two different legends.
I create a clustered stacked bar plot with different hatches for each bar and my code below is a bit different from the answer of the aforementioned post.
But I have not been able to figure out how to get one legend with the colors and one legend with the hatches.
The color legend should correspond to A, B, C, D, E and the hatch legend should indicate "with" if bar is hatched and "without" if non-hatched.
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap as coloring
# copy the dfs below and use pd.read_clipboard() to reproduce
df_1
A B C D E
Mg 10 15 23 25 27
Ca 30 33 0 20 17
df_2
A B C D E
Mg 20 12 8 40 10
Ca 7 26 12 22 16
hatches=(' ', '//')
colors_ABCDE=['tomato', 'gold', 'greenyellow', 'forestgreen', 'palevioletred']
dfs=[df_1,df_2]
for each_df, df in enumerate(dfs):
df.plot(ax=plt.subplot(111), kind="barh", \
stacked=True, hatch=hatches[each_df], \
colormap=coloring.from_list("my_colormap", colors_ABCDE), \
figsize=(7,2.5), position=len(dfs)-each_df-1, \
align='center', width=0.2, edgecolor="darkgrey")
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=12)
The plot I manage to get is:
Any ideas how to create two legends and place them one next to the other or one below the other? Thanks in advance ^_^
Since adding legends in matplotlib is a complex, extensive step, consider using the very link you cite with function solution by #jrjc. However, you will need to adjust function to your horizontal bar graph needs. Specifically:
Add an argument for color map and in DataFrame.plot call
Adjust bar plot from kind='bar' to kind='barh' for horizontal version
Swap x for y in line: rect.set_y(rect.get_y() + 1 / float(n_df + 1) * i / float(n_col))
Swap width for height in line: rect.set_height(1 / float(n_df + 1))
Adjust axe.set_xticks and axe.set_xticklabels for np.arange(0, 120, 20) values
Function
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap as coloring
def plot_clustered_stacked(dfall, labels=None, title="multiple stacked bar plot", H="//",
colors_ABCDE=['tomato', 'gold', 'greenyellow', 'forestgreen', 'palevioletred'], **kwargs):
"""
CREDIT: #jrjc (https://stackoverflow.com/a/22845857/1422451)
Given a list of dataframes, with identical columns and index, create a clustered stacked bar plot.
labels is a list of the names of the dataframe, used for the legend
title is a string for the title of the plot
H is the hatch used for identification of the different dataframe
"""
n_df = len(dfall)
n_col = len(dfall[0].columns)
n_ind = len(dfall[0].index)
axe = plt.subplot(111)
for df in dfall : # for each data frame
axe = df.plot(kind="barh",
linewidth=0,
stacked=True,
ax=axe,
legend=False,
grid=False,
colormap=coloring.from_list("my_colormap", colors_ABCDE),
edgecolor="darkgrey",
**kwargs) # make bar plots
h,l = axe.get_legend_handles_labels() # get the handles we want to modify
for i in range(0, n_df * n_col, n_col): # len(h) = n_col * n_df
for j, pa in enumerate(h[i:i+n_col]):
for rect in pa.patches: # for each index
rect.set_y(rect.get_y() + 1 / float(n_df + 2) * i / float(n_col))
rect.set_hatch(H * int(i / n_col)) #edited part
rect.set_height(1 / float(n_df + 2))
axe.set_xticks(np.arange(0, 125, 20))
axe.set_xticklabels(np.arange(0, 125, 20).tolist(), rotation = 0)
axe.margins(x=0, tight=None)
axe.set_title(title)
# Add invisible data to add another legend
n=[]
for i in range(n_df):
n.append(axe.bar(0, 0, color="gray", hatch=H * i, edgecolor="darkgrey"))
l1 = axe.legend(h[:n_col], l[:n_col], loc=[1.01, 0.5])
if labels is not None:
l2 = plt.legend(n, labels, loc=[1.01, 0.1])
axe.add_artist(l1)
return axe
Call
plt.figure(figsize=(10, 4))
plot_clustered_stacked([df_1, df_2],["df_1", "df_2"])
plt.show()
plt.clf()
plt.close()
Output
I thought that this function solution by #jrjc is rather perplexing for my understanding and thus, I preferred to alter my own thing a little and adjust it.
So, it took my some time to understand that when a second legend is created for a plot, python automatically erases the first one and this is when add_artist() must be employed.
The other prerequisite in order to add the second legend is to name the plot and apply the .add_artist() method to that specific plot, so that python knows where to stick that new piece.
In short, this is how I managed to create the plot I had in mind and I hope that the comments will make it somehow clearer and useful for anyone.
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap as coloring
import matplotlib.patches as mpatches
# copy the dfs below and use pd.read_clipboard() to reproduce
df_1
A B C D E
Mg 10 15 23 25 27
Ca 30 33 0 20 17
df_2
A B C D E
Mg 20 12 8 40 10
Ca 7 26 12 22 16
hatches=(' ', '//')
colors_ABCDE=['tomato', 'gold', 'greenyellow', 'forestgreen', 'palevioletred']
dfs=[df_1,df_2]
for each_df, df in enumerate(dfs):
#I name the plot as "figure"
figure=df.plot(ax=plt.subplot(111), kind="barh", \
stacked=True, hatch=hatches[each_df], \
colormap=coloring.from_list("my_colormap", colors_ABCDE), \
figsize=(7,2.5), position=len(dfs)-each_df-1, \
align='center', width=0.2, edgecolor="darkgrey", \
legend=False) #I had to False the legend too
legend_1=plt.legend(df_1.columns, loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=12)
patch_hatched = mpatches.Patch(facecolor='beige', hatch='///', edgecolor="darkgrey", label='hatched')
patch_unhatched = mpatches.Patch(facecolor='beige', hatch=' ', edgecolor="darkgrey", label='non-hatched')
legend_2=plt.legend(handles=[patch_hatched, patch_unhatched], loc='center left', bbox_to_anchor=(1.15, 0.5), fontsize=12)
# as soon as a second legend is made, the first disappears and needs to be added back again
figure.add_artist(legend_1) #python now knows that "figure" must take the "legend_1" along with "legend_2"
I am pretty sure that it can be even more elegant and automated.

half (not split!) violin plots in seaborn

Currently seaborn offers functionality for split violinplots by setting split=True, according to a hue variable. I would like to make a 'half' violin plot, i.e. a plot where half of each violin is omitted. Such a plot depicts something similar to a pdf for each continuous variable, plotted on one side of each vertical line of each categorical variable only.
I have managed to trick seaborn to plot this with an extra data point outside the plotted range of values and an extra dummy hue, but I would like to know if this can be done without actually altering the dataset, e.g. within sns.violinplot() arguments.
For instance, this graph:
Was created by this snippet:
# imports
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# load dataset from seaborn
datalist = sns.get_dataset_names()
dataset_name = 'iris'
if dataset_name in datalist:
df = sns.load_dataset(dataset_name)
else:
print("Dataset with name: " + dataset_name + " was not found in the available datasets online by seaborn.")
# prepare data
df2 = df.append([-999,-999,-999,-999,'setosa'])
df2['huecol'] = 0.0
df2['huecol'].iloc[-1]= -999
# plot
fig = plt.figure(figsize=(6,6))
sns.violinplot(x='species',y="sepal_width",
split=True, hue ='huecol', inner = 'quartile',
palette="pastel", data=df2, legend=False)
plt.title('iris')
# remove hue legend
leg = plt.gca().legend()
leg.remove()
plt.ylim([1,5.0])
plt.show()
I was looking for a solution similar to this but did not find anything satisfactory. I ended up calling seaborn.kdeplot multiple times as violinplot is essentially a one-sided kernel density plot.
Example
Function definition for categorical_kde_plot below
categorical_kde_plot(
df,
variable="tip",
category="day",
category_order=["Thur", "Fri", "Sat", "Sun"],
horizontal=False,
)
with horizontal=True, the output would look like:
Code
import seaborn as sns
from matplotlib import pyplot as plt
def categorical_kde_plot(
df,
variable,
category,
category_order=None,
horizontal=False,
rug=True,
figsize=None,
):
"""Draw a categorical KDE plot
Parameters
----------
df: pd.DataFrame
The data to plot
variable: str
The column in the `df` to plot (continuous variable)
category: str
The column in the `df` to use for grouping (categorical variable)
horizontal: bool
If True, draw density plots horizontally. Otherwise, draw them
vertically.
rug: bool
If True, add also a sns.rugplot.
figsize: tuple or None
If None, use default figsize of (7, 1*len(categories))
If tuple, use that figsize. Given to plt.subplots as an argument.
"""
if category_order is None:
categories = list(df[category].unique())
else:
categories = category_order[:]
figsize = (7, 1.0 * len(categories))
fig, axes = plt.subplots(
nrows=len(categories) if horizontal else 1,
ncols=1 if horizontal else len(categories),
figsize=figsize[::-1] if not horizontal else figsize,
sharex=horizontal,
sharey=not horizontal,
)
for i, (cat, ax) in enumerate(zip(categories, axes)):
sns.kdeplot(
data=df[df[category] == cat],
x=variable if horizontal else None,
y=None if horizontal else variable,
# kde kwargs
bw_adjust=0.5,
clip_on=False,
fill=True,
alpha=1,
linewidth=1.5,
ax=ax,
color="lightslategray",
)
keep_variable_axis = (i == len(fig.axes) - 1) if horizontal else (i == 0)
if rug:
sns.rugplot(
data=df[df[category] == cat],
x=variable if horizontal else None,
y=None if horizontal else variable,
ax=ax,
color="black",
height=0.025 if keep_variable_axis else 0.04,
)
_format_axis(
ax,
cat,
horizontal,
keep_variable_axis=keep_variable_axis,
)
plt.tight_layout()
plt.show()
def _format_axis(ax, category, horizontal=False, keep_variable_axis=True):
# Remove the axis lines
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
if horizontal:
ax.set_ylabel(None)
lim = ax.get_ylim()
ax.set_yticks([(lim[0] + lim[1]) / 2])
ax.set_yticklabels([category])
if not keep_variable_axis:
ax.get_xaxis().set_visible(False)
ax.spines["bottom"].set_visible(False)
else:
ax.set_xlabel(None)
lim = ax.get_xlim()
ax.set_xticks([(lim[0] + lim[1]) / 2])
ax.set_xticklabels([category])
if not keep_variable_axis:
ax.get_yaxis().set_visible(False)
ax.spines["left"].set_visible(False)
if __name__ == "__main__":
df = sns.load_dataset("tips")
categorical_kde_plot(
df,
variable="tip",
category="day",
category_order=["Thur", "Fri", "Sat", "Sun"],
horizontal=True,
)
The answer is simply, no, it's not possible with seaborn without tricking it into thinking there is a hue present.
This answer shows how to do it in matplotlib and in principle the same can be applied to seaborn violinplots as well, namely to cut out half of the violin path.
It's not necessary to modify the data:
ax = sns.violinplot(
data=tips,
x="day", y="total_bill", hue=True,
hue_order=[True, False], split=True,
)
ax.legend_ = None

Matplotlib Different Scaled Y-Axes

I have a dataframe with the data below.
ex_dict = {'revenue': [613663, 1693667, 2145183, 2045065, 2036406,
1708862, 1068232, 1196899, 2185852, 2165778, 2144738, 2030337,
1784067],
'abs_percent_diff': [0.22279211315310588, 0.13248909660765254,
0.12044821447874667, 0.09438674840975962, 0.1193588387687364,
0.062100921139322744, 0.05875297161175445, 0.06240362963749895,
0.05085338590212515, 0.034877614941165744, 0.012263947005671703,
0.029227374323993634, 0.023411816504907524],
'ds': [dt.date(2017,1,1), dt.date(2017,1,2), dt.date(2017,1,3),
dt.date(2017,1,4), dt.date(2017,1,5), dt.date(2017,1,6),
dt.date(2017,1,7), dt.date(2017,1,8), dt.date(2017,1,9),
dt.date(2017,1,10), dt.date(2017,1,11), dt.date(2017,1,12),
dt.date(2017,1,13)],
'yhat_normal': [501853.9074623253, 1952329.3521464923, 1914575.7673396615,
1868685.8215084015, 1819261.1068672044, 1608945.031482406,
1008953.0123101478, 1126595.36037955, 2302965.598289115,
2244044.9351591542, 2171367.536396199, 2091465.0313570146,
1826836.562382966]}
df_vis=pd.DataFrame.from_dict(ex_dict)
I want to graph yhat_normal and revenue on the same y-axis and abs_percent_diff on a y-axis with a different scale.
df_vis = df_vis.set_index('ds')
df_vis[['rev', 'yhat_normal']].plot(figsize=(20, 12))
I can easily graph rev and yhat_normal with the code above, but I am struggling to get abs_percent_diff on a different y-axis scale. I tried converting my columns to numpy arrays and doing this, but it looks terrible.
npdate = df_vis.as_matrix(columns= ['ds'])
nppredictions = df_vis.as_matrix(columns= ['yhat_normal'])
npactuals = df_vis.as_matrix(columns= ['rev'])
npmape = df_vis.as_matrix(columns=['abs_percent_diff'])
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
fig.set_size_inches(20,10)
ax1.plot_date(npdate, nppredictions, ls= '-', color= 'b')
ax1.plot_date(npdate, npactuals, ls='-', color='g')
ax2.plot_date(npdate, npmape, 'r-')
ax1.set_xlabel('X data')
ax1.set_ylabel('Y1 data', color='g')
ax2.set_ylabel('Y2 data', color='b')
plt.show()
This is what I want. Where the red line is the abs_percent_diff. Obviously, I drew the line by hand so it is not accurate.
I'm not sure if I got the problem correclty, but it seems you simply want to draw one of the dataframe columns at the bottom of the plot area.
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
ex_dict = {'revenue': [613663, 1693667, 2145183, 2045065, 2036406,
1708862, 1068232, 1196899, 2185852, 2165778, 2144738, 2030337,
1784067],
'abs_percent_diff': [0.22279211315310588, 0.13248909660765254,
0.12044821447874667, 0.09438674840975962, 0.1193588387687364,
0.062100921139322744, 0.05875297161175445, 0.06240362963749895,
0.05085338590212515, 0.034877614941165744, 0.012263947005671703,
0.029227374323993634, 0.023411816504907524],
'ds': [dt.date(2017,1,1), dt.date(2017,1,2), dt.date(2017,1,3),
dt.date(2017,1,4), dt.date(2017,1,5), dt.date(2017,1,6),
dt.date(2017,1,7), dt.date(2017,1,8), dt.date(2017,1,9),
dt.date(2017,1,10), dt.date(2017,1,11), dt.date(2017,1,12),
dt.date(2017,1,13)],
'yhat_normal': [501853.9074623253, 1952329.3521464923, 1914575.7673396615,
1868685.8215084015, 1819261.1068672044, 1608945.031482406,
1008953.0123101478, 1126595.36037955, 2302965.598289115,
2244044.9351591542, 2171367.536396199, 2091465.0313570146,
1826836.562382966]}
df_vis=pd.DataFrame.from_dict(ex_dict)
df_vis = df_vis.set_index('ds')
ax = df_vis[['revenue','yhat_normal']].plot(figsize=(13, 8))
ax2 = df_vis['abs_percent_diff'].plot(secondary_y=True, ax=ax)
ax2.set_ylim(0,1)
plt.show()

Horizontal stacked bar plot and add labels to each section

I am trying to replicate the following image in matplotlib and it seems barh is my only option. Though it appears that you can't stack barh graphs so I don't know what to do
If you know of a better python library to draw this kind of thing, please let me know.
This is all I could come up with as a start:
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
people = ('A','B','C','D','E','F','G','H')
y_pos = np.arange(len(people))
bottomdata = 3 + 10 * np.random.rand(len(people))
topdata = 3 + 10 * np.random.rand(len(people))
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
ax.barh(y_pos, bottomdata,color='r',align='center')
ax.barh(y_pos, topdata,color='g',align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
I would then have to add labels individually using ax.text which would be tedious. Ideally I would like to just specify the width of the part to be inserted then it updates the center of that section with a string of my choosing. The labels on the outside (e.g. 3800) I can add myself later, it is mainly the labeling over the bar section itself and creating this stacked method in a nice way I'm having problems with. Can you even specify a 'distance' i.e. span of color in any way?
Edit 2: for more heterogeneous data. (I've left the above method since I find it more usual to work with the same number of records per series)
Answering the two parts of the question:
a) barh returns a container of handles to all the patches that it drew. You can use the coordinates of the patches to aid the text positions.
b) Following these two answers to the question that I noted before (see Horizontal stacked bar chart in Matplotlib), you can stack bar graphs horizontally by setting the 'left' input.
and additionally c) handling data that is less uniform in shape.
Below is one way you could handle data that is less uniform in shape is simply to process each segment independently.
import numpy as np
import matplotlib.pyplot as plt
# some labels for each row
people = ('A','B','C','D','E','F','G','H')
r = len(people)
# how many data points overall (average of 3 per person)
n = r * 3
# which person does each segment belong to?
rows = np.random.randint(0, r, (n,))
# how wide is the segment?
widths = np.random.randint(3,12, n,)
# what label to put on the segment (xrange in py2.7, range for py3)
labels = range(n)
colors ='rgbwmc'
patch_handles = []
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
left = np.zeros(r,)
row_counts = np.zeros(r,)
for (r, w, l) in zip(rows, widths, labels):
print r, w, l
patch_handles.append(ax.barh(r, w, align='center', left=left[r],
color=colors[int(row_counts[r]) % len(colors)]))
left[r] += w
row_counts[r] += 1
# we know there is only one patch but could enumerate if expanded
patch = patch_handles[-1][0]
bl = patch.get_xy()
x = 0.5*patch.get_width() + bl[0]
y = 0.5*patch.get_height() + bl[1]
ax.text(x, y, "%d%%" % (l), ha='center',va='center')
y_pos = np.arange(8)
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
Which produces a graph like this , with a different number of segments present in each series.
Note that this is not particularly efficient since each segment used an individual call to ax.barh. There may be more efficient methods (e.g. by padding a matrix with zero-width segments or nan values) but this likely to be problem-specific and is a distinct question.
Edit: updated to answer both parts of the question.
import numpy as np
import matplotlib.pyplot as plt
people = ('A','B','C','D','E','F','G','H')
segments = 4
# generate some multi-dimensional data & arbitrary labels
data = 3 + 10* np.random.rand(segments, len(people))
percentages = (np.random.randint(5,20, (len(people), segments)))
y_pos = np.arange(len(people))
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
colors ='rgbwmc'
patch_handles = []
left = np.zeros(len(people)) # left alignment of data starts at zero
for i, d in enumerate(data):
patch_handles.append(ax.barh(y_pos, d,
color=colors[i%len(colors)], align='center',
left=left))
# accumulate the left-hand offsets
left += d
# go through all of the bar segments and annotate
for j in range(len(patch_handles)):
for i, patch in enumerate(patch_handles[j].get_children()):
bl = patch.get_xy()
x = 0.5*patch.get_width() + bl[0]
y = 0.5*patch.get_height() + bl[1]
ax.text(x,y, "%d%%" % (percentages[i,j]), ha='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
You can achieve a result along these lines (note: the percentages I used have nothing to do with the bar widths, as the relationship in the example seems unclear):
See Horizontal stacked bar chart in Matplotlib for some ideas on stacking horizontal bar plots.
Imports and Test DataFrame
Tested in python 3.10, pandas 1.4.2, matplotlib 3.5.1, seaborn 0.11.2
For vertical stacked bars see Stacked Bar Chart with Centered Labels
import pandas as pd
import numpy as np
# create sample data as shown in the OP
np.random.seed(365)
people = ('A','B','C','D','E','F','G','H')
bottomdata = 3 + 10 * np.random.rand(len(people))
topdata = 3 + 10 * np.random.rand(len(people))
# create the dataframe
df = pd.DataFrame({'Female': bottomdata, 'Male': topdata}, index=people)
# display(df)
Female Male
A 12.41 7.42
B 9.42 4.10
C 9.85 7.38
D 8.89 10.53
E 8.44 5.92
F 6.68 11.86
G 10.67 12.97
H 6.05 7.87
Updated with matplotlib v3.4.2
Use matplotlib.pyplot.bar_label
See How to add value labels on a bar chart for additional details and examples with .bar_label.
labels = [f'{v.get_width():.2f}%' if v.get_width() > 0 else '' for v in c ] for python < 3.8, without the assignment expression (:=).
Plotted using pandas.DataFrame.plot with kind='barh'
ax = df.plot(kind='barh', stacked=True, figsize=(8, 6))
for c in ax.containers:
# customize the label to account for cases when there might not be a bar section
labels = [f'{w:.2f}%' if (w := v.get_width()) > 0 else '' for v in c ]
# set the bar label
ax.bar_label(c, labels=labels, label_type='center')
# uncomment and use the next line if there are no nan or 0 length sections; just use fmt to add a % (the previous two lines of code are not needed, in this case)
# ax.bar_label(c, fmt='%.2f%%', label_type='center')
# move the legend
ax.legend(bbox_to_anchor=(1.025, 1), loc='upper left', borderaxespad=0.)
# add labels
ax.set_ylabel("People", fontsize=18)
ax.set_xlabel("Percent", fontsize=18)
plt.show()
Using seaborn
sns.barplot does not have an option for stacked bar plots, however, sns.histplot and sns.displot can be used to create horizontal stacked bars.
seaborn typically requires the dataframe to be in a long, instead of wide, format, so use pandas.DataFrame.melt to reshape the dataframe.
Reshape dataframe
# convert the dataframe to a long form
df = df.reset_index()
df = df.rename(columns={'index': 'People'})
dfm = df.melt(id_vars='People', var_name='Gender', value_name='Percent')
# display(dfm)
People Gender Percent
0 A Female 12.414557
1 B Female 9.416027
2 C Female 9.846105
3 D Female 8.885621
4 E Female 8.438872
5 F Female 6.680709
6 G Female 10.666258
7 H Female 6.050124
8 A Male 7.420860
9 B Male 4.104433
10 C Male 7.383738
11 D Male 10.526158
12 E Male 5.916262
13 F Male 11.857227
14 G Male 12.966913
15 H Male 7.865684
sns.histplot: axes-level plot
fig, axe = plt.subplots(figsize=(8, 6))
sns.histplot(data=dfm, y='People', hue='Gender', discrete=True, weights='Percent', multiple='stack', ax=axe)
# iterate through each set of containers
for c in axe.containers:
# add bar annotations
axe.bar_label(c, fmt='%.2f%%', label_type='center')
axe.set_xlabel('Percent')
plt.show()
sns.displot: figure-level plot
g = sns.displot(data=dfm, y='People', hue='Gender', discrete=True, weights='Percent', multiple='stack', height=6)
# iterate through each facet / supbplot
for axe in g.axes.flat:
# iteate through each set of containers
for c in axe.containers:
# add the bar annotations
axe.bar_label(c, fmt='%.2f%%', label_type='center')
axe.set_xlabel('Percent')
plt.show()
Original Answer - before matplotlib v3.4.2
The easiest way to plot a horizontal or vertical stacked bar, is to load the data into a pandas.DataFrame
This will plot, and annotate correctly, even when all categories ('People'), don't have all segments (e.g. some value is 0 or NaN)
Once the data is in the dataframe:
It's easier to manipulate and analyze
It can be plotted with the matplotlib engine, using:
pandas.DataFrame.plot.barh
label_text = f'{width}' for annotations
pandas.DataFrame.plot.bar
label_text = f'{height}' for annotations
SO: Vertical Stacked Bar Chart with Centered Labels
These methods return a matplotlib.axes.Axes or a numpy.ndarray of them.
Using the .patches method unpacks a list of matplotlib.patches.Rectangle objects, one for each of the sections of the stacked bar.
Each .Rectangle has methods for extracting the various values that define the rectangle.
Each .Rectangle is in order from left the right, and bottom to top, so all the .Rectangle objects, for each level, appear in order, when iterating through .patches.
The labels are made using an f-string, label_text = f'{width:.2f}%', so any additional text can be added as needed.
Plot and Annotate
Plotting the bar, is 1 line, the remainder is annotating the rectangles
# plot the dataframe with 1 line
ax = df.plot.barh(stacked=True, figsize=(8, 6))
# .patches is everything inside of the chart
for rect in ax.patches:
# Find where everything is located
height = rect.get_height()
width = rect.get_width()
x = rect.get_x()
y = rect.get_y()
# The height of the bar is the data value and can be used as the label
label_text = f'{width:.2f}%' # f'{width:.2f}' to format decimal values
# ax.text(x, y, text)
label_x = x + width / 2
label_y = y + height / 2
# only plot labels greater than given width
if width > 0:
ax.text(label_x, label_y, label_text, ha='center', va='center', fontsize=8)
# move the legend
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
# add labels
ax.set_ylabel("People", fontsize=18)
ax.set_xlabel("Percent", fontsize=18)
plt.show()
Example with Missing Segment
# set one of the dataframe values to 0
df.iloc[4, 1] = 0
Note the annotations are all in the correct location from df.
For this case, the above answers work perfectly. The issue I had, and didn't find a plug-and-play solution online, was that I often have to plot stacked bars in multi-subplot figures, with many values, which tend to have very non-homogenous amplitudes.
(Note: I work usually with pandas dataframes, and matplotlib. I couldn't make the bar_label() method of matplotlib to work all the times.)
So, I just give a kind of ad-hoc, but easily generalizable solution. In this example, I was working with single-row dataframes (for power-exchange monitoring purposes per hour), so, my dataframe (df) had just one row.
(I provide an example figure to show how this can be useful in very densely-packed plots)
[enter image description here][1]
[1]: https://i.stack.imgur.com/9akd8.png
'''
This implementation produces a stacked, horizontal bar plot.
df --> pandas dataframe. Columns are used as the iterator, and only the firs value of each column is used.
waterfall--> bool: if True, apart from the stack-direction, also a perpendicular offset is added.
cyclic_offset_x --> list (of any length) or None: loop through these values to use as x-offset pixels.
cyclic_offset_y --> list (of any length) or None: loop through these values to use as y-offset pixels.
ax --> matplotlib Axes, or None: if None, creates a new axis and figure.
'''
def magic_stacked_bar(df, waterfall=False, cyclic_offset_x=None, cyclic_offset_y=None, ax=None):
if isinstance(cyclic_offset_x, type(None)):
cyclic_offset_x = [0, 0]
if isinstance(cyclic_offset_y, type(None)):
cyclic_offset_y = [0, 0]
ax0 = ax
if isinstance(ax, type(None)):
fig, ax = plt.subplots()
fig.set_size_inches(19, 10)
cycler = 0;
prev = 0 # summation variable to make it stacked
for c in df.columns:
if waterfall:
y = c ; label = "" # bidirectional stack
else:
y = 0; label = c # unidirectional stack
ax.barh(y=y, width=df[c].values[0], height=1, left=prev, label = label)
prev += df[c].values[0] # add to sum-stack
offset_x = cyclic_offset_x[divmod(cycler, len(cyclic_offset_x))[1]]
offset_y = cyclic_offset_y[divmod(cycler, len(cyclic_offset_y))[1]]
ax.annotate(text="{}".format(int(df[c].values[0])), xy=(prev - df[c].values / 2, y),
xytext=(offset_x, offset_y), textcoords='offset pixels',
ha='center', va='top', fontsize=8,
arrowprops=dict(facecolor='black', shrink=0.01, width=0.3, headwidth=0.3),
bbox=dict(boxstyle='round', facecolor='grey', alpha=0.5))
cycler += 1
if not waterfall:
ax.legend() # if waterfall, the index annotates the columns. If
# waterfall ==False, the legend annotates the columns
if isinstance(ax0, type(None)):
ax.set_title("Voi la")
ax.set_xlabel("UltraWatts")
plt.show()
else:
return ax
''' (Sometimes, it is more tedious and requires some custom functions to make the labels look alright.
'''
A, B = 80,80
n_units = df.shape[1]
cyclic_offset_x = -A*np.cos(2*np.pi / (2*n_units) *np.arange(n_units))
cyclic_offset_y = B*np.sin(2*np.pi / (2*n_units) * np.arange(n_units)) + B/2

Custom X-Axis Date Range Using Matplotlib

This is my first time asking a Python question online. I have always been able to find answers to my questions on this site..until now. I am trying to plot data that were developed using the Index Sequential Method, which is a technique for projecting historical data into the future. I have 105 charts that each cover 47 years of data. The first chart x-axis ranges from 1906-1952, the second 1907-1953, thir 1908-1954, etc. My problem is when I get to 1963, which is when the 47th year reverts back to the begining (1906). So the 1963 chart xaxis would look like this: 1963, 1964, 1965,...2008,2009,2010,1906. The 1964 chart xaxis would look like this: 1964, 1965, 1967,...2009, 2010, 1906, 1907.
I can get the data to plot fine, I just need help figuring out how to format the xaxis to accept the unique wrap-around situation when it occurs.
There are three charts per page (ax1, ax2, and ax3). yearList and chartList are the x and y data, respectively. The code below is part of a for loop that creates the yearList and chartList data sets, and it creates the charts with the wrong xaxis labels.
import matplotlib, pyPdf
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as tkr
from matplotlib.ticker import MultipleLocator
import matplotlib.figure as figure
plt.rcParams['font.family'] = 'Times New Roman'
locator = mdates.YearLocator(2)
minorLocator = MultipleLocator(1)
dateFmt = mdates.DateFormatter('%Y')
datemin = min(yearList)
datemax = max(yearList)
fig, (ax1, ax2, ax3) = plt.subplots(3,1,sharex=False)
#3X3 Top to bottom
ax1.bar(yearList1, chartList1, width=200, align='center')
ax2.bar(yearList2, chartList2, width=200, align='center')
ax3.bar(yearList3, chartList3, width=200, align='center')
axList = [ax1, ax2, ax3]
for ax in axList:
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(dateFmt)
ax.xaxis.set_minor_locator(minorLocator)
ax.set_xlim(datemin - timedelta(365), datemax + timedelta(365))
ax.grid(1)
ax.set_ylim(0,30)
ax.set_yticks(np.arange(0, 31, 5))
ax.yaxis.set_minor_locator(minorLocator)
#Rotate tick labels 90 degrees
xlabels = ax.get_xticklabels()
for label in xlabels:
label.set_rotation(90)
fig.tight_layout()
plt.subplots_adjust(right=0.925)
plt.savefig('%s\\run.pdf' % outDir)
You are making a bar graph, which means the x-posistion has little to no meaning aside from the labels, so don't try to plot the bars vs their date, plot them against the integers, and then label them as you wish:
from itertools import izip
fig, axeses = plt.subplots(3,1,sharex=False)
#3X3 Top to bottom
for yl, cl, ax in izip([yearList1, yearList2, yearList3],
[chartList1, chartList2, chartist3],
axeses):
ax.bar(range(len(cl)), cl, align='center')
ax.set_ylim(0,30)
ax.set_yticks(np.arange(0, 31, 5))
ax.yaxis.set_minor_locator(minorLocator)
xlabels = [dateFmt(xl) for xl in yl] # make a list of formatted labels
ax.set_xticks(range(len(cl))) # put the tick markers under your bars
ax.set_xticklabels(xlabels) # set the labels to be your formatted years
#Rotate tick labels 90 degrees
for label in ax.get_xticklabels():
label.set_rotation(90)
# you only need to do this once
fig.tight_layout()
fig.subplots_adjust(right=0.925)
fig.savefig('%s\\run.pdf' % outDir)
Also see the demo and the docs set_xticks and set_xticklabels
You can use the ax.set_ticklabels() function to set the labels.
Example:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot([1, 2, 3, 4], [10, 20, 25, 30])
ax.xaxis.set_ticklabels(["foo" , "bar", "ouch"])
plt.show()
So, just add the transformation that you need, and create the labels list.
maybe something like this:
range = 47
yearList = [1967, 1968,..., last year]
range_of_years = map(lambda x: range(year,year + range), yearList)
for i in range(len(axis_list)):
axis_list[i].xaxis.set_ticklabels(years_list[i])

Categories

Resources