When I increase the font size in matplotlib parts of the x-axis title are cut off. Is there a way to keep the plot/figure as it is and just tell matplotlib to increase the output page size instead of cutting off?
My code is:
import matplotlib.pyplot as plt
plt.style.use('paper')
fig = plt.figure()
ax1 = plt.subplot2grid((3,1),(0,0),rowspan=2)
ax2 = plt.subplot2grid((3,1),(2,0),sharex=ax1)
# first subplot
plt.setp(ax1.get_xticklabels(),visible=False)
ax1.set_ylabel("$\mathrm{Arbitrary\; Units}$", size=20)
ax1.yaxis.get_major_formatter().set_powerlimits((0,1))
ax1.get_yaxis().set_label_coords(-0.1,0.5)
x1,x2 = [],[]
for i in xrange(1000000):
r = rnd.random()
x1.append(r**(1/2))
x2.append(r**(1/3))
xmin = 0
xmax = 1
nbins = 50
h1,_,_ = ax1.hist(x1,bins=nbins,range=(xmin,xmax),normed=1,color="#FF0000",histtype='step')
h2,_,_ = ax1.hist(x2,bins=nbins,range=(xmin,xmax),normed=1,color="#00FF00",histtype='step')
# legend
old, = ax1.plot([0,0],color="#FF0000",label="2")
new, = ax1.plot([0,0],color="#00FF00",label="3")
ax1.legend(loc=2, ncol=1, borderaxespad=0.)
old.set_visible(False)
new.set_visible(False)
# second subplot
ax2.plot(np.linspace(xmin,xmax,nbins),h2/h1)
ax2.plot([xmin,xmax],[1,1],color="black")
ax2.set_ylim(ymin=0,ymax=1.99)
ax2.set_xlabel(r'$\phi_\mu$', size=20, weight="light")
ax2.set_ylabel("$\mathrm{Ratio}$", size=20)
ax2.get_yaxis().set_label_coords(-0.1,0.5)
fig.subplots_adjust(hspace=0.)
ax2.set_yticks(ax2.get_yticks()[1:-1])
The stylesheet that I use ('paper') is:
axes.axisbelow: True
axes.grid: False
axes.labelcolor: 333333
axes.linewidth: 0.6
backend: GTK
font.family: serif
font.size: 20
grid.alpha: 0.1
grid.color: black
grid.linestyle: -
grid.linewidth: 0.8
legend.fontsize: 20
legend.framealpha: 0
legend.numpoints: 1
lines.linestyle: -
lines.linewidth: 2
lines.markeredgewidth: 0
patch.linewidth: 2
text.color: 555555
text.usetex: True
xtick.color: 333333
ytick.color: 333333
This is what it looks like:
Since I want the subplots to share the x-axis I don't want any space between them.
Can anyone help me here?
Cheers.
Related
I'm looking a way to plot side by side stacked barplots to compare host composition of positive (Condition==True) and total cases in each country from my dataframe.
Here is a sample of the DataFrame.
id Location Host genus_name #ofGenes Condition
1 Netherlands Homo sapiens Escherichia 4.0 True
2 Missing Missing Klebsiella 3.0 True
3 Missing Missing Aeromonas 2.0 True
4 Missing Missing Glaciecola 2.0 True
5 Antarctica Missing Alteromonas 2.0 True
6 Indian Ocean Missing Epibacterium 2.0 True
7 Missing Missing Klebsiella 2.0 True
8 China Homo sapiens Escherichia 0 False
9 Missing Missing Escherichia 2.0 True
10 China Plantae kingdom Pantoea 0 False
11 China Missing Escherichia 2.0 True
12 Pacific Ocean Missing Halomonas 0 False
I need something similar to the image bellow, but I want to plot in percentage.
Can anyone help me?
I guess what you want is a stacked categorical bar plot, which cannot be directly plotted using seaborn. But you can achieve it by customizing one.
Import some necessary packages.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
Read the dataset. Considering your sample data is too small, I randomly generate some to make the plot looks good.
def gen_fake_data(data, size=400):
unique_values = []
for c in data.columns:
unique_values.append(data[c].unique())
new_data = pd.DataFrame({c: np.random.choice(unique_values[i], size=size)
for i, c in enumerate(data.columns)})
new_data = pd.concat([data, new_data])
new_data['id'] = new_data.index + 1
return new_data
data = pd.read_csv('data.csv')
new_data = gen_fake_data(data)
Define the stacked categorical bar plot
def stack_catplot(x, y, cat, stack, data, palette=sns.color_palette('Reds')):
ax = plt.gca()
# pivot the data based on categories and stacks
df = data.pivot_table(values=y, index=[cat, x], columns=stack,
dropna=False, aggfunc='sum').fillna(0)
ncat = data[cat].nunique()
nx = data[x].nunique()
nstack = data[stack].nunique()
range_x = np.arange(nx)
width = 0.8 / ncat # width of each bar
for i, c in enumerate(data[cat].unique()):
# iterate over categories, i.e., Conditions
# calculate the location of each bar
loc_x = (0.5 + i - ncat / 2) * width + range_x
bottom = 0
for j, s in enumerate(data[stack].unique()):
# iterate over stacks, i.e., Hosts
# obtain the height of each stack of a bar
height = df.loc[c][s].values
# plot the bar, you can customize the color yourself
ax.bar(x=loc_x, height=height, bottom=bottom, width=width,
color=palette[j + i * nstack], zorder=10)
# change the bottom attribute to achieve a stacked barplot
bottom += height
# make xlabel
ax.set_xticks(range_x)
ax.set_xticklabels(data[x].unique(), rotation=45)
ax.set_ylabel(y)
# make legend
plt.legend([Patch(facecolor=palette[i]) for i in range(ncat * nstack)],
[f"{c}: {s}" for c in data[cat].unique() for s in data[stack].unique()],
bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
plt.grid()
plt.show()
Let's plot!
plt.figure(figsize=(6, 3), dpi=300)
stack_catplot(x='Location', y='#ofGenes', cat='Condition', stack='Host', data=new_data)
If you want to plot in percentile, calculate it in the raw dataset.
total_genes = new_data.groupby(['Location', 'Condition'], as_index=False)['#ofGenes'].sum().rename(
columns={'#ofGenes': 'TotalGenes'})
new_data = new_data.merge(total_genes, how='left')
new_data['%ofGenes'] = new_data['#ofGenes'] / new_data['TotalGenes'] * 100
plt.figure(figsize=(6, 3), dpi=300)
stack_catplot(x='Location', y='%ofGenes', cat='Condition', stack='Host', data=new_data)
You didn't specify how you would like to stack the bars, but you should be able to do something like this...
df = pd.read_csv('data.csv')
agg_df = df.pivot_table(index='Location', columns='Host', values='Condition', aggfunc='count')
agg_df.plot(kind='bar', stacked=True)
This is my first time drawing bar charts in python.
My df op:
key descript score
0 noodles taste 5
1 noodles color -2
2 noodles health 3
3 apple color 7
4 apple hard 9
My code:
import matplotlib.pyplot as plt
op['positive'] = op['score'] > 0
op['score'].plot(kind='barh', color=op.positive.map({True: 'r', False: 'k'}), use_index=True)
plt.show()
plt.savefig('sample1.png')
Output:
But this is not what I expected. I would like to draw two charts by different keys in this case with index and maybe use different colors like below:
How can I accomplish this?
Try:
fig, ax = plt.subplots(1,op.key.nunique(), figsize=(15,5), sharex=True)
i = 0
#Fix some data issues/typos
op['key']=op.key.str.replace('noodels','noodles')
for n, g in op.assign(positive=op['score'] >= 0).groupby('key'):
g.plot.barh(y='score', x='descript', ax=ax[i], color=g['positive'].map({True:'red',False:'blue'}), legend=False)\
.set_xlabel(n)
ax[i].set_ylabel('Score')
ax[i].spines['top'].set_visible(False)
ax[i].spines['right'].set_visible(False)
ax[i].spines['top'].set_visible(False)
ax[i].spines['left'].set_position('zero')
i += 1
Output:
Update added moving of labels for yaxis - Thanks to this SO solution by # ImportanceOfBeingErnest
fig, ax = plt.subplots(1,op.key.nunique(), figsize=(15,5), sharex=True)
i = 0
#Fix some data issues/typos
op['key']=op.key.str.replace('noodels','noodles')
for n, g in op.assign(positive=op['score'] >= 0).groupby('key'):
g.plot.barh(y='score', x='descript', ax=ax[i], color=g['positive'].map({True:'red',False:'blue'}), legend=False)\
.set_xlabel(n)
ax[i].set_ylabel('Score')
ax[i].spines['top'].set_visible(False)
ax[i].spines['right'].set_visible(False)
ax[i].spines['top'].set_visible(False)
ax[i].spines['left'].set_position('zero')
plt.setp(ax[i].get_yticklabels(), transform=ax[i].get_yaxis_transform())
i += 1
Output:
So I've successfully plotted pie charts on a map as markers using ax.scatter, but I'm having trouble with some of the wedges "exploding" out of the pie chart. I can't seem to find the reason for this in my code, and have been unable to find an explanation anywhere online. This code is based on the example here , which a colleague has also used and resulted in normal, uniform pie charts. Between us we can't find the issue, and no errors occur.
The code:
import numpy as np
import math
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap, cm
australia_data = np.zeros((24,12))
colors = ['red','yellow','blue','mediumorchid']
#pie chart locations
xlon=[146.7,166,101.6,137.4,145.1,113.6,169.7,113.3,176.0,139.6,148.9,124.2,132.4,142.0,129.6,148.0,116.5,142.8,141.7,128.0,113.6,120.7,128.3,148.6]
ylat=[-42.2,-19.2,-0.5,-3.5,-34.4,-8.7,-45.1,-1.0,-38.6,-26.7,-29.1,-20.0,-14.4,-18.9,-31.3,-6.6,-23.8,-3.4,-7.5,-25.6,3.8,-3.1,-1.9,-23.2]
#function to draw pie charts on map
def draw_pie(ax,X=0, Y=0, size = 1500):
xy = []
start = 0.17
ratios=[1/12.]*12
for ratio in ratios:
x = [0] + np.cos(np.linspace(2*math.pi*start,2*math.pi*(start+ratio))).tolist() #30
y = [0] + np.sin(np.linspace(2*math.pi*start,2*math.pi*(start+ratio))).tolist() #30
xy1=(zip(x,y))
xy.append(xy1)
start -= ratio
piecolors = []
for lt in range(12):
c = australia_data[b,lt]-1
c=int(c)
piecolors.append(colors[c])
for i, xyi in enumerate(xy):
ax.scatter([X],[Y] , marker=(xyi,0), s=size, facecolor=piecolors[i],linewidth=0.5,alpha=.7)
australia_data[:,11] = 1
australia_data[:,4] = 3
australia_data[:,1] = 2
fig = plt.figure()
ax = fig.add_axes([.05,.01,.79,.95])
x1 = 90 #left
x2 = 180 #right
y1 = -50 #bottom
y2 = 10 #top
#Create the map
m = Basemap(resolution='l',projection='merc', llcrnrlat=y1,urcrnrlat=y2,llcrnrlon=x1,urcrnrlon=x2,lat_ts=0) #,lat_ts=(x1+x2)/2
m.drawcoastlines()
#plots pie charts:
for b in range(24):
X,Y=m(xlon[b],ylat[b])
draw_pie(ax,X, Y,size=400)
plt.savefig('australia_pies.png',dpi=400)
Any ideas as to why this is happening (and how to fix it!) would be greatly appreciated!
Edit: it seems to be an issue with the number of wedges in the pie chart - reducing this to 6 results in uniform pies, but 7+ causes some wedges to "explode".
Looking at the scatter piecharts example, you forgot to adjust the size of the pie wedges according to the maximum distance from 0 to the the arc of the wedge. This is necessary because markers normalize the path given before drawing it, hence different wedges need different sizes in order to appear with the same size in the final plot.
import numpy as np
import matplotlib.pyplot as plt
#function to draw pie charts on map
def draw_pie(ax,X=0, Y=0, size = 1500):
xy = []; s=[]
start = 0.0
ratios=[1/12.]*12
for ratio in ratios:
x = [0] + np.cos(np.linspace(2*np.pi*start,2*np.pi*(start+ratio))).tolist() #30
y = [0] + np.sin(np.linspace(2*np.pi*start,2*np.pi*(start+ratio))).tolist() #30
xy1 = np.column_stack([x, y])
s1 = np.abs(xy1).max()
xy.append(xy1)
s.append(s1)
start -= ratio
for xyi, si in zip(xy,s):
ax.scatter([X],[Y] , marker=(xyi,0), s=size*si**2, edgecolor="k")
fig, ax = plt.subplots()
X,Y=166,50
draw_pie(ax,X, Y,size=3000)
plt.show()
I've created a bar plot with matplotlib pyplot library, using the following code:
fig = plt.figure(figsize=(15, 15), facecolor='white')
ax_left = fig.add_subplot(221)
ax_left.grid()
ax_left.bar(
[ix for ix in range(len(resp_tx.keys()))],
resp_tx.values,
#align='center',
tick_label=resp_tx.keys(),
color='#A6C307',
edgecolor='white',
)
ax_left.tick_params(axis='x', labelsize=10)
for label in ax_left.get_xticklabels():
label.set_rotation(45)
Where resp_tx is a pandas Series that looks like this:
APPROVED 90
ABANDONED_TRANSACTION 38
INTERNAL_PAYMENT_PROVIDER_ERROR 25
CANCELLED_TRANSACTION_MERCHANT 24
ENTITY_DECLINED 6
CANCELLED_TRANSACTION_PAYER 2
Name: resp_tx, dtype: int64
This is the result but I'm not able to put the labels in the right place. How can I put the tick labels in the center of the bar?
I resolved the issue with the following modifications:
resp_tx = self.tx_per_account[account].resp_tx.value_counts()
ax_left = fig.add_subplot(221)
ax_left.grid()
ax_left.bar(
[ix for ix in range(len(resp_tx.keys()))],
resp_tx.values,
color='#A6C307',
edgecolor='white',
)
ax_left.set_xticks([ix+0.4 for ix in range(len(resp_tx.keys()))])
ax_left.set_xticklabels(resp_tx.keys(), rotation=40, ha='right')
ax_left.set_ylim(top=(max(resp_tx.values)+max(resp_tx.values)*0.05))
ax_left.set_xlim(left=-0.2)
#ax_left.spines['top'].set_visible(False)
ax_left.spines['right'].set_visible(False)
#ax_left.spines['bottom'].set_visible(False)
ax_left.spines['left'].set_visible(False)
I am trying to replicate the following image in matplotlib and it seems barh is my only option. Though it appears that you can't stack barh graphs so I don't know what to do
If you know of a better python library to draw this kind of thing, please let me know.
This is all I could come up with as a start:
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
people = ('A','B','C','D','E','F','G','H')
y_pos = np.arange(len(people))
bottomdata = 3 + 10 * np.random.rand(len(people))
topdata = 3 + 10 * np.random.rand(len(people))
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
ax.barh(y_pos, bottomdata,color='r',align='center')
ax.barh(y_pos, topdata,color='g',align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
I would then have to add labels individually using ax.text which would be tedious. Ideally I would like to just specify the width of the part to be inserted then it updates the center of that section with a string of my choosing. The labels on the outside (e.g. 3800) I can add myself later, it is mainly the labeling over the bar section itself and creating this stacked method in a nice way I'm having problems with. Can you even specify a 'distance' i.e. span of color in any way?
Edit 2: for more heterogeneous data. (I've left the above method since I find it more usual to work with the same number of records per series)
Answering the two parts of the question:
a) barh returns a container of handles to all the patches that it drew. You can use the coordinates of the patches to aid the text positions.
b) Following these two answers to the question that I noted before (see Horizontal stacked bar chart in Matplotlib), you can stack bar graphs horizontally by setting the 'left' input.
and additionally c) handling data that is less uniform in shape.
Below is one way you could handle data that is less uniform in shape is simply to process each segment independently.
import numpy as np
import matplotlib.pyplot as plt
# some labels for each row
people = ('A','B','C','D','E','F','G','H')
r = len(people)
# how many data points overall (average of 3 per person)
n = r * 3
# which person does each segment belong to?
rows = np.random.randint(0, r, (n,))
# how wide is the segment?
widths = np.random.randint(3,12, n,)
# what label to put on the segment (xrange in py2.7, range for py3)
labels = range(n)
colors ='rgbwmc'
patch_handles = []
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
left = np.zeros(r,)
row_counts = np.zeros(r,)
for (r, w, l) in zip(rows, widths, labels):
print r, w, l
patch_handles.append(ax.barh(r, w, align='center', left=left[r],
color=colors[int(row_counts[r]) % len(colors)]))
left[r] += w
row_counts[r] += 1
# we know there is only one patch but could enumerate if expanded
patch = patch_handles[-1][0]
bl = patch.get_xy()
x = 0.5*patch.get_width() + bl[0]
y = 0.5*patch.get_height() + bl[1]
ax.text(x, y, "%d%%" % (l), ha='center',va='center')
y_pos = np.arange(8)
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
Which produces a graph like this , with a different number of segments present in each series.
Note that this is not particularly efficient since each segment used an individual call to ax.barh. There may be more efficient methods (e.g. by padding a matrix with zero-width segments or nan values) but this likely to be problem-specific and is a distinct question.
Edit: updated to answer both parts of the question.
import numpy as np
import matplotlib.pyplot as plt
people = ('A','B','C','D','E','F','G','H')
segments = 4
# generate some multi-dimensional data & arbitrary labels
data = 3 + 10* np.random.rand(segments, len(people))
percentages = (np.random.randint(5,20, (len(people), segments)))
y_pos = np.arange(len(people))
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
colors ='rgbwmc'
patch_handles = []
left = np.zeros(len(people)) # left alignment of data starts at zero
for i, d in enumerate(data):
patch_handles.append(ax.barh(y_pos, d,
color=colors[i%len(colors)], align='center',
left=left))
# accumulate the left-hand offsets
left += d
# go through all of the bar segments and annotate
for j in range(len(patch_handles)):
for i, patch in enumerate(patch_handles[j].get_children()):
bl = patch.get_xy()
x = 0.5*patch.get_width() + bl[0]
y = 0.5*patch.get_height() + bl[1]
ax.text(x,y, "%d%%" % (percentages[i,j]), ha='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
You can achieve a result along these lines (note: the percentages I used have nothing to do with the bar widths, as the relationship in the example seems unclear):
See Horizontal stacked bar chart in Matplotlib for some ideas on stacking horizontal bar plots.
Imports and Test DataFrame
Tested in python 3.10, pandas 1.4.2, matplotlib 3.5.1, seaborn 0.11.2
For vertical stacked bars see Stacked Bar Chart with Centered Labels
import pandas as pd
import numpy as np
# create sample data as shown in the OP
np.random.seed(365)
people = ('A','B','C','D','E','F','G','H')
bottomdata = 3 + 10 * np.random.rand(len(people))
topdata = 3 + 10 * np.random.rand(len(people))
# create the dataframe
df = pd.DataFrame({'Female': bottomdata, 'Male': topdata}, index=people)
# display(df)
Female Male
A 12.41 7.42
B 9.42 4.10
C 9.85 7.38
D 8.89 10.53
E 8.44 5.92
F 6.68 11.86
G 10.67 12.97
H 6.05 7.87
Updated with matplotlib v3.4.2
Use matplotlib.pyplot.bar_label
See How to add value labels on a bar chart for additional details and examples with .bar_label.
labels = [f'{v.get_width():.2f}%' if v.get_width() > 0 else '' for v in c ] for python < 3.8, without the assignment expression (:=).
Plotted using pandas.DataFrame.plot with kind='barh'
ax = df.plot(kind='barh', stacked=True, figsize=(8, 6))
for c in ax.containers:
# customize the label to account for cases when there might not be a bar section
labels = [f'{w:.2f}%' if (w := v.get_width()) > 0 else '' for v in c ]
# set the bar label
ax.bar_label(c, labels=labels, label_type='center')
# uncomment and use the next line if there are no nan or 0 length sections; just use fmt to add a % (the previous two lines of code are not needed, in this case)
# ax.bar_label(c, fmt='%.2f%%', label_type='center')
# move the legend
ax.legend(bbox_to_anchor=(1.025, 1), loc='upper left', borderaxespad=0.)
# add labels
ax.set_ylabel("People", fontsize=18)
ax.set_xlabel("Percent", fontsize=18)
plt.show()
Using seaborn
sns.barplot does not have an option for stacked bar plots, however, sns.histplot and sns.displot can be used to create horizontal stacked bars.
seaborn typically requires the dataframe to be in a long, instead of wide, format, so use pandas.DataFrame.melt to reshape the dataframe.
Reshape dataframe
# convert the dataframe to a long form
df = df.reset_index()
df = df.rename(columns={'index': 'People'})
dfm = df.melt(id_vars='People', var_name='Gender', value_name='Percent')
# display(dfm)
People Gender Percent
0 A Female 12.414557
1 B Female 9.416027
2 C Female 9.846105
3 D Female 8.885621
4 E Female 8.438872
5 F Female 6.680709
6 G Female 10.666258
7 H Female 6.050124
8 A Male 7.420860
9 B Male 4.104433
10 C Male 7.383738
11 D Male 10.526158
12 E Male 5.916262
13 F Male 11.857227
14 G Male 12.966913
15 H Male 7.865684
sns.histplot: axes-level plot
fig, axe = plt.subplots(figsize=(8, 6))
sns.histplot(data=dfm, y='People', hue='Gender', discrete=True, weights='Percent', multiple='stack', ax=axe)
# iterate through each set of containers
for c in axe.containers:
# add bar annotations
axe.bar_label(c, fmt='%.2f%%', label_type='center')
axe.set_xlabel('Percent')
plt.show()
sns.displot: figure-level plot
g = sns.displot(data=dfm, y='People', hue='Gender', discrete=True, weights='Percent', multiple='stack', height=6)
# iterate through each facet / supbplot
for axe in g.axes.flat:
# iteate through each set of containers
for c in axe.containers:
# add the bar annotations
axe.bar_label(c, fmt='%.2f%%', label_type='center')
axe.set_xlabel('Percent')
plt.show()
Original Answer - before matplotlib v3.4.2
The easiest way to plot a horizontal or vertical stacked bar, is to load the data into a pandas.DataFrame
This will plot, and annotate correctly, even when all categories ('People'), don't have all segments (e.g. some value is 0 or NaN)
Once the data is in the dataframe:
It's easier to manipulate and analyze
It can be plotted with the matplotlib engine, using:
pandas.DataFrame.plot.barh
label_text = f'{width}' for annotations
pandas.DataFrame.plot.bar
label_text = f'{height}' for annotations
SO: Vertical Stacked Bar Chart with Centered Labels
These methods return a matplotlib.axes.Axes or a numpy.ndarray of them.
Using the .patches method unpacks a list of matplotlib.patches.Rectangle objects, one for each of the sections of the stacked bar.
Each .Rectangle has methods for extracting the various values that define the rectangle.
Each .Rectangle is in order from left the right, and bottom to top, so all the .Rectangle objects, for each level, appear in order, when iterating through .patches.
The labels are made using an f-string, label_text = f'{width:.2f}%', so any additional text can be added as needed.
Plot and Annotate
Plotting the bar, is 1 line, the remainder is annotating the rectangles
# plot the dataframe with 1 line
ax = df.plot.barh(stacked=True, figsize=(8, 6))
# .patches is everything inside of the chart
for rect in ax.patches:
# Find where everything is located
height = rect.get_height()
width = rect.get_width()
x = rect.get_x()
y = rect.get_y()
# The height of the bar is the data value and can be used as the label
label_text = f'{width:.2f}%' # f'{width:.2f}' to format decimal values
# ax.text(x, y, text)
label_x = x + width / 2
label_y = y + height / 2
# only plot labels greater than given width
if width > 0:
ax.text(label_x, label_y, label_text, ha='center', va='center', fontsize=8)
# move the legend
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
# add labels
ax.set_ylabel("People", fontsize=18)
ax.set_xlabel("Percent", fontsize=18)
plt.show()
Example with Missing Segment
# set one of the dataframe values to 0
df.iloc[4, 1] = 0
Note the annotations are all in the correct location from df.
For this case, the above answers work perfectly. The issue I had, and didn't find a plug-and-play solution online, was that I often have to plot stacked bars in multi-subplot figures, with many values, which tend to have very non-homogenous amplitudes.
(Note: I work usually with pandas dataframes, and matplotlib. I couldn't make the bar_label() method of matplotlib to work all the times.)
So, I just give a kind of ad-hoc, but easily generalizable solution. In this example, I was working with single-row dataframes (for power-exchange monitoring purposes per hour), so, my dataframe (df) had just one row.
(I provide an example figure to show how this can be useful in very densely-packed plots)
[enter image description here][1]
[1]: https://i.stack.imgur.com/9akd8.png
'''
This implementation produces a stacked, horizontal bar plot.
df --> pandas dataframe. Columns are used as the iterator, and only the firs value of each column is used.
waterfall--> bool: if True, apart from the stack-direction, also a perpendicular offset is added.
cyclic_offset_x --> list (of any length) or None: loop through these values to use as x-offset pixels.
cyclic_offset_y --> list (of any length) or None: loop through these values to use as y-offset pixels.
ax --> matplotlib Axes, or None: if None, creates a new axis and figure.
'''
def magic_stacked_bar(df, waterfall=False, cyclic_offset_x=None, cyclic_offset_y=None, ax=None):
if isinstance(cyclic_offset_x, type(None)):
cyclic_offset_x = [0, 0]
if isinstance(cyclic_offset_y, type(None)):
cyclic_offset_y = [0, 0]
ax0 = ax
if isinstance(ax, type(None)):
fig, ax = plt.subplots()
fig.set_size_inches(19, 10)
cycler = 0;
prev = 0 # summation variable to make it stacked
for c in df.columns:
if waterfall:
y = c ; label = "" # bidirectional stack
else:
y = 0; label = c # unidirectional stack
ax.barh(y=y, width=df[c].values[0], height=1, left=prev, label = label)
prev += df[c].values[0] # add to sum-stack
offset_x = cyclic_offset_x[divmod(cycler, len(cyclic_offset_x))[1]]
offset_y = cyclic_offset_y[divmod(cycler, len(cyclic_offset_y))[1]]
ax.annotate(text="{}".format(int(df[c].values[0])), xy=(prev - df[c].values / 2, y),
xytext=(offset_x, offset_y), textcoords='offset pixels',
ha='center', va='top', fontsize=8,
arrowprops=dict(facecolor='black', shrink=0.01, width=0.3, headwidth=0.3),
bbox=dict(boxstyle='round', facecolor='grey', alpha=0.5))
cycler += 1
if not waterfall:
ax.legend() # if waterfall, the index annotates the columns. If
# waterfall ==False, the legend annotates the columns
if isinstance(ax0, type(None)):
ax.set_title("Voi la")
ax.set_xlabel("UltraWatts")
plt.show()
else:
return ax
''' (Sometimes, it is more tedious and requires some custom functions to make the labels look alright.
'''
A, B = 80,80
n_units = df.shape[1]
cyclic_offset_x = -A*np.cos(2*np.pi / (2*n_units) *np.arange(n_units))
cyclic_offset_y = B*np.sin(2*np.pi / (2*n_units) * np.arange(n_units)) + B/2