I have data displayed in the following format:
values = np.array([10, 12,13, 5,20], [30, 7, 10, 25,2], [10, 12,13, 5,20]])
And I want to create a straight-up stacked bar chart like the following figure. Each element in the array belongs to a stacked bar.
I have searched to see how can I do this with matplotlib, but unfortunately, I still haven't found a way to do it. How can I do this?
AFAIK, there is now straightforward way to do it. You need to calculate exact position of bars yourself and then normalize it.
import numpy as np
import matplotlib.pyplot as plt
values = np.array([[10, 12,13, 5,20], [30, 7, 10, 25,2], [10, 12,13, 5,20]])
values_normalized = values/np.sum(values, axis=0)
bottom_values = np.cumsum(values_normalized, axis=0)
bottom_values = np.vstack([np.zeros(values_normalized[0].size), bottom_values])
text_positions = (bottom_values[1:] + bottom_values[:-1])/2
r = [0, 1, 2, 3, 4] # position of the bars on the x-axis
names = ['A', 'B', 'C', 'D', 'E'] # names of groups
colors = ['lightblue', 'orange', 'lightgreen']
for i in range(3):
plt.bar(r, values_normalized[i], bottom=bottom_values[i], color=colors[i], edgecolor='white', width=1, tick_label=['a','b','c','d','e'])
for xpos, ypos, yval in zip(r, text_positions[i], values[i]):
plt.text(xpos, ypos, "N=%d"%yval, ha="center", va="center")
# Custom X axis
plt.xticks(r, names, fontweight='bold')
plt.xlabel("group")
plt.show()
There is a source that tells how to add text on top of bars. I'm a bit in a hurry right now so I hope this is useful and I'll update my answer next day if needed.
I've updated my answer. Adding text on top of the bars is tricky, it requires some calculations of their vertical positions.
Btw, I have refactored the most of code that is in a link I shared.
Python 3.8
matplotlib 3.3.1
numpy 1.19.1
Chat Result
import matplotlib.pyplot as plt
import numpy as np
values = np.array([[10, 12, 13, 5, 20], [30, 7, 10, 25, 2], [10, 12, 13, 5, 20]])
row, column = values.shape # (3, 5)
x_type = [x+1 for x in range(column)]
ind = [x for x, _ in enumerate(x_type)]
values_normalized = values/np.sum(values, axis=0)
value1, value2, value3 = values_normalized[0,:], values_normalized[1,:], values_normalized[2,:]
# Create figure
plt.figure(figsize=(8, 6))
plt.bar(ind, value1, width=0.8, label='Searies1', color='#5B9BD5')
plt.bar(ind, value2, width=0.8, label='Searies2', color='#C00000', bottom=value1)
plt.bar(ind, value3, width=0.8, label='Searies3', color='#70AD47', bottom=value1 + value2)
# Show text
bottom_values = np.cumsum(values_normalized, axis=0)
bottom_values = np.vstack([np.zeros(values_normalized[0].size), bottom_values])
text_positions = (bottom_values[1:] + bottom_values[:-1])/2
c = list(range(column))
for i in range(3):
for xpos, ypos, yval in zip(c, text_positions[i], values[i]):
plt.text(xpos, ypos, yval, horizontalalignment='center', verticalalignment='center', color='white')
plt.xticks(ind, x_type)
plt.legend(loc='center', bbox_to_anchor=(0, 1.02, 1, 0.1), handlelength=1, handleheight=1, ncol=row)
plt.title('CHART TITLE', fontdict = {'fontsize': 16,'fontweight': 'bold', 'family': 'serif'}, y=1.1)
# Hide y-axis
plt.gca().axes.yaxis.set_visible(False)
plt.show()
Related
How can this vertical grouped bar chart be changed to a horizontal bar chart (grouped, and stacked)? I need help to alter the code such that the bars are displayed horizontally instead of vertically.
import matplotlib.pyplot as plt
import numpy as np
N = 9
labels = ['L', 'S', 'S', 'M', 'W', 'W', 'S', 'R', 'C']
M_means = [1, 45, 28, 11, 4, 7, 1, 0.02, 0.3]
PO_means = [3, 58, 17, 8, 3, 8, 1, 0.06, 1]
K_means = [1, 44, 30, 11, 3, 7, 1, 0.01, 0.5]
x = np.arange(len(labels)) # the label locations
width = 0.30 # the width of the bars
fig, ax = plt.subplots(figsize=(15, 9))
rects1 = ax.bar(x - width, M_means, width, label='M S and K', color=('#b02a2a'))
rects2 = ax.bar(x, PO_means, width, label='P O S and K', color=('#055cad'))
rects3 = ax.bar(x + width, K_means, width, label='M K', color=('#0b7d53'))
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('% of workday', fontsize=32)
#ax.set_title('Scores by group and gender')
ax.set_xticks(x)
ax.set_xticklabels(labels, fontsize=32, rotation=15)
ax.legend(loc='upper right', frameon=False, fontsize=32, markerscale=2)
ax.bar_label(rects1, size = 32, padding=20, rotation=90)
ax.bar_label(rects2, size = 32, padding=20, rotation=90)
ax.bar_label(rects3, size = 32, padding=20, rotation=90)
plt.xticks(ha='center')
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(32)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(32)
plt.ylim(0, 100)
plt.gca().spines['right'].set_color('none')
plt.gca().spines['top'].set_color('none')
#fig.tight_layout()
plt.show()
Functionally, only two changes are needed:
Change ax.bar to ax.barh
Swap set_x* methods with set_y* methods, e.g. set_xticks() -> set_yticks() and so on
Semantically, the variables x and width should also be renamed to y and height.
import matplotlib.pyplot as plt
import numpy as np
N = 9
labels = list('LSSMWWSRC')
M_means = [1, 45, 28, 11, 4, 7, 1, 0.02, 0.3]
K_means = [2, 40, 21, 18, 3, 3, 2, 0.52, 0.3]
PO_means = [3, 58, 17, 8, 3, 8, 1, 0.06, 1]
K = [1, 44, 30, 11, 3, 7, 1, 0.01, 0.5]
# rename x/width to y/height
y = np.arange(len(labels))
height = 0.30
fig, ax = plt.subplots()
# use ax.barh instead of ax.bar
rects1 = ax.barh(y - height, M_means, height, label='M S and K', color='#b02a2a')
rects2 = ax.barh(y, PO_means, height, label='P O S and K', color='#055cad')
rects3 = ax.barh(y + height, K_means, height, label='M K', color='#0b7d53')
# swap set_x* methods with set_y* methods
ax.set_xlabel('% of workday')
ax.set_yticks(y)
ax.set_yticklabels(labels)
ax.legend(loc='upper right', frameon=False, markerscale=2)
ax.bar_label(rects1, padding=10)
ax.bar_label(rects2, padding=10)
ax.bar_label(rects3, padding=10)
# ...
The easiest solution is to load the data into a pandas.DataFrame, and then use pandas.DataFrame.plot with kind='barh'. This is easier because pandas uses matplotlib as the default plotting backend, and the API groups the bars automatically.
This reduces the code to 14 lines (not including imports).
When using 'barh', xlabel= applies to the y-axis. Therefore, xlabel='' removes the y-axis label.
Adjust figsize=(12, 10) if planning to use smaller / larger font sizes.
See Adding value labels on a matplotlib bar chart for additional details about using .bar_label.
Tested in python 3.10, pandas 1.4.2, matplotlib 3.5.1
import pandas as pd
import matplotlib.pylot as plt
# data
labels = ['L', 'S', 'S', 'M', 'W', 'W', 'S', 'R', 'C']
M_means = [1, 45, 28, 11, 4, 7, 1, 0.02, 0.3]
PO_means = [3, 58, 17, 8, 3, 8, 1, 0.06, 1]
K_means = [1, 44, 30, 11, 3, 7, 1, 0.01, 0.5]
# create a dict with the keys as the desired legend labels
data = {'labels': labels, 'M S and K': M_means, 'P O S and K': PO_means, 'M K': K_means}
# create dataframe
df = pd.DataFrame(data)
# plot: specify y=[...] if only certain columns are desired
ax = df.plot(kind='barh', x='labels', width=.85, figsize=(12, 10), xlabel='', color=['#b02a2a', '#055cad', '#0b7d53'])
ax.set_xlabel('% of workday', fontsize=15)
ax.set_xlim(0, 100)
ax.legend(loc='upper right', frameon=False, fontsize=15, markerscale=2)
for c in ax.containers:
ax.bar_label(c, label_type='edge', padding=1, size=15)
ax.tick_params(axis='both', which='both', labelsize=15)
ax.spines[['top', 'right']].set_visible(False)
Stacked
To manually create the stacked bar without pandas, see Horizontal stacked bar chart in Matplotlib
Use the parameter stacked=True
Some bar patches are to small for the label, so custom labels have been passed to the labels= parameter in .bar_label
Using := requires at least python 3.8. Otherwise use labels = [f'{v.get_width():.0f}' if v.get_width() > 1 else '' for v in c]
ax = df.plot(kind='barh', x='labels', width=.85, figsize=(12, 10), xlabel='',
color=['#b02a2a', '#055cad', '#0b7d53'], stacked=True)
ax.set_xlabel('% of workday', fontsize=15)
ax.set_xlim(0, 100)
ax.legend(loc='upper right', frameon=False, fontsize=15, markerscale=2)
for c in ax.containers:
# custom labels only show label size for values greater than 1
labels = [f'{w:.0f}' if (w := v.get_width()) > 1 else '' for v in c]
ax.bar_label(c, labels=labels, label_type='center', padding=1, size=15)
ax.tick_params(axis='both', which='both', labelsize=15)
ax.spines[['top', 'right']].set_visible(False)
I'm trying to allow my figure to share the same y axis, but have different scales along x axis. The problem is that when I try to map the second figure to the second axes (ax1 = ax.twiny), the figure seems to move forward to the right from where it should be. Here is a minimal working example that demonstrates my problem.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
r = [0,1,2,3,4]
raw_data = {'greenBars': [20, 1.5, 7, 10, 5], 'orangeBars': [5, 15, 5, 10, 15],'blueBars': [2, 15, 18, 5, 10]}
df = pd.DataFrame(raw_data)
totals = [i+j+k for i,j,k in zip(df['greenBars'], df['orangeBars'], df['blueBars'])]
greenBars = [i / j * 100 for i,j in zip(df['greenBars'], totals)]
f, ax = plt.subplots(1, figsize=(6,6))
ax.barh(r, greenBars, color='#b5ffb9', edgecolor='white', height=0.85)
df = pd.DataFrame({'group':['A', 'B', 'C', 'D', 'E'], 'values':[300,250,150,50,10] })
ax1 = ax.twiny()
ax1.hlines(y=groups, xmin=0, xmax=df['values'], color='black', linewidth=1.5);
plt.show()
where my expected outcome is to have the ax1.hlines move left-ward to the frame (as shown by the arrows in the image below). Does anybody have any suggestions as to how to fix this behaviour?
barh usually sets lower limit at 0 while plot or others set at a little less value for aesthetic. To fix this, manually set xlim for ax1:
...
f, ax = plt.subplots(1, figsize=(6,6))
ax.barh(r, greenBars, color='#b5ffb9', edgecolor='white', height=0.85)
df = pd.DataFrame({'group':['A', 'B', 'C', 'D', 'E'], 'values':[300,250,150,50,10] })
ax1 = ax.twiny()
ax1.hlines(y=df['group'], xmin=0, xmax=df['values'], color='black', linewidth=1.5);
# this is added
ax1.set_xlim(0)
plt.show()
Output:
import numpy as np
import matplotlib.pyplot as plt
n = 1000
x = np.arange(0, n)
y1 = np.random.normal(50, 4, n)
y2 = np.random.normal(25, 2.5, n)
y3 = np.random.normal(10, 1.1, n)
fig, (ax1, ax2, ax3) = plt.subplots(nrows = 3, ncols = 1)
ax1.plot(x, y1, 'royalblue')
ax1.set(xticks = [], title = 'Title')
ax2.plot(x, y2, 'darkorange')
ax2.set(xticks = [])
ax3.plot(x, y3, 'forestgreen')
ax3.set(xlabel = 'Random sample')
fig.legend(['First', 'Second', 'Third'])
plt.show()
I would like the ylabels to be shown in percentage, start at 0% and decrease. For example the blue one should go from [30, 40, 50, 60, 70] to [-57.1%, -42.9%, -28.6%, -14.3%, 0%]. The yellow one should go from [10, 20, 30, 40] to [-75%, -50%, -25%, 0%] and the green one should go from [5, 7.5, 10, 12.5, 15] to [-66.6%, -50%, -33.3%, -16.7%, 0%].
The rest of the graphs should look exactly the same, only the ylabels should change.
Just convert your current yticks to floats and change to the range you want them to be at before displaying:
import numpy as np
ticks = [float(x) for x in yvals]
ticks = np.array(ticks) - max(ticks)
yticklabels = ['{0:.1%}'.format(x) for x in ticks]
Do this for each plot separately.
I have 2 columns, sharing the same x-axis values, that I want to connect using vertical lines. This is the desired effect:
I was able to implement it in matplotlib:
for i, row in df.iterrows():
ax.plot([row['x']]*2, row[['y1', 'y2']], color='grey', lw=1, zorder=0, alpha=0.5)
How can I achieve this in Bokeh?
df = pd.DataFrame(np.random.normal(0, 5, (10, 2)), columns=['x','y'])
df_2 = df.copy()
df_2['y'] = df_2['y'] - 5
source = ColumnDataSource(df)
source_2 = ColumnDataSource(df_2)
myplot = figure(plot_width=600, plot_height=400, tools='hover,box_zoom,box_select,crosshair,reset')
myplot.circle('x', 'y', size=7, fill_alpha=0.5, source=source)
myplot.circle('x', 'y', size=7, fill_alpha=0.5, color='orange', source=source_2)
show(myplot, notebook_handle=True);
Bokeh code result:
Underlying data example: Y2 will always be larger than Y1.
You should use the segment glyph method:
from bokeh.plotting import figure, show
x = [1, 2, 3, 4, 5]
y1 = [6, 7, 2, 4, 5]
y2 = [10, 12, 11, 14, 13]
p = figure(plot_height=350)
p.segment(x, y1, x, y2, color="lightgrey", line_width=3)
p.circle(x, y1, color="blue", size=20)
p.circle(x, y2, color="red", size=20)
show(p)
This code passes the data directly to the glyph methods, but it would also be sensible to put everything in one ColumnDataSource that gets shared for all the glyphs.
I'm creating a stacked horizontal bar graph with 3 segments using the code below:
import matplotlib.pyplot as plt
import numpy as np
def create_stacked_hbar(data):
fig, ax = plt.subplots(figsize=(10, 10))
ylabels = list(data.keys())
labels = ['a', 'b', 'c', 'd', 'e', 'f']
c = []
v = []
for key, val in data.items():
c.append(key)
v.append(val)
v = np.array(v)
print(v)
plt.barh(range(len(c)), v[:,0], width=1, color='red',
edgecolor='w',linewidth=2, tick_label=ylabels, label=labels[0])
plt.barh(range(len(c)), v[:,1], width=1, left=v[:,0], color='orange',
edgecolor='w', linewidth=2, label=labels[1])
plt.barh(range(len(c)), v[:,2], width=1, left=(v[:,0]+v[:,1]), color='yellow',
edgecolor='w', linewidth=2, label=labels[2])
for p in ax.patches:
left, bottom, width, height = p.get_bbox().bounds
if width != 0.0:
ax.annotate(str(int(width)), xy=(left+width/2, bottom+height/2),
ha='center', va='center', size = 12)
plt.legend(bbox_to_anchor=(0, -0.15), loc=3, prop={'size': 14}, frameon=False)
plt.yticks(np.arange(len(ylabels)), ylabels)
plt.show()
data = {'A': [8, 7, 2], 'B': [0, 2, 0],
'C': [3, 2, 4], 'D': [0, 4, 0],
'E': [0, 1, 1], 'F': [0, 1, 0],
'G': [0, 0, 0]}
create_stacked_hbar(data)
The issue is that in attempting to set width = 1 in the bars throws a type error:
TypeError: <lambda>() got multiple values for argument 'width'
removing width allows to the code to work, but I do need to increase the width of the bars in the chart. I suspect this has to do with the annotation code I use in this case. Does anyone have any suggestions on getting around this?
Also note I am unable to use the "dataframe.plot.barh(data, stacked=True)" method via pandas to generate this chart.
You are making a horizontal bar plot, the width parameter corresponds to the data, so in your example you are passing both v[:,0] and 1 as width. If you are trying to specify the height because you do not desire whitespace between the bars you need to set height=1, consider this example:
import numpy as np
import matplotlib.pyplot as plt
# Seeded for reproducing
np.random.seed(1)
v1 = abs(np.random.randn(10))
v2 = abs(np.random.randn(10))
v3 = abs(np.random.randn(10))
c = range(10)
plt.title("Sample bar plot")
plt.barh(c, v1, height=1, alpha=0.8, color='r')
plt.barh(c, v2, height=1, left=v1, alpha=0.8, color='b')
plt.barh(c, v3, height=1, left=v1+v2, alpha=0.8, color='g')
plt.show()
This will give you
Where removing the height=1 specification would give you