I'm trying to annotate precentage in horizontal barplot with matplotlib.
The problem is that when I try to add the precentage annotation I get error:
"module 'matplotlib.pyplot' has no attribute 'patches'
This is how I try to create the chart:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
%matplotlib inline
sns.set(style="whitegrid")
#sns.set_color_codes("Spectral")
plt.figure(2, figsize=(20,15))
the_grid = GridSpec(2, 2)
plt.subplot(the_grid[0, 1], title='Original Dataset')
sns.barplot(x='count',y='land_cover_specific', data=df, palette='Spectral')
plt.xlabel('Count')
plt.ylabel('Land cover')
total = len(original)
print(total)
for p in plt.patches:
percentage = '{:.1f}%'.format(100 * p.get_width()/total)
x = p.get_x() + p.get_width() + 0.02
y = p.get_y() + p.get_height()/2
plt.annotate(percentage, (x, y))
plt.show()
I get the bar plot but I do not get the annotation due to this error.
My end goal: to add teh rpecentage of each bar count out of the total
I think you only need to change :
sns.barplot(x='count',y='land_cover_specific', data=df, palette='Spectral')
to:
ax = sns.barplot(x='count',y='land_cover_specific', data=df, palette='Spectral')
and
for p in plt.patches:
to:
for p in ax.patches:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
%matplotlib inline
sns.set(style="whitegrid")
#sns.set_color_codes("Spectral")
plt.figure(2, figsize=(20,15))
the_grid = GridSpec(2, 2)
plt.subplot(the_grid[0, 1], title='Original Dataset')
horizontal = sns.barplot(x='count',y='land_cover_specific', data=df, palette='Spectral')
plt.xlabel('Count')
plt.ylabel('Land cover')
total = len(original)
# print(total)
# for p in plt.patches:
# percentage = '{:.1f}%'.format(100 * p.get_width()/total)
# x = p.get_x() + p.get_width() + 0.02
# y = p.get_y() + p.get_height()/2
# plt.annotate(percentage, (x, y))
def auto_label(horizontal_):
for index, rectangle in enumerate(horizontal_):
height = rectangle.get_height()
width = rectangle.get_width()
y_value = rectangle.get_y()
# color is an array containing either hex or rgb values
# each should map to the color of each in your barchart
plt.text(width + height/2., y_value, "%d" % 100 * width / total, color=color[index])
auto_label(horizontal)
plt.show()
I have a grouped bar chart and each bar is stacked.
I have annotated each section of the stack with its individual value and now I would like to sum those values and annotate the total value(height) of each bar. I would like this annotation to be on top of each bar.
This is one of the two dataframes I am working from:
df_title = pd.DataFrame(index=['F','M'],
data={'<10':[2.064897, 1.573255], '10-12':[3.933137, 4.326450], '13-17':[9.242871, 16.715831],
'18-24':[10.226155, 12.487709], '18-24':[8.161259, 10.717797], '35-44':[5.801377, 4.916421],
'45-54':[3.539823, 2.851524], '55+':[1.671583, 1.769912]})
I convert both dataframes (df_title and df_comps) into numpy arrays before plotting.
df_title_concat = np.concatenate((np.zeros((len,1)), df_title.T.values), axis=1)
Here is the full code:
df_title
df_comps
len = df_title.shape[1]
df_title_concat = np.concatenate((np.zeros((len,1)), df_title.T.values), axis=1)
df_comps_concat = np.concatenate((np.zeros((len,1)), df_comps.T.values), axis=1)
fig = plt.figure(figsize=(20,10))
ax = plt.subplot()
title_colors = ['skyblue', 'royalblue']
comps_colors = ['lightgoldenrodyellow', 'orange']
for i in range(1,3):
for j in list(range(0, df_title.shape[1]-1)):
j += 1
ax_1 = ax.bar(j, df_title_concat[j,i], width=-0.4, bottom=np.sum(df_title_concat[j,:i]), color = title_colors[i-1],
edgecolor='black', linewidth=3, align='edge')
for p in ax_1.patches:
width, height = p.get_width(), p.get_height()
x, y = p.get_xy()
if height > 2:
ax.annotate('{:.2f}%'.format(height), (p.get_x()+0.875*width, p.get_y()+.4*height),
fontsize=16, fontweight='bold', color='black')
ax_2 = ax.bar(j, df_comps_concat[j,i], width=0.4, bottom=np.sum(df_comps_concat[j,:i]), color = comps_colors[i-1],
edgecolor='black', linewidth=3, align='edge')
for p in ax_2.patches:
width, height = p.get_width(), p.get_height()
x, y = p.get_xy()
if height > 2:
ax.annotate('{:.2f}%'.format(height), (p.get_x()+0.15*width, p.get_y()+.4*height),
fontsize=16, fontweight='bold', color='black')
Here is a solution:
df_title = pd.DataFrame(index=['F','M'],
data={'<10':[2.064897, 1.573255], '10-12':[3.933137, 4.326450], '13-17':[9.242871, 16.715831],
'18-24':[10.226155, 12.487709], '18-24':[8.161259, 10.717797], '35-44':[5.801377, 4.916421],
'45-54':[3.539823, 2.851524], '55+':[1.671583, 1.769912]})
df_title_concat = np.concatenate((np.zeros((len(df_title),1)), df_title.T.values), axis=1)
fig = plt.figure(figsize=(12,8))
ax = plt.subplot()
title_colors = ['skyblue', 'royalblue']
for i in range(1,3):
for j in list(range(0, df_title.shape[1]-1)):
j += 1
bottom=np.sum(df_title_concat[j,:i])
ax_1 = ax.bar(j, df_title_concat[j,i], width=-0.4, bottom=bottom, color = title_colors[i-1],
edgecolor='black', linewidth=3, align='edge')
for p in ax_1.patches:
width, height = p.get_width(), p.get_height()
if bottom != 0:
ax.annotate('{:.2f}%'.format(height+bottom), (p.get_x()+0.875*width, (height+bottom)+0.3),
fontsize=16, fontweight='bold', color='black')
However, I would suggest you to rethink the whole approach you are following and change the plot to something like:
plt.bar(df_title.columns,df_title.loc['M'])
plt.bar(df_title.columns,df_title.loc['F'],bottom=df_title.loc['M'])
How to center the bar plot to show the difference of a certain column?
I have the following bar plot, done with matplotlib :
Note how the barplot is really bad. The difference between each bar cant really be seen properly. So what I want, is to use the red bar as the origin in the y-axis. That way, the other bars would show the difference (blue_bar(i) - redbar).
In other words, I want the value of the red bar in the y-axis to be the y-origin of the plot.
Again, in another words, the red bar is the accuracy obtained by my academic work. I want to plot the other article results compared/ IN RELATION to mine.
I made the following picture using paint.net to illustrate what I want.
Any other ideas/suggestions are really appreciated.
Appendix :
I used the following code to produce the first graphic :
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
y = accuracies
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
ax.bar(x[len(x) - 1] + 1, 95.30, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.savefig('foo.png')
plt.show()
You could either set the y limits closer to the interesting values:
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
my_acc = 95.30
y = accuracies
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
ax.bar(x[len(x) - 1] + 1, my_acc, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.ylim(min(y) - 0.5, max(y) +0.5)
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.savefig('foo2.png')
plt.show()
Or you could plot it around zero, with your result being the new origin (but you will have to indicate by how much you shifted the origin somewhere in the legend or somewhere else):
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
my_acc = 95.30
y = np.asarray(accuracies) - my_acc
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
bars = ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
# ax.bar(x[len(x) - 1] + 1, my_acc, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.yticks([0, -0.3, -1.3, -2.3, -3.3, 0.7, 1.7], [95.30, 95, 94, 93, 92, 96, 97])
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(min(y) - 0.5, max(y) + 0.7)
def autolabel(rects):
for i in range(len(rects)):
rect = rects[i]
height = rect.get_height()
if (height >= 0):
ax.text(rect.get_x() + rect.get_width()/2.,
0.3 + height,'[{}]'.format( i), ha='center', va='bottom',
fontsize=7.5)
if (height < 0):
ax.text(rect.get_x() + rect.get_width()/2.,
height - 0.3,'[{}]'.format( i), ha='center', va='bottom',
fontsize=7.5)
autolabel(bars)
plt.savefig('foo.png')
plt.show()
Of course, your own result would not appear in the second plot, since it would have height zero.
I actually think the way you have it represented now is actually best -- meaning there isn't a huge difference in accuracy on a cursory level.
However, if you want to set the value of the red bar as the origin, try this:
...
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(95.30) # Sets the value of the red bar as the origin.
plt.savefig('foo.png')
plt.show()
Perhaps setting the minimum value of lowest accuracy of the article might make this graph more digestible.
...
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(min(accuracies), 100) # Sets the value of minimum accuracy as the origin and the max value as 100.
plt.savefig('foo.png')
plt.show()