Flexible placement of labels in seaborn barplots - python

I would like to place labels on bars in Seaborn depending on how much space there is available.
For example in the example below I would like to use outside labels if they fit on the figure and inside labels when they don't. I would like to do this automatically for many plots, so I am looking for a smart way to do this flexibly. Any ideas?
import seaborn as sns
import matplotlib.pyplot as plt
titanic=sns.load_dataset('titanic')
titanic.head()
fig, ax = plt.subplots()
sns.countplot(
y='sex',
data=titanic,
orient="h",
ax=ax
)
for p in ax.patches:
perc = "{:.1f}%".format(100 * p.get_width() / titanic.shape[0])
x = p.get_width()
y = p.get_y() + p.get_height() / 2
# inside labels
ax.annotate(perc, (x*0.8, y), color="white")
# outside labels
ax.annotate(perc, (x*1.1, y), color="black")
Bad quality example of what I would like to achieve:

Does this help?
fig, ax = plt.subplots()
sns.countplot(
y='sex',
data=titanic,
orient="h",
ax=ax
)
values = []
for p in ax.patches:
x = p.get_width() # counts; dimension of the bar
y = p.get_y() + p.get_height() / 2
values.append(x) #collect the bar sizes in a list
x_max = max(values) #determine the largest bar size
#adjust percentage of the maximum bar size where inside/outside annotation occurs
cut_off = 0.8*x_max
for p in ax.patches:
perc = "{:.1f}%".format(100 * p.get_width() / titanic.shape[0]) #the label for the bar
x = p.get_width()
y = p.get_y() + p.get_height() / 2
if x >= cut_off:
ax.annotate(perc, (x*0.8, y), color="white")
else:
ax.annotate(perc, (x*1.1, y), color="black")

Related

How to render the texts on the axis of a seaborn plot?

I have a seaborn function as follows:
def plot(col,fig):
tag = col.value_counts().sort_index(ascending=False)
print(tag)
f, ax = plt.subplots(figsize=fig)
print(ax)
sns.countplot(y=col, data=tag, color="b",order=col.value_counts().index,palette="Set1")
total = len(col)
for p in ax.patches:
percentage = '{:.1f}%'.format(100 * p.get_width()/total)
x = p.get_x() + p.get_width() + 0.02
y = p.get_y() + p.get_height()/2
ax.annotate(percentage, (x, y))
I am unable to render the text of the values on the y-axis. I am getting some random squares.
The values of the plot on y-axis are in Japanese.

module 'matplotlib.pyplot' has no attribute 'patches'

I'm trying to annotate precentage in horizontal barplot with matplotlib.
The problem is that when I try to add the precentage annotation I get error:
"module 'matplotlib.pyplot' has no attribute 'patches'
This is how I try to create the chart:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
%matplotlib inline
sns.set(style="whitegrid")
#sns.set_color_codes("Spectral")
plt.figure(2, figsize=(20,15))
the_grid = GridSpec(2, 2)
plt.subplot(the_grid[0, 1], title='Original Dataset')
sns.barplot(x='count',y='land_cover_specific', data=df, palette='Spectral')
plt.xlabel('Count')
plt.ylabel('Land cover')
total = len(original)
print(total)
for p in plt.patches:
percentage = '{:.1f}%'.format(100 * p.get_width()/total)
x = p.get_x() + p.get_width() + 0.02
y = p.get_y() + p.get_height()/2
plt.annotate(percentage, (x, y))
plt.show()
I get the bar plot but I do not get the annotation due to this error.
My end goal: to add teh rpecentage of each bar count out of the total
I think you only need to change :
sns.barplot(x='count',y='land_cover_specific', data=df, palette='Spectral')
to:
ax = sns.barplot(x='count',y='land_cover_specific', data=df, palette='Spectral')
and
for p in plt.patches:
to:
for p in ax.patches:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
%matplotlib inline
sns.set(style="whitegrid")
#sns.set_color_codes("Spectral")
plt.figure(2, figsize=(20,15))
the_grid = GridSpec(2, 2)
plt.subplot(the_grid[0, 1], title='Original Dataset')
horizontal = sns.barplot(x='count',y='land_cover_specific', data=df, palette='Spectral')
plt.xlabel('Count')
plt.ylabel('Land cover')
total = len(original)
# print(total)
# for p in plt.patches:
# percentage = '{:.1f}%'.format(100 * p.get_width()/total)
# x = p.get_x() + p.get_width() + 0.02
# y = p.get_y() + p.get_height()/2
# plt.annotate(percentage, (x, y))
def auto_label(horizontal_):
for index, rectangle in enumerate(horizontal_):
height = rectangle.get_height()
width = rectangle.get_width()
y_value = rectangle.get_y()
# color is an array containing either hex or rgb values
# each should map to the color of each in your barchart
plt.text(width + height/2., y_value, "%d" % 100 * width / total, color=color[index])
auto_label(horizontal)
plt.show()

Hist wrong binwidth with logarithmix x and y axis

I need to plot a hist with bot logarithmic y and x-axis, but I'd like also to have hist's bins displayed of same size.
How can I achieve this result with the following code (the x used is very long so I have intentionally avoided to insert it):
import matplotlib as plt
import numpy as np
fig, ax1 = plt.subplots()
hist, bins, _ = ax1.hist(x, log=True, color="red", rwidth=0.5)
plt.xscale("log")
np_x = np.array(x)
print("np_x.mean() = " + str(np_x.mean()))
plt.axvline(np_x.mean() * 1.1, color='lime', linestyle='dashed', linewidth=3,
label='Mean: {:.2f}'.format(np_x.mean()))
handles, labels = ax1.get_legend_handles_labels()
binwidth = math.floor(bins[1] - bins[0])
mylabel = "Binwidth: {}".format(binwidth) + ", Bins: {}".format(len(hist))
red_patch = mpatches.Patch(color='red', label=mylabel)
handles = [red_patch] + handles
labels = [mylabel] + labels
ax1.legend(handles, labels)
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.show()

How to annotate stacked bar chart with the sum of each bar (Matplotlib)?

I have a grouped bar chart and each bar is stacked.
I have annotated each section of the stack with its individual value and now I would like to sum those values and annotate the total value(height) of each bar. I would like this annotation to be on top of each bar.
This is one of the two dataframes I am working from:
df_title = pd.DataFrame(index=['F','M'],
data={'<10':[2.064897, 1.573255], '10-12':[3.933137, 4.326450], '13-17':[9.242871, 16.715831],
'18-24':[10.226155, 12.487709], '18-24':[8.161259, 10.717797], '35-44':[5.801377, 4.916421],
'45-54':[3.539823, 2.851524], '55+':[1.671583, 1.769912]})
I convert both dataframes (df_title and df_comps) into numpy arrays before plotting.
df_title_concat = np.concatenate((np.zeros((len,1)), df_title.T.values), axis=1)
Here is the full code:
df_title
df_comps
len = df_title.shape[1]
df_title_concat = np.concatenate((np.zeros((len,1)), df_title.T.values), axis=1)
df_comps_concat = np.concatenate((np.zeros((len,1)), df_comps.T.values), axis=1)
fig = plt.figure(figsize=(20,10))
ax = plt.subplot()
title_colors = ['skyblue', 'royalblue']
comps_colors = ['lightgoldenrodyellow', 'orange']
for i in range(1,3):
for j in list(range(0, df_title.shape[1]-1)):
j += 1
ax_1 = ax.bar(j, df_title_concat[j,i], width=-0.4, bottom=np.sum(df_title_concat[j,:i]), color = title_colors[i-1],
edgecolor='black', linewidth=3, align='edge')
for p in ax_1.patches:
width, height = p.get_width(), p.get_height()
x, y = p.get_xy()
if height > 2:
ax.annotate('{:.2f}%'.format(height), (p.get_x()+0.875*width, p.get_y()+.4*height),
fontsize=16, fontweight='bold', color='black')
ax_2 = ax.bar(j, df_comps_concat[j,i], width=0.4, bottom=np.sum(df_comps_concat[j,:i]), color = comps_colors[i-1],
edgecolor='black', linewidth=3, align='edge')
for p in ax_2.patches:
width, height = p.get_width(), p.get_height()
x, y = p.get_xy()
if height > 2:
ax.annotate('{:.2f}%'.format(height), (p.get_x()+0.15*width, p.get_y()+.4*height),
fontsize=16, fontweight='bold', color='black')
Here is a solution:
df_title = pd.DataFrame(index=['F','M'],
data={'<10':[2.064897, 1.573255], '10-12':[3.933137, 4.326450], '13-17':[9.242871, 16.715831],
'18-24':[10.226155, 12.487709], '18-24':[8.161259, 10.717797], '35-44':[5.801377, 4.916421],
'45-54':[3.539823, 2.851524], '55+':[1.671583, 1.769912]})
df_title_concat = np.concatenate((np.zeros((len(df_title),1)), df_title.T.values), axis=1)
fig = plt.figure(figsize=(12,8))
ax = plt.subplot()
title_colors = ['skyblue', 'royalblue']
for i in range(1,3):
for j in list(range(0, df_title.shape[1]-1)):
j += 1
bottom=np.sum(df_title_concat[j,:i])
ax_1 = ax.bar(j, df_title_concat[j,i], width=-0.4, bottom=bottom, color = title_colors[i-1],
edgecolor='black', linewidth=3, align='edge')
for p in ax_1.patches:
width, height = p.get_width(), p.get_height()
if bottom != 0:
ax.annotate('{:.2f}%'.format(height+bottom), (p.get_x()+0.875*width, (height+bottom)+0.3),
fontsize=16, fontweight='bold', color='black')
However, I would suggest you to rethink the whole approach you are following and change the plot to something like:
plt.bar(df_title.columns,df_title.loc['M'])
plt.bar(df_title.columns,df_title.loc['F'],bottom=df_title.loc['M'])

How to center the bar plot to show the difference of a certain column?

How to center the bar plot to show the difference of a certain column?
I have the following bar plot, done with matplotlib :
Note how the barplot is really bad. The difference between each bar cant really be seen properly. So what I want, is to use the red bar as the origin in the y-axis. That way, the other bars would show the difference (blue_bar(i) - redbar).
In other words, I want the value of the red bar in the y-axis to be the y-origin of the plot.
Again, in another words, the red bar is the accuracy obtained by my academic work. I want to plot the other article results compared/ IN RELATION to mine.
I made the following picture using paint.net to illustrate what I want.
Any other ideas/suggestions are really appreciated.
Appendix :
I used the following code to produce the first graphic :
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
y = accuracies
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
ax.bar(x[len(x) - 1] + 1, 95.30, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.savefig('foo.png')
plt.show()
You could either set the y limits closer to the interesting values:
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
my_acc = 95.30
y = accuracies
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
ax.bar(x[len(x) - 1] + 1, my_acc, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.ylim(min(y) - 0.5, max(y) +0.5)
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.savefig('foo2.png')
plt.show()
Or you could plot it around zero, with your result being the new origin (but you will have to indicate by how much you shifted the origin somewhere in the legend or somewhere else):
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
my_acc = 95.30
y = np.asarray(accuracies) - my_acc
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
bars = ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
# ax.bar(x[len(x) - 1] + 1, my_acc, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.yticks([0, -0.3, -1.3, -2.3, -3.3, 0.7, 1.7], [95.30, 95, 94, 93, 92, 96, 97])
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(min(y) - 0.5, max(y) + 0.7)
def autolabel(rects):
for i in range(len(rects)):
rect = rects[i]
height = rect.get_height()
if (height >= 0):
ax.text(rect.get_x() + rect.get_width()/2.,
0.3 + height,'[{}]'.format( i), ha='center', va='bottom',
fontsize=7.5)
if (height < 0):
ax.text(rect.get_x() + rect.get_width()/2.,
height - 0.3,'[{}]'.format( i), ha='center', va='bottom',
fontsize=7.5)
autolabel(bars)
plt.savefig('foo.png')
plt.show()
Of course, your own result would not appear in the second plot, since it would have height zero.
I actually think the way you have it represented now is actually best -- meaning there isn't a huge difference in accuracy on a cursory level.
However, if you want to set the value of the red bar as the origin, try this:
...
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(95.30) # Sets the value of the red bar as the origin.
plt.savefig('foo.png')
plt.show()
Perhaps setting the minimum value of lowest accuracy of the article might make this graph more digestible.
...
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(min(accuracies), 100) # Sets the value of minimum accuracy as the origin and the max value as 100.
plt.savefig('foo.png')
plt.show()

Categories

Resources