Labels for grouped bar plot with uneven length of data? - python

I am working with a plot that contains an uneven length of data. I created another group of females (green bars), and I would like to label these two female groups F1 and F2.
Here is my code:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
labels = ['G1', 'G2', 'G3', 'G4']
labels2 = ['F1', 'F2']
male = [1, 3, 10, 20]
female = [2, 7]
female_2 = [3, 11]
x_male = np.arange(len(male))
x_female = np.arange(len(female))
offset_male = np.zeros(len(male))
offset_female = np.zeros(len(female))
shorter = min(len(x_male), len(x_female))
width = 0.25 # the width of the bars
offset_male[:shorter] = width/2
offset_female[:shorter] = width/2
fig, ax = plt.subplots()
rects1 = ax.bar(x_male - offset_male, male, width, label='male')
rects2 = ax.bar(x_female + offset_female, female, width, label='female')
rects3 = ax.bar(x_female + 3 * offset_female, female_2, width, label='female')
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_xticks(x_male)
ax.set_xticklabels(labels)
ax.legend()
fig.tight_layout()
plt.show()
Do you have any idea how I can do it?

blend all ticks together
ax.set_xticks(list(x_male)+list(x_female + 3 * offset_female))
ax.set_xticklabels(labels+labels2)

Related

How do I make the text appear after 3barplot in Matplotlib

Why doesn't zorder work in this case? I've tried using it but the text still ends up being covered by the bar plot towers.
import numpy as np
from matplotlib import pyplot as plt
Percentage_Differences_1 = np.array([ [7.94*(10**-10),7.94*(10**-9),7.94*(10**-8),7.94*(10**-7),7.94*(10**-6),7.94*(10**-5)],
[7.92*(10**-12),7.92*(10**-11),7.92*(10**-10),7.92*(10**-9),7.92*(10**-8),7.92*(10**-7)],
[7.72*(10**-14),7.72*(10**-13),7.72*(10**-12),7.72*(10**-11),7.72*(10**-10),7.72*(10**-9)],
[5.66*(10**-16),5.66*(10**-15),5.66*(10**-14),5.66*(10**-13),5.66*(10**-12),5.66*(10**-11)],
[1.49*(10**-17),1.49*(10**-16),1.49*(10**-15),1.49*(10**-14),1.49*(10**-13),1.49*(10**-12)],
[2.21*(10**-18),2.21*(10**-17),2.21*(10**-16),2.21*(10**-15),2.21*(10**-14),2.21*(10**-13)] ]) # Layer 1, 12
fig1 = plt.figure(dpi = 120, tight_layout = True)
fig1.set_size_inches(10, 7)
ax1 = fig1.add_subplot(111, projection='3d')
width = depth = 0.3
column_names = ['$10^{-6} m$','$10^{-5} m$','$10^{-4} m$','$10^{-3} m$','$10^{-2} m$','$10^{-1} m$']
row_names = ['$10^{-6} g$','$10^{-5} g$','$10^{-4} g$','$10^{-3} g$','$10^{-2} g$','$10^{-1} g$']
height_names = ['$10^{-2}$','$10^{-4}$','$10^{-6}$','$10^{-8}$','$10^{-10}$','$10^{-12}$','$10^{-14}$','$10^{-16}$','$10^{-18}$']
for x in range(0,6):
for y in range(0,6):
plot1 = ax1.bar3d(x, y, 0, width, depth, np.log10(Percentage_Differences_1[x][y]), color = "#0040bf", alpha=0.3, zorder = 1)
txt1 = ax1.text(x,y,1.15*np.log10(Percentage_Differences_1[x][y]),'{:.2e}'.format(Percentage_Differences_1[y][x]), verticalalignment='top', bbox=dict(facecolor='grey', alpha=0.5), zorder = 2)
ax1.view_init(-140, -30)
ax1.set_xticks(np.linspace(0, 6, num = 6))
ax1.set_yticks(np.linspace(0, 6, num = 6))
ax1.set_xticklabels(column_names)
ax1.set_yticklabels(row_names)
ax1.set_zticklabels(height_names)
ax1.set_xlabel("Mass", labelpad = 13, rotation = 45)
ax1.set_ylabel("Radius", labelpad = 10, rotation = 45)
ax1.set_zlabel("Deviation $\Delta$")
ax1.set_title("1st Initial Condition: $r(0)$ and $r'(0)$ of $\Theta(12) = 2.18 \\times 10^{7} m$", pad = 40)
plt.show()
I've tried using both set_zorder and zorder but the plot still ends up covering the majority of the text labels.
Change your zorder for a number larger than the number of bar objects, 100 for example:

fill between vertical curves in the sub plot

Dears,
I have the following csv file
depth
lst
dol
Anhd
sst
50
20
40
80
100
100
25
50
85
100
150
15
35
75
100
I take the data from csv to draw subplot contains four curves in the same subplot, I have filled by red color from left edge to first curve, also I have filled by blue color from last curve to right edge, I want to fill between entire curves in between first and last curve and make color legend.
the table is equal to csv file
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import pandas as pd
import re
import json
test = r'D:\python\TEST-COMPOSITION.csv'
test =pd.read_csv(test)
mineral_names = test.drop(['depth'],axis=1)
mineral_names = list(mineral_names.columns.values)
colors = ["green", "gray"]
fig = plt.figure(figsize=(15, 12), dpi=100, tight_layout=True)
gs = gridspec.GridSpec(nrows=1, ncols=10, wspace=0)
fig.add_subplot(gs[0, 1])
for i in range(len(mineral_names)-1):
plt.plot(test[mineral_names[i]],test['depth'],linewidth=2, color='black')
for i in range(len(mineral_names)-1):
if i == 0:
left_col_value = 0
right_col_value = 100
span = abs(left_col_value - right_col_value)
cmap = plt.get_cmap('hot_r')
color_index = np.arange(left_col_value, right_col_value, span / 100)
for index in sorted(color_index):
index_value = (index - left_col_value) / span
plt.fill_betweenx(test['depth'],test[mineral_names[0]], left_col_value, where=test[mineral_names[i]] >= index, color="red")
if i == range(len(mineral_names)-1)[-1]:
left_col_value = 0
right_col_value = 100
span = abs(left_col_value - right_col_value)
cmap = plt.get_cmap('hot_r')
color_index = np.arange(left_col_value, right_col_value, span / 100)
for index in sorted(color_index):
index_value = (index - left_col_value) / span
plt.fill_betweenx(test['depth'],test[mineral_names[i]], right_col_value, where=test[mineral_names[i]] >= index, color="blue")
#if i ==1:
#plt.fill_betweenx(test['depth'], test[mineral_names[i+1]], test[mineral_names[i]],color = "green", alpha=0.4)
plt.gca().invert_yaxis()
plt.show()```
Here is an approach looping through the curves, and using a variable previous_curve which contains the position of the previous curve. At the start, the previous curve is all zeros. Similarly, the name of the previous curve can be saved and used as a label for the fill. All labels will appear in the default legend.
The example code below uses a gridspec with only 4 columns, to make the example plot a bit clearer.
import matplotlib.pyplot as plt
from matplotlib import gridspec
import pandas as pd
import numpy as np
test = pd.DataFrame({'depth': [50, 100, 150],
'lst': [20, 25, 15],
'dol': [40, 50, 35],
'Anhd': [80, 85, 75],
'sst': [100, 100, 100]})
mineral_names = test.columns[1:]
fig = plt.figure(figsize=(15, 12), dpi=100, tight_layout=True)
gs = gridspec.GridSpec(nrows=1, ncols=4, wspace=0)
ax = fig.add_subplot(gs[0, 1])
for mineral_name in mineral_names[:-1]:
ax.plot(test[mineral_name], test['depth'], linewidth=2, color='black')
colors = ["red", "green", "gray", "blue"]
previous_curve = 0
previous_name = ''
for mineral_name, color in zip(mineral_names, colors):
ax.fill_betweenx(test['depth'], previous_curve, test[mineral_name], color=color, alpha=0.4,
label=f'{previous_name} - {mineral_name}')
previous_curve = test[mineral_name]
previous_name = mineral_name
ax.margins(x=0, y=0) # no white space in plot
ax.invert_yaxis()
ax.legend()
plt.show()

Python/Pandas - Different label colors on stacked bar

I would like to change the label color of the first block (dark color one) in each column for a better visualization. Is there any way?
ps: I wouldn't want to change the current color palette. Just the color label of the first block!
Code below:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
sns.set_style("white")
sns.set_context({"figure.figsize": (7, 5)})
df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
columns=['a', 'b', 'c'])
fig, ax = plt.subplots()
ax = df.plot.bar(stacked=True, cmap="cividis", alpha=1, edgecolor="black")
sns.despine(top=False, right=True, left=False, bottom=True)
#add text
for p in ax.patches:
left, bottom, width, height = p.get_bbox().bounds
if height > 0 :
ax.annotate("{0:.0f}".format(height), xy=(left+width/2, bottom+height/2), ha='center', va='center')
If you want to keep the same color map and change label color, you could specify color parameter in annotate function as follows.
ax.annotate("{0:.0f}".format(height), xy=(left+width/2, bottom+height/2), ha='center', va='center', color="white")
There are other configs such as font size etc.
The first block means 1, 4, 7 blocks in the array. Therefore, you could extract the first row of the data frame and check if height is one of the cell values using np.isin() like;
firstblocks = (df.iloc[:, 0])
for p in ax.patches:
left, bottom, width, height = p.get_bbox().bounds
if np.isin(p.get_height(), firstblocks):
ax.annotate("{0:.0f}".format(height), xy=(left + width / 2, bottom + height / 2), ha='center', va='center',
color="white", fontsize=12)
else:
ax.annotate("{0:.0f}".format(height), xy=(left + width / 2, bottom + height / 2), ha='center', va='center')
Hope this helps.

Bar chart with label name and value on top in pandas [duplicate]

This question already has answers here:
Annotate bars with values on Pandas bar plots
(4 answers)
Closed 12 months ago.
I have two columns where i used groupby option create a df called output_duration_per_device such as
output_duration_per_device=s3_dataset.groupby('DeviceType')['Output_media_duration'].sum().reset_index(name ='format_duration')
output_duration_per_device
DeviceType format_duration
0 Alchemist 8.166905e+06
1 CaptionMaker 1.310864e+07
2 Elemental 1.818089e+07
3 EncodingCloud 0.000000e+00
4 FfMpeg 5.258470e+07
5 FlipFactory 4.747456e+02
6 Rhozet 6.263442e+08
7 Tachyon 4.827463e+06
I can make a bar chat and find like this
output_duration_per_device=s3_dataset.groupby('DeviceType')['Output_media_duration'].sum().reset_index(name ='Device_duration').plot(kind ='bar', figsize=(10,7), fontsize=13)
output_duration_per_device.set_alpha(0.8)
output_duration_per_device.set_title('DeviceType Output Media duration')
output_duration_per_device.set_xlabel('DeviceType')
plt.ylabel('Output_media_duration')
which gives me
but i want like below
please help me
Using plot and annotating via height (I would recommend fiddling with the spacing):
from decimal import Decimal
ax = df.plot(x='DeviceType', y='format_duration', kind='bar')
for p in ax.patches:
ax.annotate('{:.2E}'.format(Decimal(str(p.get_height()))), (p.get_x(), p.get_height()))
plt.tight_layout()
plt.show()
"""
Barchart
A bar plot with errorbars and height labels on individual bars
"""
import numpy as np
import matplotlib.pyplot as plt
N = 5
men_means = (20, 35, 30, 35, 27)
men_std = (2, 3, 4, 1, 2)
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars
fig, ax = plt.subplots()
rects1 = ax.bar(ind, men_means, width, color='r', yerr=men_std)
women_means = (25, 32, 34, 20, 25)
women_std = (3, 5, 2, 3, 3)
rects2 = ax.bar(ind + width, women_means, width, color='y', yerr=women_std)
# add some text for labels, title and axes ticks
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(ind + width / 2)
ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
ax.legend((rects1[0], rects2[0]), ('Men', 'Women'))
def autolabel(rects):
"""
Attach a text label above each bar displaying its height
"""
for rect in rects:
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,
'%d' % int(height),
ha='center', va='bottom')
autolabel(rects1)
autolabel(rects2)
plt.show()
Bar Chart output
The following code is collected from Matplotlib official website. Please take a look. click here

How to display the value of the bar on each bar with pyplot.barh()

I generated a bar plot, how can I display the value of the bar on each bar?
Current plot:
What I am trying to get:
My code:
import os
import numpy as np
import matplotlib.pyplot as plt
x = [u'INFO', u'CUISINE', u'TYPE_OF_PLACE', u'DRINK', u'PLACE', u'MEAL_TIME', u'DISH', u'NEIGHBOURHOOD']
y = [160, 167, 137, 18, 120, 36, 155, 130]
fig, ax = plt.subplots()
width = 0.75 # the width of the bars
ind = np.arange(len(y)) # the x locations for the groups
ax.barh(ind, y, width, color="blue")
ax.set_yticks(ind+width/2)
ax.set_yticklabels(x, minor=False)
plt.title('title')
plt.xlabel('x')
plt.ylabel('y')
#plt.show()
plt.savefig(os.path.join('test.png'), dpi=300, format='png', bbox_inches='tight') # use format='svg' or 'pdf' for vectorial pictures
Update: there's a built in method for this now! Scroll down a couple answers to "New in matplotlib 3.4.0".
If you can't upgrade that far, it doesn't take much code. Add:
for i, v in enumerate(y):
ax.text(v + 3, i + .25, str(v), color='blue', fontweight='bold')
result:
The y-values v are both the x-location and the string values for ax.text, and conveniently the barplot has a metric of 1 for each bar, so the enumeration i is the y-location.
New in matplotlib 3.4.0
There is now a built-in Axes.bar_label helper method to auto-label bars:
fig, ax = plt.subplots()
bars = ax.barh(indexes, values)
ax.bar_label(bars)
Note that for grouped/stacked bar plots, there will multiple bar containers, which can all be accessed via ax.containers:
for bars in ax.containers:
ax.bar_label(bars)
More details:
How to add thousands separators (commas) to labels
How to apply f-strings to labels
How to add spacing to labels
I have noticed api example code contains an example of barchart with the value of the bar displayed on each bar:
"""
========
Barchart
========
A bar plot with errorbars and height labels on individual bars
"""
import numpy as np
import matplotlib.pyplot as plt
N = 5
men_means = (20, 35, 30, 35, 27)
men_std = (2, 3, 4, 1, 2)
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars
fig, ax = plt.subplots()
rects1 = ax.bar(ind, men_means, width, color='r', yerr=men_std)
women_means = (25, 32, 34, 20, 25)
women_std = (3, 5, 2, 3, 3)
rects2 = ax.bar(ind + width, women_means, width, color='y', yerr=women_std)
# add some text for labels, title and axes ticks
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(ind + width / 2)
ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
ax.legend((rects1[0], rects2[0]), ('Men', 'Women'))
def autolabel(rects):
"""
Attach a text label above each bar displaying its height
"""
for rect in rects:
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,
'%d' % int(height),
ha='center', va='bottom')
autolabel(rects1)
autolabel(rects2)
plt.show()
output:
FYI What is the unit of height variable in "barh" of matplotlib? (as of now, there is no easy way to set a fixed height for each bar)
Use plt.text() to put text in the plot.
Example:
import matplotlib.pyplot as plt
N = 5
menMeans = (20, 35, 30, 35, 27)
ind = np.arange(N)
#Creating a figure with some fig size
fig, ax = plt.subplots(figsize = (10,5))
ax.bar(ind,menMeans,width=0.4)
#Now the trick is here.
#plt.text() , you need to give (x,y) location , where you want to put the numbers,
#So here index will give you x pos and data+1 will provide a little gap in y axis.
for index,data in enumerate(menMeans):
plt.text(x=index , y =data+1 , s=f"{data}" , fontdict=dict(fontsize=20))
plt.tight_layout()
plt.show()
This will show the figure as:
For anyone wanting to have their label at the base of their bars just divide v by the value of the label like this:
for i, v in enumerate(labels):
axes.text(i-.25,
v/labels[i]+100,
labels[i],
fontsize=18,
color=label_color_list[i])
(note: I added 100 so it wasn't absolutely at the bottom)
To get a result like this:
I know it's an old thread, but I landed here several times via Google and think no given answer is really satisfying yet. Try using one of the following functions:
EDIT: As I'm getting some likes on this old thread, I wanna share an updated solution as well (basically putting my two previous functions together and automatically deciding whether it's a bar or hbar plot):
def label_bars(ax, bars, text_format, **kwargs):
"""
Attaches a label on every bar of a regular or horizontal bar chart
"""
ys = [bar.get_y() for bar in bars]
y_is_constant = all(y == ys[0] for y in ys) # -> regular bar chart, since all all bars start on the same y level (0)
if y_is_constant:
_label_bar(ax, bars, text_format, **kwargs)
else:
_label_barh(ax, bars, text_format, **kwargs)
def _label_bar(ax, bars, text_format, **kwargs):
"""
Attach a text label to each bar displaying its y value
"""
max_y_value = ax.get_ylim()[1]
inside_distance = max_y_value * 0.05
outside_distance = max_y_value * 0.01
for bar in bars:
text = text_format.format(bar.get_height())
text_x = bar.get_x() + bar.get_width() / 2
is_inside = bar.get_height() >= max_y_value * 0.15
if is_inside:
color = "white"
text_y = bar.get_height() - inside_distance
else:
color = "black"
text_y = bar.get_height() + outside_distance
ax.text(text_x, text_y, text, ha='center', va='bottom', color=color, **kwargs)
def _label_barh(ax, bars, text_format, **kwargs):
"""
Attach a text label to each bar displaying its y value
Note: label always outside. otherwise it's too hard to control as numbers can be very long
"""
max_x_value = ax.get_xlim()[1]
distance = max_x_value * 0.0025
for bar in bars:
text = text_format.format(bar.get_width())
text_x = bar.get_width() + distance
text_y = bar.get_y() + bar.get_height() / 2
ax.text(text_x, text_y, text, va='center', **kwargs)
Now you can use them for regular bar plots:
fig, ax = plt.subplots((5, 5))
bars = ax.bar(x_pos, values, width=0.5, align="center")
value_format = "{:.1%}" # displaying values as percentage with one fractional digit
label_bars(ax, bars, value_format)
or for horizontal bar plots:
fig, ax = plt.subplots((5, 5))
horizontal_bars = ax.barh(y_pos, values, width=0.5, align="center")
value_format = "{:.1%}" # displaying values as percentage with one fractional digit
label_bars(ax, horizontal_bars, value_format)
For pandas people :
ax = s.plot(kind='barh') # s is a Series (float) in [0,1]
[ax.text(v, i, '{:.2f}%'.format(100*v)) for i, v in enumerate(s)];
That's it.
Alternatively, for those who prefer apply over looping with enumerate:
it = iter(range(len(s)))
s.apply(lambda x: ax.text(x, next(it),'{:.2f}%'.format(100*x)));
Also, ax.patches will give you the bars that you would get with ax.bar(...). In case you want to apply the functions of #SaturnFromTitan or techniques of others.
I needed the bar labels too, note that my y-axis is having a zoomed view using limits on y axis. The default calculations for putting the labels on top of the bar still works using height (use_global_coordinate=False in the example). But I wanted to show that the labels can be put in the bottom of the graph too in zoomed view using global coordinates in matplotlib 3.0.2. Hope it help someone.
def autolabel(rects,data):
"""
Attach a text label above each bar displaying its height
"""
c = 0
initial = 0.091
offset = 0.205
use_global_coordinate = True
if use_global_coordinate:
for i in data:
ax.text(initial+offset*c, 0.05, str(i), horizontalalignment='center',
verticalalignment='center', transform=ax.transAxes,fontsize=8)
c=c+1
else:
for rect,i in zip(rects,data):
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width()/2., height,str(i),ha='center', va='bottom')
I was trying to do this with stacked plot bars. The code that worked for me was.
# Code to plot. Notice the variable ax.
ax = df.groupby('target').count().T.plot.bar(stacked=True, figsize=(10, 6))
ax.legend(bbox_to_anchor=(1.1, 1.05))
# Loop to add on each bar a tag in position
for rect in ax.patches:
height = rect.get_height()
ypos = rect.get_y() + height/2
ax.text(rect.get_x() + rect.get_width()/2., ypos,
'%d' % int(height), ha='center', va='bottom')
Simply add this:
for i in range(len(y)):
plt.text(x= y[i],y= i,s= y[i], c='b')
for every item in the list(y), print the value(s) as blue-colored text on the plot in the position specified (x=position on x-axis and y=position on y-axis)
Check this link
Matplotlib Gallery
This is how I used the code snippet of autolabel.
def autolabel(rects):
"""Attach a text label above each bar in *rects*, displaying its height."""
for rect in rects:
height = rect.get_height()
ax.annotate('{}'.format(height),
xy=(rect.get_x() + rect.get_width() / 2, height),
xytext=(0, 3), # 3 points vertical offset
textcoords="offset points",
ha='center', va='bottom')
temp = df_launch.groupby(['yr_mt','year','month'])['subs_trend'].agg(subs_count='sum').sort_values(['year','month']).reset_index()
_, ax = plt.subplots(1,1, figsize=(30,10))
bar = ax.bar(height=temp['subs_count'],x=temp['yr_mt'] ,color ='g')
autolabel(bar)
ax.set_title('Monthly Change in Subscribers from Launch Date')
ax.set_ylabel('Subscriber Count Change')
ax.set_xlabel('Time')
plt.show()

Categories

Resources