Related
Having the following line in my plot code
ax.plot(x, pdf_individual, '--k', label = "single Gaussians")
, with pdf_individual being a list of lists, results in this picture:
Is there a way to just have "single Gaussians" once in the labels, instead of 6 times, which is the amount of single Gaussians for the Gaussian Mixture Model?
This is the whole post with the suggested solution
import matplotlib as mpl
import matplotlib.ticker as mtick
from matplotlib.lines import Line2D
mpl.rcParams['figure.dpi'] = 600
test_input = input_list # THIS IS A 1D LIST with a few hundred items
X = np.asarray(test_input).reshape(-1,1)
N = np.arange(1, 11)
models = [None for i in range(len(N))]
for i in range(len(N)):
models[i] = GaussianMixture(N[i]).fit(X)
# compute the AIC and the BIC
AIC = [m.aic(X) for m in models]
BIC = [m.bic(X) for m in models]
fig = plt.figure(figsize=(12, 4))
fig.subplots_adjust(left=0.1, right=0.9,
bottom=0.21, top=0.9, wspace=0.3)
ax = fig.add_subplot(131)
M_best = models[np.argmin(AIC)]
comp_count = str(M_best)
x = np.linspace(0, 0.1, 100)
logprob = M_best.score_samples(x.reshape(-1, 1))
responsibilities = M_best.predict_proba(x.reshape(-1, 1))
pdf = np.exp(logprob)
pdf_individual = responsibilities * pdf[:, np.newaxis]
left, width = .245, .5
bottom, height = .4, .5
right = left + width
top = bottom + height
plt.setp( ax.xaxis.get_majorticklabels(), rotation=-45, ha="left" )
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.hist(X, 30, density=True, histtype='stepfilled', alpha=0.4, label="Data")
ax.plot(x, pdf, '-k', color = "red", label='GMM')
for i, pdf_individual in enumerate(pdf_individual):
ax.plot(x, pdf_individual, '--k', label = "single Gaussians" if i == 0 else "")
#for pdf in pdf_individual[1:]: ax.plot(x, pdf, '--k')
ax.text(right, top, "Anzahl Komponenten: " + comp_count[-2],
horizontalalignment='center',
verticalalignment='bottom',
transform=ax.transAxes)
ax.set_xlabel('$x$')
ax.set_ylabel('$p(x)$')
plt.legend()
plt.show()
It results in this error:
ValueError: x and y must have same first dimension, but have shapes (100,) and (6,)
EDIT:
Putting
pdf_individual = np.transpose(pdf_individual)
makes the code above work
I calculated the rttMeans and rttStds arrays. However, the value of rttStds makes the lower error less than 0.
rttStds = [3.330311915835426, 3.3189677330174883, 3.3319538853150386, 3.325173772304221, 3.3374145232695813]
How to set lower error to 0 instead of -#?
The python bar plot code is bellow.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(rc={'figure.figsize':(18,16)},style='ticks',font_scale = 1.5,font='serif')
N = 5
ind = ['RSU1', 'RSU2', 'RSU3', 'RSU4', 'RSU5'] # the x locations for the groups
width = 0.4 # the width of the bars: can also be len(x) sequence
fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot(111)
p1 = plt.bar(ind, rttMeans, width, yerr=rttStds, log=False, capsize = 16, color='green', hatch="/", error_kw=dict(elinewidth=3,ecolor='black'))
plt.margins(0.01, 0)
#Optional code - Make plot look nicer
plt.xticks(rotation=0)
i=0.18
for row in rttMeans:
plt.text(i, row, "{0:.1f}".format(row), color='black', ha="center")
i = i + 1
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
params = {'axes.titlesize':24,
'axes.labelsize':24,
'xtick.labelsize':28,
'ytick.labelsize':28,
'legend.fontsize': 24,
'axes.spines.right':False,
'axes.spines.top':False}
plt.rcParams.update(params)
plt.tick_params(axis="y", labelsize=28, labelrotation=20, labelcolor="black")
plt.tick_params(axis="x", labelsize=28, labelrotation=20, labelcolor="black")
plt.ylabel('RT Time (millisecond)', fontsize=24)
plt.title('# Participating RSUs', fontsize=24)
# plt.savefig('RSUs.pdf', bbox_inches='tight')
plt.show()
You can pass yerr as a pair [lower_errors, upper_errors] where you can control lower_errors :
lowers = np.minimum(rttStds,rttMeans)
p1 = plt.bar(ind, rttMeans, width, yerr=[lowers,rttStds], log=False, capsize = 16, color='green', hatch="/", error_kw=dict(elinewidth=3,ecolor='black'))
Output:
I have a grouped bar chart and each bar is stacked.
I have annotated each section of the stack with its individual value and now I would like to sum those values and annotate the total value(height) of each bar. I would like this annotation to be on top of each bar.
This is one of the two dataframes I am working from:
df_title = pd.DataFrame(index=['F','M'],
data={'<10':[2.064897, 1.573255], '10-12':[3.933137, 4.326450], '13-17':[9.242871, 16.715831],
'18-24':[10.226155, 12.487709], '18-24':[8.161259, 10.717797], '35-44':[5.801377, 4.916421],
'45-54':[3.539823, 2.851524], '55+':[1.671583, 1.769912]})
I convert both dataframes (df_title and df_comps) into numpy arrays before plotting.
df_title_concat = np.concatenate((np.zeros((len,1)), df_title.T.values), axis=1)
Here is the full code:
df_title
df_comps
len = df_title.shape[1]
df_title_concat = np.concatenate((np.zeros((len,1)), df_title.T.values), axis=1)
df_comps_concat = np.concatenate((np.zeros((len,1)), df_comps.T.values), axis=1)
fig = plt.figure(figsize=(20,10))
ax = plt.subplot()
title_colors = ['skyblue', 'royalblue']
comps_colors = ['lightgoldenrodyellow', 'orange']
for i in range(1,3):
for j in list(range(0, df_title.shape[1]-1)):
j += 1
ax_1 = ax.bar(j, df_title_concat[j,i], width=-0.4, bottom=np.sum(df_title_concat[j,:i]), color = title_colors[i-1],
edgecolor='black', linewidth=3, align='edge')
for p in ax_1.patches:
width, height = p.get_width(), p.get_height()
x, y = p.get_xy()
if height > 2:
ax.annotate('{:.2f}%'.format(height), (p.get_x()+0.875*width, p.get_y()+.4*height),
fontsize=16, fontweight='bold', color='black')
ax_2 = ax.bar(j, df_comps_concat[j,i], width=0.4, bottom=np.sum(df_comps_concat[j,:i]), color = comps_colors[i-1],
edgecolor='black', linewidth=3, align='edge')
for p in ax_2.patches:
width, height = p.get_width(), p.get_height()
x, y = p.get_xy()
if height > 2:
ax.annotate('{:.2f}%'.format(height), (p.get_x()+0.15*width, p.get_y()+.4*height),
fontsize=16, fontweight='bold', color='black')
Here is a solution:
df_title = pd.DataFrame(index=['F','M'],
data={'<10':[2.064897, 1.573255], '10-12':[3.933137, 4.326450], '13-17':[9.242871, 16.715831],
'18-24':[10.226155, 12.487709], '18-24':[8.161259, 10.717797], '35-44':[5.801377, 4.916421],
'45-54':[3.539823, 2.851524], '55+':[1.671583, 1.769912]})
df_title_concat = np.concatenate((np.zeros((len(df_title),1)), df_title.T.values), axis=1)
fig = plt.figure(figsize=(12,8))
ax = plt.subplot()
title_colors = ['skyblue', 'royalblue']
for i in range(1,3):
for j in list(range(0, df_title.shape[1]-1)):
j += 1
bottom=np.sum(df_title_concat[j,:i])
ax_1 = ax.bar(j, df_title_concat[j,i], width=-0.4, bottom=bottom, color = title_colors[i-1],
edgecolor='black', linewidth=3, align='edge')
for p in ax_1.patches:
width, height = p.get_width(), p.get_height()
if bottom != 0:
ax.annotate('{:.2f}%'.format(height+bottom), (p.get_x()+0.875*width, (height+bottom)+0.3),
fontsize=16, fontweight='bold', color='black')
However, I would suggest you to rethink the whole approach you are following and change the plot to something like:
plt.bar(df_title.columns,df_title.loc['M'])
plt.bar(df_title.columns,df_title.loc['F'],bottom=df_title.loc['M'])
I'm trying to create a grid using a matplotlib function like imshow.
From this array:
[[ 1 8 13 29 17 26 10 4],
[16 25 31 5 21 30 19 15]]
I would like to plot the value as a color AND the text value itself (1,2, ...) on the same grid. This is what I have for the moment (I can only plot the color associated to each value):
from matplotlib import pyplot
import numpy as np
grid = np.array([[1,8,13,29,17,26,10,4],[16,25,31,5,21,30,19,15]])
print 'Here is the array'
print grid
fig1, (ax1, ax2)= pyplot.subplots(2, sharex = True, sharey = False)
ax1.imshow(grid, interpolation ='none', aspect = 'auto')
ax2.imshow(grid, interpolation ='bicubic', aspect = 'auto')
pyplot.show()
You want to loop over the values in grid, and use ax.text to add the label to the plot.
Fortunately, for 2D arrays, numpy has ndenumerate, which makes this quite simple:
for (j,i),label in np.ndenumerate(grid):
ax1.text(i,j,label,ha='center',va='center')
ax2.text(i,j,label,ha='center',va='center')
If for any reason you have to use a different extent from the one that is provided naturally by imshow the following method (even if more contrived) does the job:
size = 4
data = np.arange(size * size).reshape((size, size))
# Limits for the extent
x_start = 3.0
x_end = 9.0
y_start = 6.0
y_end = 12.0
extent = [x_start, x_end, y_start, y_end]
# The normal figure
fig = plt.figure(figsize=(16, 12))
ax = fig.add_subplot(111)
im = ax.imshow(data, extent=extent, origin='lower', interpolation='None', cmap='viridis')
# Add the text
jump_x = (x_end - x_start) / (2.0 * size)
jump_y = (y_end - y_start) / (2.0 * size)
x_positions = np.linspace(start=x_start, stop=x_end, num=size, endpoint=False)
y_positions = np.linspace(start=y_start, stop=y_end, num=size, endpoint=False)
for y_index, y in enumerate(y_positions):
for x_index, x in enumerate(x_positions):
label = data[y_index, x_index]
text_x = x + jump_x
text_y = y + jump_y
ax.text(text_x, text_y, label, color='black', ha='center', va='center')
fig.colorbar(im)
plt.show()
If you want to put other type of data and not necessarily the values that you used for the image you can modify the script above in the following way (added values after data):
size = 4
data = np.arange(size * size).reshape((size, size))
values = np.random.rand(size, size)
# Limits for the extent
x_start = 3.0
x_end = 9.0
y_start = 6.0
y_end = 12.0
extent = [x_start, x_end, y_start, y_end]
# The normal figure
fig = plt.figure(figsize=(16, 12))
ax = fig.add_subplot(111)
im = ax.imshow(data, extent=extent, origin='lower', interpolation='None', cmap='viridis')
# Add the text
jump_x = (x_end - x_start) / (2.0 * size)
jump_y = (y_end - y_start) / (2.0 * size)
x_positions = np.linspace(start=x_start, stop=x_end, num=size, endpoint=False)
y_positions = np.linspace(start=y_start, stop=y_end, num=size, endpoint=False)
for y_index, y in enumerate(y_positions):
for x_index, x in enumerate(x_positions):
label = values[y_index, x_index]
text_x = x + jump_x
text_y = y + jump_y
ax.text(text_x, text_y, label, color='black', ha='center', va='center')
fig.colorbar(im)
plt.show()
I generated a bar plot, how can I display the value of the bar on each bar?
Current plot:
What I am trying to get:
My code:
import os
import numpy as np
import matplotlib.pyplot as plt
x = [u'INFO', u'CUISINE', u'TYPE_OF_PLACE', u'DRINK', u'PLACE', u'MEAL_TIME', u'DISH', u'NEIGHBOURHOOD']
y = [160, 167, 137, 18, 120, 36, 155, 130]
fig, ax = plt.subplots()
width = 0.75 # the width of the bars
ind = np.arange(len(y)) # the x locations for the groups
ax.barh(ind, y, width, color="blue")
ax.set_yticks(ind+width/2)
ax.set_yticklabels(x, minor=False)
plt.title('title')
plt.xlabel('x')
plt.ylabel('y')
#plt.show()
plt.savefig(os.path.join('test.png'), dpi=300, format='png', bbox_inches='tight') # use format='svg' or 'pdf' for vectorial pictures
Update: there's a built in method for this now! Scroll down a couple answers to "New in matplotlib 3.4.0".
If you can't upgrade that far, it doesn't take much code. Add:
for i, v in enumerate(y):
ax.text(v + 3, i + .25, str(v), color='blue', fontweight='bold')
result:
The y-values v are both the x-location and the string values for ax.text, and conveniently the barplot has a metric of 1 for each bar, so the enumeration i is the y-location.
New in matplotlib 3.4.0
There is now a built-in Axes.bar_label helper method to auto-label bars:
fig, ax = plt.subplots()
bars = ax.barh(indexes, values)
ax.bar_label(bars)
Note that for grouped/stacked bar plots, there will multiple bar containers, which can all be accessed via ax.containers:
for bars in ax.containers:
ax.bar_label(bars)
More details:
How to add thousands separators (commas) to labels
How to apply f-strings to labels
How to add spacing to labels
I have noticed api example code contains an example of barchart with the value of the bar displayed on each bar:
"""
========
Barchart
========
A bar plot with errorbars and height labels on individual bars
"""
import numpy as np
import matplotlib.pyplot as plt
N = 5
men_means = (20, 35, 30, 35, 27)
men_std = (2, 3, 4, 1, 2)
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars
fig, ax = plt.subplots()
rects1 = ax.bar(ind, men_means, width, color='r', yerr=men_std)
women_means = (25, 32, 34, 20, 25)
women_std = (3, 5, 2, 3, 3)
rects2 = ax.bar(ind + width, women_means, width, color='y', yerr=women_std)
# add some text for labels, title and axes ticks
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(ind + width / 2)
ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
ax.legend((rects1[0], rects2[0]), ('Men', 'Women'))
def autolabel(rects):
"""
Attach a text label above each bar displaying its height
"""
for rect in rects:
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,
'%d' % int(height),
ha='center', va='bottom')
autolabel(rects1)
autolabel(rects2)
plt.show()
output:
FYI What is the unit of height variable in "barh" of matplotlib? (as of now, there is no easy way to set a fixed height for each bar)
Use plt.text() to put text in the plot.
Example:
import matplotlib.pyplot as plt
N = 5
menMeans = (20, 35, 30, 35, 27)
ind = np.arange(N)
#Creating a figure with some fig size
fig, ax = plt.subplots(figsize = (10,5))
ax.bar(ind,menMeans,width=0.4)
#Now the trick is here.
#plt.text() , you need to give (x,y) location , where you want to put the numbers,
#So here index will give you x pos and data+1 will provide a little gap in y axis.
for index,data in enumerate(menMeans):
plt.text(x=index , y =data+1 , s=f"{data}" , fontdict=dict(fontsize=20))
plt.tight_layout()
plt.show()
This will show the figure as:
For anyone wanting to have their label at the base of their bars just divide v by the value of the label like this:
for i, v in enumerate(labels):
axes.text(i-.25,
v/labels[i]+100,
labels[i],
fontsize=18,
color=label_color_list[i])
(note: I added 100 so it wasn't absolutely at the bottom)
To get a result like this:
I know it's an old thread, but I landed here several times via Google and think no given answer is really satisfying yet. Try using one of the following functions:
EDIT: As I'm getting some likes on this old thread, I wanna share an updated solution as well (basically putting my two previous functions together and automatically deciding whether it's a bar or hbar plot):
def label_bars(ax, bars, text_format, **kwargs):
"""
Attaches a label on every bar of a regular or horizontal bar chart
"""
ys = [bar.get_y() for bar in bars]
y_is_constant = all(y == ys[0] for y in ys) # -> regular bar chart, since all all bars start on the same y level (0)
if y_is_constant:
_label_bar(ax, bars, text_format, **kwargs)
else:
_label_barh(ax, bars, text_format, **kwargs)
def _label_bar(ax, bars, text_format, **kwargs):
"""
Attach a text label to each bar displaying its y value
"""
max_y_value = ax.get_ylim()[1]
inside_distance = max_y_value * 0.05
outside_distance = max_y_value * 0.01
for bar in bars:
text = text_format.format(bar.get_height())
text_x = bar.get_x() + bar.get_width() / 2
is_inside = bar.get_height() >= max_y_value * 0.15
if is_inside:
color = "white"
text_y = bar.get_height() - inside_distance
else:
color = "black"
text_y = bar.get_height() + outside_distance
ax.text(text_x, text_y, text, ha='center', va='bottom', color=color, **kwargs)
def _label_barh(ax, bars, text_format, **kwargs):
"""
Attach a text label to each bar displaying its y value
Note: label always outside. otherwise it's too hard to control as numbers can be very long
"""
max_x_value = ax.get_xlim()[1]
distance = max_x_value * 0.0025
for bar in bars:
text = text_format.format(bar.get_width())
text_x = bar.get_width() + distance
text_y = bar.get_y() + bar.get_height() / 2
ax.text(text_x, text_y, text, va='center', **kwargs)
Now you can use them for regular bar plots:
fig, ax = plt.subplots((5, 5))
bars = ax.bar(x_pos, values, width=0.5, align="center")
value_format = "{:.1%}" # displaying values as percentage with one fractional digit
label_bars(ax, bars, value_format)
or for horizontal bar plots:
fig, ax = plt.subplots((5, 5))
horizontal_bars = ax.barh(y_pos, values, width=0.5, align="center")
value_format = "{:.1%}" # displaying values as percentage with one fractional digit
label_bars(ax, horizontal_bars, value_format)
For pandas people :
ax = s.plot(kind='barh') # s is a Series (float) in [0,1]
[ax.text(v, i, '{:.2f}%'.format(100*v)) for i, v in enumerate(s)];
That's it.
Alternatively, for those who prefer apply over looping with enumerate:
it = iter(range(len(s)))
s.apply(lambda x: ax.text(x, next(it),'{:.2f}%'.format(100*x)));
Also, ax.patches will give you the bars that you would get with ax.bar(...). In case you want to apply the functions of #SaturnFromTitan or techniques of others.
I needed the bar labels too, note that my y-axis is having a zoomed view using limits on y axis. The default calculations for putting the labels on top of the bar still works using height (use_global_coordinate=False in the example). But I wanted to show that the labels can be put in the bottom of the graph too in zoomed view using global coordinates in matplotlib 3.0.2. Hope it help someone.
def autolabel(rects,data):
"""
Attach a text label above each bar displaying its height
"""
c = 0
initial = 0.091
offset = 0.205
use_global_coordinate = True
if use_global_coordinate:
for i in data:
ax.text(initial+offset*c, 0.05, str(i), horizontalalignment='center',
verticalalignment='center', transform=ax.transAxes,fontsize=8)
c=c+1
else:
for rect,i in zip(rects,data):
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width()/2., height,str(i),ha='center', va='bottom')
I was trying to do this with stacked plot bars. The code that worked for me was.
# Code to plot. Notice the variable ax.
ax = df.groupby('target').count().T.plot.bar(stacked=True, figsize=(10, 6))
ax.legend(bbox_to_anchor=(1.1, 1.05))
# Loop to add on each bar a tag in position
for rect in ax.patches:
height = rect.get_height()
ypos = rect.get_y() + height/2
ax.text(rect.get_x() + rect.get_width()/2., ypos,
'%d' % int(height), ha='center', va='bottom')
Simply add this:
for i in range(len(y)):
plt.text(x= y[i],y= i,s= y[i], c='b')
for every item in the list(y), print the value(s) as blue-colored text on the plot in the position specified (x=position on x-axis and y=position on y-axis)
Check this link
Matplotlib Gallery
This is how I used the code snippet of autolabel.
def autolabel(rects):
"""Attach a text label above each bar in *rects*, displaying its height."""
for rect in rects:
height = rect.get_height()
ax.annotate('{}'.format(height),
xy=(rect.get_x() + rect.get_width() / 2, height),
xytext=(0, 3), # 3 points vertical offset
textcoords="offset points",
ha='center', va='bottom')
temp = df_launch.groupby(['yr_mt','year','month'])['subs_trend'].agg(subs_count='sum').sort_values(['year','month']).reset_index()
_, ax = plt.subplots(1,1, figsize=(30,10))
bar = ax.bar(height=temp['subs_count'],x=temp['yr_mt'] ,color ='g')
autolabel(bar)
ax.set_title('Monthly Change in Subscribers from Launch Date')
ax.set_ylabel('Subscriber Count Change')
ax.set_xlabel('Time')
plt.show()