I have a pandas DataFrame with a MultiIndex:
group subgroup obs_1 obs_2
GroupA Elem1 4 0
Elem2 34 2
Elem3 0 10
GroupB Elem4 5 21
and so on. As noted in this SO question this is actually doable in matplotlib, but I'd rather (if possible) use the fact that I already know the hierarchy (thanks to the MultiIndex). Currently what's happening is that the index is shown as a tuple.
Is such a thing possible?
If you have just two levels in the MultiIndex, I believe the following will be easier:
plt.figure()
ax = plt.gca()
DF.plot(kind='bar', ax=ax)
plt.grid(True, 'both')
minor_XT = ax.get_xaxis().get_majorticklocs()
DF['XT_V'] = minor_XT
major_XT = DF.groupby(by=DF.index.get_level_values(0)).first()['XT_V'].tolist()
DF.__delitem__('XT_V')
ax.set_xticks(minor_XT, minor=True)
ax.set_xticklabels(DF.index.get_level_values(1), minor=True)
ax.tick_params(which='major', pad=15)
_ = plt.xticks(major_XT, (DF.index.get_level_values(0)).unique(), rotation=0)
And a bit of involving, but more general solution (doesn't matter how many levels you have):
def cvt_MIdx_tcklab(df):
Midx_ar = np.array(df.index.tolist())
Blank_ar = Midx_ar.copy()
col_idx = np.arange(Midx_ar.shape[0])
for i in range(Midx_ar.shape[1]):
val,idx = np.unique(Midx_ar[:, i], return_index=True)
Blank_ar[idx, i] = val
idx=~np.in1d(col_idx, idx)
Blank_ar[idx, i]=''
return map('\n'.join, np.fliplr(Blank_ar))
plt.figure()
ax = plt.gca()
DF.plot(kind='bar', ax=ax)
ax.set_xticklabels(cvt_MIdx_tcklab(DF), rotation=0)
I think that there isn't a nice and standard way of plotting multiindex dataframes. I found the following solution by #Stein to be aesthetically pleasant. I've adapted his example to your data:
import pandas as pd
import matplotlib.pyplot as plt
from itertools import groupby
import numpy as np
%matplotlib inline
group = ('Group_A', 'Group_B')
subgroup = ('elem1', 'elem2', 'elem3', 'elem4')
obs = ('obs_1', 'obs_2')
index = pd.MultiIndex.from_tuples([('Group_A','elem1'),('Group_A','elem2'),('Group_A','elem3'),('Group_B','elem4')],
names=['group', 'subgroup'])
values = np.array([[4,0],[43,2],[0,10],[5,21]])
df = pd.DataFrame(index=index)
df['obs_1'] = values[:,0]
df['obs_2'] = values[:,1]
def add_line(ax, xpos, ypos):
line = plt.Line2D([xpos, xpos], [ypos + .1, ypos],
transform=ax.transAxes, color='gray')
line.set_clip_on(False)
ax.add_line(line)
def label_len(my_index,level):
labels = my_index.get_level_values(level)
return [(k, sum(1 for i in g)) for k,g in groupby(labels)]
def label_group_bar_table(ax, df):
ypos = -.1
scale = 1./df.index.size
for level in range(df.index.nlevels)[::-1]:
pos = 0
for label, rpos in label_len(df.index,level):
lxpos = (pos + .5 * rpos)*scale
ax.text(lxpos, ypos, label, ha='center', transform=ax.transAxes)
add_line(ax, pos*scale, ypos)
pos += rpos
add_line(ax, pos*scale , ypos)
ypos -= .1
ax = df.plot(kind='bar',stacked=False)
#Below 2 lines remove default labels
ax.set_xticklabels('')
ax.set_xlabel('')
label_group_bar_table(ax, df)
Which produces:
How to create a grouped bar chart of a hierarchical dataset with 2 levels
You can create a subplot for each group and stick them together with wspace=0. The width of each subplot must be corrected according to the number of subgroups by using the width_ratios argument in the gridspec_kw dictionary so that all the columns have the same width.
Then there are limitless formatting choices to make. In the following example, I choose to draw horizontal grid lines in the background and a separation line between the groups by using the minor tick marks.
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import matplotlib.pyplot as plt # v 3.3.2
# Create sample DataFrame with MultiIndex
df = pd.DataFrame(dict(group = ['GroupA', 'GroupA', 'GroupA', 'GroupB'],
subgroup = ['Elem1', 'Elem2', 'Elem3', 'Elem4'],
obs_1 = [4, 34, 0, 5],
obs_2 = [0, 2, 10, 21]))
df.set_index(['group', 'subgroup'], inplace=True)
# Create figure with a subplot for each group with a relative width that
# is proportional to the number of subgroups
groups = df.index.levels[0]
nplots = groups.size
plots_width_ratios = [df.xs(group).index.size for group in groups]
fig, axes = plt.subplots(nrows=1, ncols=nplots, sharey=True, figsize=(6, 4),
gridspec_kw = dict(width_ratios=plots_width_ratios, wspace=0))
# Loop through array of axes to create grouped bar chart for each group
alpha = 0.3 # used for grid lines, bottom spine and separation lines between groups
for group, ax in zip(groups, axes):
# Create bar chart with horizontal grid lines and no spines except bottom one
df.xs(group).plot.bar(ax=ax, legend=None, zorder=2)
ax.grid(axis='y', zorder=1, color='black', alpha=alpha)
for spine in ['top', 'left', 'right']:
ax.spines[spine].set_visible(False)
ax.spines['bottom'].set_alpha(alpha)
# Set and place x labels for groups
ax.set_xlabel(group)
ax.xaxis.set_label_coords(x=0.5, y=-0.15)
# Format major tick labels for subgroups
ax.set_xticklabels(ax.get_xticklabels(), rotation=0, ha='center')
ax.tick_params(axis='both', which='major', length=0, pad=10)
# Set and format minor tick marks for separation lines between groups: note
# that except for the first subplot, only the right tick mark is drawn to avoid
# duplicate overlapping lines so that when an alpha different from 1 is chosen
# (like in this example) all the lines look the same
if ax.is_first_col():
ax.set_xticks([*ax.get_xlim()], minor=True)
else:
ax.set_xticks([ax.get_xlim()[1]], minor=True)
ax.tick_params(which='minor', length=45, width=0.8, color=[0, 0, 0, alpha])
# Add legend using the labels and handles from the last subplot
fig.legend(*ax.get_legend_handles_labels(), frameon=False,
bbox_to_anchor=(0.92, 0.5), loc="center left")
title = 'Grouped bar chart of a hierarchical dataset with 2 levels'
fig.suptitle(title, y=1.01, size=14);
Reference: this answer by gyx-hh
Related
I would like to add image annotations to a boxplot, akin to what they did with the bar chart in this post:
How can I add images to bars in axes (matplotlib)
My dataframe looks like this:
import pandas as pd
import numpy as np
names = ['PersonA', 'PersonB', 'PersonC', 'PersonD','PersonE','PersonF']
regions = ['NorthEast','NorthWest','SouthEast','SouthWest']
dates = pd.date_range(start = '2021-05-28', end = '2021-08-23', freq = 'D')
df = pd.DataFrame({'runtime': np.repeat(dates, len(names))})
df['name'] = len(dates)*names
df['A'] = 40 + 20*np.random.random(len(df))
df['B'] = .1 * np.random.random(len(df))
df['C'] = 1 +.5 * np.random.random(len(df))
df['region'] = np.resize(regions,len(df))
I tried to use the AnnotationBbox method which worked great for my time-series, but I'm not entirely sure if it can be applied here.
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib.cbook import get_sample_data
fig, ax = plt.subplots(
df.boxplot(column='A', by=['name'],ax=ax,showmeans=True, fontsize=8, grid=False)
for name in names:
rslt_df = df[df['name']==name]
val = rslt_df['A'].values[0]
xy = (0, val)
fn = get_sample_data(f"{name}.png", asfileobj=False)
arr_img = plt.imread(fn, format='png')
imagebox = OffsetImage(arr_img, zoom=0.125)
imagebox.image.axes = ax
ab = AnnotationBbox(imagebox, xy,xybox=(15.,0),xycoords='data',boxcoords="offset points",pad=0,frameon=False)
ax.add_artist(ab)
The code in the OP if very similar to Add image annotations to bar plots axis tick labels, but needs to be modified because boxplots are slightly different the barplots.
The main issue was xy didn't have the correct values.
The xy and xybox parameters can be adjusted to place the images anywhere.
By default, boxplot positions the ticks at range(1, n+1), as explained in this answer
Reset the tick positions with a 0 index: positions=range(len(names))
df was created with names = ['PersonA', 'PersonB', 'PersonC'] since only 3 images were provided.
ax = df.boxplot(column='A', by=['name'], showmeans=True, fontsize=8, grid=False, positions=range(len(names)))
ax.set(xlabel=None, title=None)
# move the xtick labels
ax.set_xticks(range(len(names)))
ax.set_xticklabels(countries)
ax.tick_params(axis='x', which='major', pad=30)
# use the ytick values to locate the image
y = ax.get_yticks()[1]
for i, (name, data) in enumerate(df.groupby('name')):
xy = (i, y)
fn = f"data/so_data/2021-08-28/{name}.png" # path to file
arr_img = plt.imread(fn, format='png')
imagebox = OffsetImage(arr_img, zoom=0.125)
imagebox.image.axes = ax
ab = AnnotationBbox(imagebox, xy, xybox=(0, -30), xycoords='data', boxcoords="offset points", pad=0, frameon=False)
ax.add_artist(ab)
I noticed that the y-ticks don't always position themselves in a friendly manner, so I set a static Y-value (the x-axis). Creating a transform xycoords, allows placement directly below the x-axis, no matter the y-tick scale.
# BOX GRAPH PLOT
fig, ax = plt.subplots(facecolor='darkslategrey')
plt.style.use('dark_background')
ax = df.boxplot(column=str(c), by=['name'],ax=ax,showmeans=True, fontsize=8,grid=False,positions=range(len(top)))
ax.set(xlabel=None, title=None)
# move the xtick labels
ax.set_xticks(range(len(top)))
ax.tick_params(axis='x', which='major', pad=20)
# use the ytick values to locate the image
y = ax.get_xticks()[0]
for i, (name, data) in enumerate(df.groupby('name')):
xy = (i, y)
fn = f"{imgsrc}/{name}.png" # path to file
arr_img = plt.imread(fn, format='png')
imagebox = OffsetImage(arr_img, zoom=0.125)
imagebox.image.axes = ax
trans = ax.get_xaxis_transform()
ab = AnnotationBbox(imagebox, xy, xybox=(0, -15), xycoords=trans,boxcoords="offset points", pad=0, frameon=False)
ax.add_artist(ab)
plt.show()
I have a similar plot to the one answered in the link below:
two DataFrame plot in a single plot matplotlip
I made some modification to plots for df2 columns code block because i think that is where i have to modify but i could not yield the output.
a sample of the plot i want is this
this was how i modified it:
f, axes = plt.subplots(nrows=len(signals.columns)+1, sharex=True, )
i = 0
for col in df2.columns:
fig, axs = plt.subplots()
sns.regplot(x='', y='', data=df2, ax=axs[0])
df2[col].plot(ax=axes[i], color='grey')
axes[i].set_ylabel(col)
i+=1
I have seen that its wrong.
I tried this out, it seems like a head way :)
How do I make modification on this to get what i want:
f, axes = plt.subplots(nrows=len(signals.columns)+1, sharex=True, )
# plots for df2 columns
i = 0
for col in df2.columns:
lw=1
df2[col].plot(ax=axes[i], color='grey')
axes[i].set_ylim(0, 1)
axes[i].set_ylabel(col)
sns.rugplot(df2["P1"])
You have several options to make this graph. df1 and df2 are as defined in your previous question
The version with matplotlib.pyplot.scatter is faster to draw, but less faithful to the example. The version with seaborn.rugplot looks identical to the example, but takes longer to draw. I highlighted the important part of the code between comment lines ########
using matplotlib.pyplot.scatter
import seaborn as sns
import numpy as np
f, axes = plt.subplots(nrows=len(df2.columns)+1, sharex=True,
gridspec_kw={'height_ratios':np.append(np.repeat(1, len(df2.columns)), 3)})
####### variable part below #######
# plots for df2 columns
i = 0
for col in df2.columns:
axes[i].scatter(x=df2.index, y=np.repeat(0, len(df2)), c=df2[col], marker='|', cmap='Greys')
axes[i].set_ylim(-0.5, 0.5)
axes[i].set_yticks([0])
axes[i].set_yticklabels([col])
i+=1
###################################
## code to plot annotations
axes[-1].set_xlabel('Genomic position')
axes[-1].set_ylabel('annotations')
axes[-1].set_ylim(-0.5, 1.5)
axes[-1].set_yticks([0, 1])
axes[-1].set_yticklabels(['−', '+'])
for _, r in df1.iterrows():
marker = '|'
lw=1
if r['type'] == 'exon':
marker=None
lw=8
y = 1 if r['strand'] == '+' else 0
axes[-1].plot((r['start'], r['stop']), (y, y),
marker=marker, lw=lw,
solid_capstyle='butt',
color='#505050')
# remove space between plots
plt.subplots_adjust(hspace=0)
axes[-1].set_xlim(0, len(df2))
f.set_size_inches(6, 2)
using seaborn.rugplot
import seaborn as sns
import numpy as np
f, axes = plt.subplots(nrows=len(df2.columns)+1, sharex=True,
gridspec_kw={'height_ratios':np.append(np.repeat(1, len(df2.columns)), 3)})
####### variable part below #######
import matplotlib
import matplotlib.cm as cm
norm = matplotlib.colors.Normalize(vmin=0, vmax=1, clip=True)
mapper = cm.ScalarMappable(norm=norm, cmap=cm.Greys)
# plots for df2 columns
i = 0
for col in df2.columns:
sns.rugplot(x=df2.index, color=list(map(mapper.to_rgba, df2[col])), height=1, ax=axes[i])
axes[i].set_yticks([0])
axes[i].set_yticklabels([col])
i+=1
###################################
## code to plot annotations
axes[-1].set_xlabel('Genomic position')
axes[-1].set_ylabel('annotations')
axes[-1].set_ylim(-0.5, 1.5)
axes[-1].set_yticks([0, 1])
axes[-1].set_yticklabels(['−', '+'])
for _, r in df1.iterrows():
marker = '|'
lw=1
if r['type'] == 'exon':
marker=None
lw=8
y = 1 if r['strand'] == '+' else 0
axes[-1].plot((r['start'], r['stop']), (y, y),
marker=marker, lw=lw,
solid_capstyle='butt',
color='#505050')
# remove space between plots
plt.subplots_adjust(hspace=0)
axes[-1].set_xlim(0, len(df2))
f.set_size_inches(6, 2)
I'm totally new at using Python for Power BI (or anything really).
I would like to add the value of the bar/scatter at the end of the line. (the datalabel)
Also to have a version where I could have the label inside of the scatter bubble would be cool.
Anyone who could help out here ?
All help appreciated
# libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Create a dataframe
df = pd.DataFrame({'group': dataset.Genre , 'values': dataset.Revenue})
val = list(dataset.SelectedGenre)
# Reorder it following the values:
ordered_df = df.sort_values(by='values')
my_range=range(1,len(df.index)+1)
# Create a color if the group is "B"
my_color=np.where(ordered_df ['group']== val, 'orange', 'skyblue')
my_size=np.where(ordered_df ['group']== val , 150, 150)
# The vertival plot is made using the hline function
# I load the seaborn library only to benefit the nice looking feature
import seaborn as sns
val = ordered_df['values']
plt.hlines(y=my_range, xmin=0, xmax=val, color=my_color, alpha=1 , linewidth=8)
plt.scatter(val, my_range, color=my_color, s=my_size, alpha=1)
# Add title and axis names
plt.yticks(my_range, ordered_df['group'])
plt.title("What about the B group?", loc='left')
plt.xlabel('Value of the variable')
plt.ylabel('Group')
plt.box(False) #Turn of Black bx around visual
plt.show()
Found it myself
import matplotlib.pyplot as plt
import numpy as np
# Data
x = dataset.Revenue
y = dataset.Genre
labels = dataset.Revenue
val = list(dataset.SelectedGenre)
# Create the figure and axes objects
fig, ax = plt.subplots(1, figsize=(10, 6))
fig.suptitle('Example Of Labelled Scatterpoints')
my_color=np.where(y == val, 'orange', 'skyblue')
my_size=np.where( y == val , 2000, 2000)
# Plot the scatter points
ax.scatter(x, y,
color= my_color, # Color of the dots
s=1000, # Size of the dots
alpha=1, # Alpha of the dots
linewidths=1) # Size of edge around the dots
ax.hlines(y, xmin=0, xmax=x, color= my_color, alpha=1 , linewidth=8)
def human_format(num):
magnitude = 0
while abs(num) >= 1000:
magnitude += 1
num /= 1000
# add more suffixes if you need them
return '%.0f%s' % (round(num), ['', 'K', 'M', 'G', 'T', 'P'][magnitude])
# Add the participant names as text labels for each point
for x_pos, y_pos, label in zip(x, y, labels):
ax.annotate(
human_format(label), # The label for this point
xy=(x_pos, y_pos), # Position of the corresponding point
xytext=(-8, 0), # Offset text by 7 points to the right
textcoords='offset points', # tell it to use offset points
ha='left', # Horizontally aligned to the left
va='center',
color = 'white') # Vertical alignment is centered
plt.box(False) #Turn of Black bx around visual
# Show the plot
plt.show()
I have a table that contains three different time characteristics according to two different parameters. I want to plot those parameters on x and y-axis and show bars of the three different times on the z-axis. I have created a simple bar plot where I plot one of the time characteristics:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
columns = ['R','Users','A','B','C']
df=pd.DataFrame({'R':[2,2,2,4,4,4,6,6,6,8,8],
'Users':[80,400,1000,80,400,1000,80,400,1000,80,400],
'A':[ 0.05381,0.071907,0.08767,0.04493,0.051825,0.05295,0.05285,0.0804,0.0967,0.09864,0.1097],
'B':[0.04287,0.83652,5.49683,0.02604,.045599,2.80836,0.02678,0.32621,1.41399,0.19025,0.2111],
'C':[0.02192,0.16217,0.71645, 0.25314,5.12239,38.92758,1.60807,262.4874,8493,11.6025,6288]},
columns=columns)
fig = plt.figure()
ax = plt.axes(projection="3d")
num_bars = 11
x_pos = df["R"]
y_pos = df["Users"]
z_pos = [0] * num_bars
x_size = np.ones(num_bars)/4
y_size = np.ones(num_bars)*50
z_size = df["A"]
ax.bar3d(x_pos, y_pos, z_pos, x_size, y_size, z_size, color='aqua')
plt.show()
This produces a simple 3d barplot:
However, I would like to plot similar bars next to the existing ones for the rest two columns (B and C) in a different color and add a plot legend as well. I could not figure out how to achieve this.
As a side question, is it as well possible to show only values from df at x- and y-axis? The values are 2-4-6-8 and 80-400-1000, I do not wish pyplot to add additional values on those axis.
I have managed to find a solution myself. To solve the problem with values I have added one to all times (to avoid negative log) and used np.log on all time columns. The values got on scale 0-10 this way and the plot got way easier to read. After that I used loop to go over each column and create corresponding values, positions and colors which I have added all to one list. I moved y_pos for each column so the columns do not plot on same position.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
columns = ['R','Users','A','B','C']
df=pd.DataFrame({'R':[2,2,2,4,4,4,6,6,6,8,8],
'Users':[80,400,1000,80,400,1000,80,400,1000,80,400],
'A':[ 0.05381,0.071907,0.08767,0.04493,0.051825,0.05295,0.05285,0.0804,0.0967,0.09864,0.1097],
'B':[0.04287,0.83652,5.49683,0.02604,.045599,2.80836,0.02678,0.32621,1.41399,0.19025,0.2111],
'C':[0.02192,0.16217,0.71645, 0.25314,5.12239,38.92758,1.60807,262.4874,8493,11.6025,6288]},
columns=columns)
fig = plt.figure(figsize=(10, 10))
ax = plt.axes(projection="3d")
df["A"] = np.log(df["A"]+1)
df["B"] = np.log(df["B"]+1)
df["C"] = np.log(df["C"]+1)
colors = ['r', 'g', 'b']
num_bars = 11
x_pos = []
y_pos = []
x_size = np.ones(num_bars*3)/4
y_size = np.ones(num_bars*3)*50
c = ['A','B','C']
z_pos = []
z_size = []
z_color = []
for i,col in enumerate(c):
x_pos.append(df["R"])
y_pos.append(df["Users"]+i*50)
z_pos.append([0] * num_bars)
z_size.append(df[col])
z_color.append([colors[i]] * num_bars)
x_pos = np.reshape(x_pos,(33,))
y_pos = np.reshape(y_pos,(33,))
z_pos = np.reshape(z_pos,(33,))
z_size = np.reshape(z_size,(33,))
z_color = np.reshape(z_color,(33,))
ax.bar3d(x_pos, y_pos, z_pos, x_size, y_size, z_size, color=z_color)
plt.xlabel('R')
plt.ylabel('Users')
ax.set_zlabel('Time')
from matplotlib.lines import Line2D
legend_elements = [Line2D([0], [0], marker='o', color='w', label='A',markerfacecolor='r', markersize=10),
Line2D([0], [0], marker='o', color='w', label='B',markerfacecolor='g', markersize=10),
Line2D([0], [0], marker='o', color='w', label='C',markerfacecolor='b', markersize=10)
]
# Make legend
ax.legend(handles=legend_elements, loc='best')
# Set view
ax.view_init(elev=35., azim=35)
plt.show()
Final plot:
When I make figure with 5 subplots and annotate the bars in each subplot, matplotlib appears to scale the figure so that the maximum from the largest y-axis scales to the smallest y-axis.
I can't describe the problem too well, but see this image:
where there's tons of white-space above where the figure should begin.
However, the figure would ideally look like this
When I set the 4 smallest axes to have the same upper y-limit as the largest axis, then the figure scales correctly, but for the purpose of the visualization, I would prefer not to do that.
Why does this happen? Is there anyway to control the figure so that it's not automatically scaled as in the first image? Or otherwise, a more appropriate way of plotting what I hope to achieve?
The code I'm using to generate the figure:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.patches import Patch
from matplotlib import rcParams
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Arial']
department = ["100", "1,000", "10,000", \
"100,000", "1,000,000"]
quarter = ["Serial", "MPI", "CUDA", "Hybrid"]
budgets = np.array([[0.049979, 0.43584, 2.787366, 19.75062, 201.6935],\
[2.184624, 0.175213, 0.677837, 5.265575, 46.33678],\
[0.050294, 0.068537, 0.23739, 1.93778, 18.55734],\
[3.714284, 3.9917, 4.977599, 6.174967, 37.732232]])
budgets = np.transpose(budgets)
em = np.zeros((len(department), len(quarter)))
# set up barchart
x = np.arange(len(department)) # label locations
width = 0.8 # width of all the bars
# set up figure
fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(1, 5)
axes = [ax1, ax2, ax3, ax4, ax5]
# generate bars
rects = []
color = ["tomato", "royalblue", "limegreen", "orange"]
n = len(quarter)
for i in range(n):
bar_x = x - width/2.0 + i/float(n)*width + width/(n*2)
m = len(budgets[:,i])
for j in range(m):
bar_x = x[j] - width/2.0 + i/float(n)*width + width/(n*2)
e = budgets[j,i]
#bar_x = x - width/2.0 + i/float(n)*width + width/(n*2)
rects.append(axes[j].bar(bar_x, e, width=width/float(n), \
label=quarter[i], color=color[i]))
# set figure properties
fig.set_size_inches(12, 2.5)
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
nAx = len(axes)
for i in range(nAx):
#axes[i].set_aspect("auto")
axes[i].tick_params(axis='x', which='both', bottom=False, top=False,
labelbottom=False)
ax1.set_ylabel("Time (ms)")
for i in range(nAx):
axes[i].yaxis.grid(which="major", color="white", lw=0.75)
ax1.set_ylim([0, 4])
fig.suptitle("Time per iteration for differing dataset sizes") # title
for i in range(nAx):
axes[i].set_xlabel(department[i])
# annotate bars
for i in range(nAx):
for rect in rects:
j = 0;
for bar in rect:
y_bottom, y_top = axes[i].get_ylim() # axis limits
height = bar.get_height() # bar's height
va = 'bottom'
offset = 3
color = 'k'
fg = 'w'
# keep label within plot
if (y_top < 1.1 * height):
offset = -3
va = 'top'
color='w'
fg = 'k'
# annotate the bar
axes[i].annotate('{:.2f}'.format(height),
xy=(bar.get_x() + bar.get_width()/2, height),
xytext=(0,offset),
textcoords="offset points",
ha='center', va=va, color=color)
# set custom legend
legend_elements = [Patch(facecolor='tomato', label='Serial'),
Patch(facecolor='royalblue', label='MPI'),
Patch(facecolor='limegreen', label='CUDA'),
Patch(facecolor='orange', label='Hybrid')]
plt.legend(handles=legend_elements, loc="upper center", fancybox=False,
edgecolor='k', ncol=4, bbox_to_anchor=(-2, -0.1))
plt.show()
This is a partial answer.
This might be a bug, since I couldn't reproduce the problem until I switched to a Jupyter notebook in a Debian system (different hardware too). Your figure gets drawn correctly in my macOS Jupyter notebook, and in Debian when displayed from a .py script.
The problem appears to be with your annotations. If you make the tight_layout call after annotation, you might get a warning like this:
<ipython-input-80-f9f592f5efc5>:88: UserWarning: Tight layout not applied. The bottom and top margins cannot be made large enough to accommodate all axes decorations.
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
It seems like the annotate function is calculating some totally wacky coordinates for your annotations, though the text ends up in the right spot. If you remove them, the white space disappears. You can try calculating the xy coordinates a for your annotations a different way. This might get you started:
axes[i].annotate('{:.2f}'.format(height),
xy=(bar.get_x() + bar.get_width()/2, height),
xytext=(0,offset),
textcoords="offset points",
xycoords="axes points", # change
ha='center', va=va, color=color)
Output:
To correctly calculate the points, you can try using the appropriate axis transformation, though again, I couldn't get it to work and it might be related to a bug.
try putting the fig.tight_layout(rect=[0, 0.03, 1, 0.95]) after all the plotting commands, as below.
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.patches import Patch
from matplotlib import rcParams
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Arial']
department = ["100", "1,000", "10,000", \
"100,000", "1,000,000"]
quarter = ["Serial", "MPI", "CUDA", "Hybrid"]
budgets = np.array([[0.049979, 0.43584, 2.787366, 19.75062, 201.6935],\
[2.184624, 0.175213, 0.677837, 5.265575, 46.33678],\
[0.050294, 0.068537, 0.23739, 1.93778, 18.55734],\
[3.714284, 3.9917, 4.977599, 6.174967, 37.732232]])
budgets = np.transpose(budgets)
em = np.zeros((len(department), len(quarter)))
# set up barchart
x = np.arange(len(department)) # label locations
width = 0.8 # width of all the bars
# set up figure
fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(1, 5)
axes = [ax1, ax2, ax3, ax4, ax5]
# generate bars
rects = []
color = ["tomato", "royalblue", "limegreen", "orange"]
n = len(quarter)
for i in range(n):
bar_x = x - width/2.0 + i/float(n)*width + width/(n*2)
m = len(budgets[:,i])
for j in range(m):
bar_x = x[j] - width/2.0 + i/float(n)*width + width/(n*2)
e = budgets[j,i]
#bar_x = x - width/2.0 + i/float(n)*width + width/(n*2)
rects.append(axes[j].bar(bar_x, e, width=width/float(n), \
label=quarter[i], color=color[i]))
# set figure properties
fig.set_size_inches(12, 2.5)
#fig.tight_layout(rect=[0, 0.03, 1, 0.95])
nAx = len(axes)
for i in range(nAx):
#axes[i].set_aspect("auto")
axes[i].tick_params(axis='x', which='both', bottom=False, top=False,
labelbottom=False)
ax1.set_ylabel("Time (ms)")
for i in range(nAx):
axes[i].yaxis.grid(which="major", color="white", lw=0.75)
ax1.set_ylim([0, 4])
fig.suptitle("Time per iteration for differing dataset sizes") # title
for i in range(nAx):
axes[i].set_xlabel(department[i])
# annotate bars
for i in range(nAx):
for rect in rects:
j = 0;
for bar in rect:
y_bottom, y_top = axes[i].get_ylim() # axis limits
height = bar.get_height() # bar's height
va = 'bottom'
offset = 3
color = 'k'
fg = 'w'
# keep label within plot
if (y_top < 1.1 * height):
offset = -3
va = 'top'
color='w'
fg = 'k'
# annotate the bar
axes[i].annotate('{:.2f}'.format(height),
xy=(bar.get_x() + bar.get_width()/2, height),
xytext=(0,offset),
textcoords="offset points",
ha='center', va=va, color=color)
# set custom legend
legend_elements = [Patch(facecolor='tomato', label='Serial'),
Patch(facecolor='royalblue', label='MPI'),
Patch(facecolor='limegreen', label='CUDA'),
Patch(facecolor='orange', label='Hybrid')]
plt.legend(handles=legend_elements, loc="upper center", fancybox=False,
edgecolor='k', ncol=4, bbox_to_anchor=(-2, -0.1))
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()