First boxplot missing or not visible - python

Why is the first boxplot missing? There's supoussed to be 24 boxplots, but only 23 are being display as you can see in the image. Who's size do i have to change to make it visible? I've tried changing the size of the figure but its the same.
Not sure if it helps, but this is the code:
def obtenerBoxplotsAnualesIntercalados(self, directorioEntrada, directorioSalida):
meses = ["Enero","Febrero","Marzo","Abril","Mayo","Junio", "Julio", "Agosto","Septie.","Octubre","Noviem.","Diciem."]
ciudades = ["CO","CR"]
anios = ["2011", "2012", "2013"]
for anio in anios:
fig = plt.figure()
fig.set_size_inches(14.3, 9)
ax = plt.axes()
plt.hold(True)
bpArray = []
i=0
ticks = []
for mes in range(len(meses)):
archivoCO = open(directorioEntrada+"/"+"CO"+"-"+self.mesStr(mes+1)+"-"+anio, encoding = "ISO-8859-1")
archivoCR = open(directorioEntrada+"/"+"CR"+"-"+self.mesStr(mes+1)+"-"+anio, encoding = "ISO-8859-1")
datosCOmes = self.obtenerDatosmensuales(archivoCO)
datosCRmes = self.obtenerDatosmensuales(archivoCR)
data = [ [int(float(datosCOmes[2])), int(float(datosCOmes[0])), int(float(datosCOmes[1]))],
[int(float(datosCRmes[2])), int(float(datosCRmes[0])), int(float(datosCRmes[1]))] ]
bpArray.append(plt.boxplot(data, positions=[i,i+1], widths=0.5))
ticks.append(i+0.5)
i=i+2
hB, = plt.plot([1,1],'b-')
hR, = plt.plot([1,1],'r-')
plt.legend((hB, hR),('Caleta', 'Comodoro'))
hB.set_visible(False)
hR.set_visible(False)
ax.set_xticklabels(meses)
ax.set_xticks(ticks)
self.setBoxColors(bpArray)
plt.title('Variación de temperatura mensual Caleta Olivia-Comodoro Rivadavia. Año '+anio)
plt.savefig(directorioSalida+"/asdasd"+str(anio)+".ps", orientation='landscape', papertype='A4' )

Your boxplot is there but it's hidden. This reproduces your problem:
import matplotlib
import numpy as np
data = np.random.normal(10,2,100*24).reshape(24,-1) # let's get 12 pairs of arrays to plot
meses = ["Enero","Febrero","Marzo","Abril","Mayo","Junio", "Julio", "Agosto","Septie.","Octubre","Noviem.","Diciem."]
ax = plt.axes()
plt.hold(True)
i=0
ticks = []
for mes in range(0,len(meses)):
plt.boxplot(data, positions=[i,i+1], widths=0.5)
ticks.append(i+0.5)
i+=2
ax.set_xticklabels(meses)
ax.set_xticks(ticks)
plt.show()
Notice that you are defining your positions as ranging from 0 to 12, but you append ticks as range(0,12) + 0.5. Thus, when you later do set_xticks(ticks), your x axis will begin from 0.5 but your 1st boxplot is plotted at position 0.
I've adapted your code slightly to produce the result you want:
ax = plt.axes()
plt.hold(True)
i=1 # we start plotting from position 1 now
ticks = []
for mes in range(0,len(meses)):
plt.boxplot(data, positions=[i,i+1], widths=0.5)
ticks.append(i+0.5)
i+=2
ax.set_xticklabels(meses)
ax.set_xlim(0,ticks[-1]+1) # need to shift the right end of the x limit by 1
ax.set_xticks(ticks)
plt.show()

Related

Stacked bar plot color by a diferent category

I want to make a plot where it shows the RFS by floor opened by the unit_id, but i want the color of the unit_id to be defined by the year. So far i make it work for a reduced set of data, but i think it will be difficult to scale the code.
What i do is, first i identify the order that each unit has at it's floor so i first plot all the units by floor in the first position, then the ones that are in second position and so on.
Thanks!
df_build = pd.DataFrame({'floor':[1,1,1,2,2,2,3,3,3],'unidad':[100,101,102,200,201,202,300,301,302],
'rsf':[2000,1000,1500,1500,2000,1000,1000,1500,2000],'order':[0,1,2,0,1,2,0,1,2],
'year':[2008,2009,2010,2009,2010,2011,2010,2011,2012]})
assign_colors = {2008:'tab:red',2009:'tab:blue',2010:'tab:green',2011:'tab:pink',2012:'tab:olive'}
labels = list(df_build.floor.unique())
order_0 = df_build[df_build.order==0].rsf.values
c1=list(df_build[df_build.order==0].year.replace(assign_colors).values)
order_1 = df_build[df_build.order==1].rsf.values
c2=list(df_build[df_build.order==1].year.replace(assign_colors).values)
order_2 = df_build[df_build.order==2].rsf.values
c3=list(df_build[df_build.order==2].year.replace(assign_colors).values)
width = 0.35
fig, ax = plt.subplots()
ax.barh(labels, order_0, width,color=c1)
ax.barh(labels, order_1, width,left=order_0, color=c2)
ax.barh(labels, order_2, width,left=order_0+order_1, color=c3)
ax.set_ylabel('floor')
ax.set_title('Stacking Plan')
#ax.legend()
plt.show()
Try pivoting the data and loop:
# map the color
df_build['color'] = df_build['year'].map(assign_colors)
# pivot the data
plot_df = df_build.pivot(index='floor', columns='order')
# plot by row
fig, ax = plt.subplots()
for i in df.index:
rights = plot_df.loc[i,'rsf'].cumsum()
lefts = rights.shift(fill_value=0)
ax.barh(i, plot_df.loc[i,'rsf'], left=lefts, color=plot_df.loc[i,'color'])
for j in range(len(rights)):
label = plot_df.loc[i, 'unidad'].iloc[j]
rsf = plot_df.loc[i, 'rsf'].iloc[j]
x = (rights.iloc[j] + lefts.iloc[j]) / 2
ax.text(x, i, f'{label}-{rsf}', ha='center')
Output:

matplotlib barh: how to make a visual gap between two groups of bars?

I have some sorted data of which I only show the highest and lowest values in a figure. This is a minimal version of what currently I have:
import matplotlib.pyplot as plt
# some dummy data (real data contains about 250 entries)
x_data = list(range(98, 72, -1))
labels = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
ranks = list(range(1, 27))
fig, ax = plt.subplots()
# plot 3 highest entries
bars_top = ax.barh(labels[:3], x_data[:3])
# plot 3 lowest entries
bars_bottom = ax.barh(labels[-3:], x_data[-3:])
ax.invert_yaxis()
# print values and ranks
for bar, value, rank in zip(bars_top + bars_bottom,
x_data[:3] + x_data[-3:],
ranks[:3] + ranks[-3:]):
y_pos = bar.get_y() + 0.5
ax.text(value - 4, y_pos, value, ha='right')
ax.text(4, y_pos, f'$rank:\ {rank}$')
ax.set_title('Comparison of Top 3 and Bottom 3')
plt.show()
Result:
I'd like to make an additional gap to this figure to make it more visually clear that the majority of data is in fact not displayed in this plot. For example, something very simple like the following would be sufficient:
Is this possible in matplotlib?
Here is a flexible approach that just plots a dummy bar in-between. The yaxis-transform together with the dummy bar's position is used to plot 3 black dots.
If multiple separations are needed, they all need a different dummy label, for example repeating the space character.
import matplotlib.pyplot as plt
import numpy as np
# some dummy data (real data contains about 250 entries)
x_data = list(range(98, 72, -1))
labels = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
ranks = list(range(1, 27))
fig, ax = plt.subplots()
# plot 3 highest entries
bars_top = ax.barh(labels[:3], x_data[:3])
# dummy bar inbetween
dummy_bar = ax.barh(" ", 0, color='none')
# plot 3 lowest entries
bars_bottom = ax.barh(labels[-3:], x_data[-3:])
ax.invert_yaxis()
# print values and ranks
for bar, value, rank in zip(bars_top + bars_bottom,
x_data[:3] + x_data[-3:],
ranks[:3] + ranks[-3:]):
y_pos = bar.get_y() + 0.5
ax.text(value - 4, y_pos, value, ha='right')
ax.text(4, y_pos, f'$rank:\ {rank}$')
# add three dots using the dummy bar's position
ax.scatter([0.05] * 3, dummy_bar[0].get_y() + np.linspace(0, dummy_bar[0].get_height(), 3),
marker='o', s=5, color='black', transform=ax.get_yaxis_transform())
ax.set_title('Comparison of Top 3 and Bottom 3')
ax.tick_params(axis='y', length=0) # hide the tick marks
ax.margins(y=0.02) # less empty space at top and bottom
plt.show()
The following function,
def top_bottom(x, l, n, ax=None, gap=1):
from matplotlib.pyplot import gca
if n <= 0 : raise ValueError('No. of top/bottom values must be positive')
if n > len(x) : raise ValueError('No. of top/bottom values should be not greater than data length')
if n+n > len(x):
print('Warning: no. of top/bottom values is larger than one'
' half of data length, OVERLAPPING')
if gap < 0 : print('Warning: some bar will be overlapped')
ax = ax if ax else gca()
top_x = x[:+n]
bot_x = x[-n:]
top_y = list(range(n+n, n, -1))
bot_y = list(range(n-gap, -gap, -1))
top_l = l[:+n] # A B C
bot_l = l[-n:] # X Y Z
top_bars = ax.barh(top_y, top_x)
bot_bars = ax.barh(bot_y, bot_x)
ax.set_yticks(top_y+bot_y)
ax.set_yticklabels(top_l+bot_l)
return top_bars, bot_bars
when invoked with your data and n=4, gap=4
bars_top, bars_bottom = top_bottom(x_data, labels, 4, gap=4)
produces
Later, you'll be able to customize the appearance of the bars as you like using the Artists returned by the function.

How to achieve spaces between stacked bar chart, center aligned

I am using matplotlib and a stackedbarchart program for it that someone wrote to graph a stacked bar chart.
My graph:
x-axis has 8 income distributions, one for each bar
y-axis is the % of people in each income distribution. person type-a is the first stack, person type-b is the secon dstack, person type-c is the third stack.
My barchart is center aligned, and I am trying to figure out how to space out the bars so the graph looks better and so the labels are easier to read. Any suggestions, or clarifications?
The program is stackedBarGraph.py and the code looks like this, where widths is an array of 8 values, each corresponding to the width of a bar chart.
Let me know if you need any more information (I tried to keep everything relevant). Thanks!
Full code (I hope it's not too difficult to read):
from __future__ import division
from pylab import *
import seaborn as sns
import pandas as pd
import numpy as np
from stackedbars import StackedBarGrapher
data = csv2rec('coa.csv', delimiter=',')
x = data['totalgrantaid']
y = data['studenteffort']
z = data['parentcontim']
g = data['parentincomeim']
df = pd.DataFrame(dict(grant = x, stud = y, par = z, income = g))
#organize the data to graph
income_brackets = [(0, 25000), (25000, 50000), (50000, 75000), (75000, 100000), (100000, 150000), (150000,200000), (200000,250000), (250000,300000)]
source = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
for key in source:
source[key].append(median(df.query('income > {} and income < {}'.format(lower, upper))[key]))
#set the widths
source2 = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
for key in source2:
source2[key].append(pd.DataFrame(df.query('income > {} and income < {}'.format(lower,upper))).count()[key])
total = pd.DataFrame(df.query('income > 0 and income < 300000')['grant']).count()
total = total/10
#graph specifications
d_widths = [(source2['grant'][i]/total)[0] for i in range(8)]
d_colors = ['r','g','b']
d_labels = ('<25000', '25000-\n50000', '50000-\n75000', '75000-\n100000', '100000-\n150000', '150000-\n200000', '200000-\n250000', '250000-\n300000')
d = np.array([[source[k][i] for k in ('grant', 'stud', 'par')] for i in range(8)])
#the graph
fig = plt.figure()
ax1 = fig.add_subplot(111)
mygraph = StackedBarGrapher()
mygraph.stackedBarPlot(ax1,d,d_colors, edgeCols=['#000000']*3,widths = d_widths, showFirst = 8, xLabels=d_labels,scale=True)
Stackedbarchart program:
def stackedBarPlot(self,
ax, # axes to plot onto
data, # data to plot
cols, # colors for each level
xLabels = None, # bar specific labels
yTicks = 6., # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
edgeCols=None, # colors for edges
showFirst=-1, # only plot the first <showFirst> bars
scale=False, # scale bars to same height
widths=None, # set widths for each bar
heights=None, # set heights for each bar
ylabel='', # label for x axis
xlabel='' # label for y axis
):
#------------------------------------------------------------------------------
# data fixeratering
# make sure this makes sense
if showFirst != -1:
showFirst = np.min([showFirst, np.shape(data)[0]])
data_copy = np.copy(data[:showFirst]).transpose().astype('float')
data_shape = np.shape(data_copy)
if heights is not None:
heights = heights[:showFirst]
if widths is not None:
widths = widths[:showFirst]
showFirst = -1
else:
data_copy = np.copy(data).transpose()
data_shape = np.shape(data_copy)
# determine the number of bars and corresponding levels from the shape of the data
num_bars = data_shape[1]
levels = data_shape[0]
if widths is None:
widths = np.array([1] * num_bars)
x = np.arange(num_bars)
else:
x = [0]
for i in range(1, len(widths)):
x.append(x[i-1] + (widths[i-1] + widths[i])/2)
# stack the data --
# replace the value in each level by the cumulative sum of all preceding levels
data_stack = np.reshape([float(i) for i in np.ravel(np.cumsum(data_copy, axis=0))], data_shape)
# scale the data is needed
if scale:
data_copy /= data_stack[levels-1]
data_stack /= data_stack[levels-1]
if heights is not None:
print "WARNING: setting scale and heights does not make sense."
heights = None
elif heights is not None:
data_copy /= data_stack[levels-1]
data_stack /= data_stack[levels-1]
for i in np.arange(num_bars):
data_copy[:,i] *= heights[i]
data_stack[:,i] *= heights[i]
#------------------------------------------------------------------------------
# ticks
if yTicks is not "none":
# it is either a set of ticks or the number of auto ticks to make
real_ticks = True
try:
k = len(yTicks[1])
except:
real_ticks = False
if not real_ticks:
yTicks = float(yTicks)
if scale:
# make the ticks line up to 100 %
y_ticks_at = np.arange(yTicks)/(yTicks-1)
y_tick_labels = np.array(["%0.0f"%(i * 100) for i in y_ticks_at])
else:
# space the ticks along the y axis
y_ticks_at = np.arange(yTicks)/(yTicks-1)*np.max(data_stack)
y_tick_labels = np.array([str(i) for i in y_ticks_at])
yTicks=(y_ticks_at, y_tick_labels)
#------------------------------------------------------------------------------
# plot
if edgeCols is None:
edgeCols = ["none"]*len(cols)
# bars
ax.bar(x,
data_stack[0],
color=cols[0],alpha=0.7,
edgecolor=edgeCols[0],
width=widths,
linewidth=0.5,
align='center'
)
for i in np.arange(1,levels):
ax.bar(x,
data_copy[i],
bottom=data_stack[i-1],
color=cols[i],alpha=0.7,
edgecolor=edgeCols[i],
width=widths,
linewidth=0.5,
align='center'
)
# borders
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines["left"].set_visible(False)
# make ticks if necessary
if yTicks is not "none":
ax.tick_params(axis='y', which='both', labelsize=8, direction="out")
ax.yaxis.tick_left()
plt.yticks(yTicks[0], yTicks[1])
else:
plt.yticks([], [])
if xLabels is not None:
ax.tick_params(axis='x', which='both', labelsize=8, direction="out")
ax.xaxis.tick_bottom()
plt.xticks(x, xLabels, rotation='horizontal')
else:
plt.xticks([], [])
# limits
ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
ax.set_ylim(0, np.max(data_stack))
# labels
if xlabel != '':
ax.xlabel(xlabel)
if ylabel != '':
ax.ylabel(ylabel)
Alright thanks everyone for the input (and Bill for showing me how to use list comprehensions effectively).
I was able to alter the program to achieve what I wanted (I think). I added a new variable, axspacing to the below parts of the program:
def stackedBarPlot(self,
ax, # axes to plot onto
data, # data to plot
cols, # colors for each level
xLabels = None, # bar specific labels
yTicks = 6., # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
edgeCols=None, # colors for edges
showFirst=-1, # only plot the first <showFirst> bars
scale=False, # scale bars to same height
widths=None, # set widths for each bar
heights=None, # set heights for each bar
ylabel='', # label for x axis
xlabel='', # label for y axis
xaxlim=None,
axspacing=0,
):
.
if widths is None:
widths = np.array([1] * num_bars)
x = np.arange(num_bars)
else:
x = [0]
for i in range(1, len(widths)):
x.append(x[i-1] + (widths[i-1] + widths[i])/2 + axspacing)
.
# limits
#ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
ax.set_ylim(0, np.max(data_stack))
if xaxlim is None:
ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5 + num_bars * axspacing)
else:
ax.set_xlim(xaxlim)

matplotlib: Group boxplots

Is there a way to group boxplots in matplotlib?
Assume we have three groups "A", "B", and "C" and for each we want to create a boxplot for both "apples" and "oranges". If a grouping is not possible directly, we can create all six combinations and place them linearly side by side. What would be to simplest way to visualize the groupings? I'm trying to avoid setting the tick labels to something like "A + apples" since my scenario involves much longer names than "A".
How about using colors to differentiate between "apples" and "oranges" and spacing to separate "A", "B" and "C"?
Something like this:
from pylab import plot, show, savefig, xlim, figure, \
hold, ylim, legend, boxplot, setp, axes
# function for setting the colors of the box plots pairs
def setBoxColors(bp):
setp(bp['boxes'][0], color='blue')
setp(bp['caps'][0], color='blue')
setp(bp['caps'][1], color='blue')
setp(bp['whiskers'][0], color='blue')
setp(bp['whiskers'][1], color='blue')
setp(bp['fliers'][0], color='blue')
setp(bp['fliers'][1], color='blue')
setp(bp['medians'][0], color='blue')
setp(bp['boxes'][1], color='red')
setp(bp['caps'][2], color='red')
setp(bp['caps'][3], color='red')
setp(bp['whiskers'][2], color='red')
setp(bp['whiskers'][3], color='red')
setp(bp['fliers'][2], color='red')
setp(bp['fliers'][3], color='red')
setp(bp['medians'][1], color='red')
# Some fake data to plot
A= [[1, 2, 5,], [7, 2]]
B = [[5, 7, 2, 2, 5], [7, 2, 5]]
C = [[3,2,5,7], [6, 7, 3]]
fig = figure()
ax = axes()
hold(True)
# first boxplot pair
bp = boxplot(A, positions = [1, 2], widths = 0.6)
setBoxColors(bp)
# second boxplot pair
bp = boxplot(B, positions = [4, 5], widths = 0.6)
setBoxColors(bp)
# thrid boxplot pair
bp = boxplot(C, positions = [7, 8], widths = 0.6)
setBoxColors(bp)
# set axes limits and labels
xlim(0,9)
ylim(0,9)
ax.set_xticklabels(['A', 'B', 'C'])
ax.set_xticks([1.5, 4.5, 7.5])
# draw temporary red and blue lines and use them to create a legend
hB, = plot([1,1],'b-')
hR, = plot([1,1],'r-')
legend((hB, hR),('Apples', 'Oranges'))
hB.set_visible(False)
hR.set_visible(False)
savefig('boxcompare.png')
show()
Here is my version. It stores data based on categories.
import matplotlib.pyplot as plt
import numpy as np
data_a = [[1,2,5], [5,7,2,2,5], [7,2,5]]
data_b = [[6,4,2], [1,2,5,3,2], [2,3,5,1]]
ticks = ['A', 'B', 'C']
def set_box_color(bp, color):
plt.setp(bp['boxes'], color=color)
plt.setp(bp['whiskers'], color=color)
plt.setp(bp['caps'], color=color)
plt.setp(bp['medians'], color=color)
plt.figure()
bpl = plt.boxplot(data_a, positions=np.array(xrange(len(data_a)))*2.0-0.4, sym='', widths=0.6)
bpr = plt.boxplot(data_b, positions=np.array(xrange(len(data_b)))*2.0+0.4, sym='', widths=0.6)
set_box_color(bpl, '#D7191C') # colors are from http://colorbrewer2.org/
set_box_color(bpr, '#2C7BB6')
# draw temporary red and blue lines and use them to create a legend
plt.plot([], c='#D7191C', label='Apples')
plt.plot([], c='#2C7BB6', label='Oranges')
plt.legend()
plt.xticks(xrange(0, len(ticks) * 2, 2), ticks)
plt.xlim(-2, len(ticks)*2)
plt.ylim(0, 8)
plt.tight_layout()
plt.savefig('boxcompare.png')
I am short of reputation so I cannot post an image to here.
You can run it and see the result. Basically it's very similar to what Molly did.
Note that, depending on the version of python you are using, you may need to replace xrange with range
A simple way would be to use pandas.
I adapted an example from the plotting documentation:
In [1]: import pandas as pd, numpy as np
In [2]: df = pd.DataFrame(np.random.rand(12,2), columns=['Apples', 'Oranges'] )
In [3]: df['Categories'] = pd.Series(list('AAAABBBBCCCC'))
In [4]: pd.options.display.mpl_style = 'default'
In [5]: df.boxplot(by='Categories')
Out[5]:
array([<matplotlib.axes.AxesSubplot object at 0x51a5190>,
<matplotlib.axes.AxesSubplot object at 0x53fddd0>], dtype=object)
Mock data:
df = pd.DataFrame({'Group':['A','A','A','B','C','B','B','C','A','C'],\
'Apple':np.random.rand(10),'Orange':np.random.rand(10)})
df = df[['Group','Apple','Orange']]
Group Apple Orange
0 A 0.465636 0.537723
1 A 0.560537 0.727238
2 A 0.268154 0.648927
3 B 0.722644 0.115550
4 C 0.586346 0.042896
5 B 0.562881 0.369686
6 B 0.395236 0.672477
7 C 0.577949 0.358801
8 A 0.764069 0.642724
9 C 0.731076 0.302369
You can use the Seaborn library for these plots. First melt the dataframe to format data and then create the boxplot of your choice.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
dd=pd.melt(df,id_vars=['Group'],value_vars=['Apple','Orange'],var_name='fruits')
sns.boxplot(x='Group',y='value',data=dd,hue='fruits')
The accepted answer uses pylab and works for 2 groups. What if we have more?
Here is the flexible generic solution with matplotlib
import matplotlib.pyplot as pl
# there are 4 individuals, each one tested under 3 different settings
# --- Random data, e.g. results per algorithm:
# Invidual 1
d1_1 = [1,1,2,2,3,3]
d1_2 = [3,3,4,4,5,5]
d1_3 = [5,5,6,6,7,7]
# Individual 2
d2_1 = [7,7,8,8,9,9]
d2_2 = [9,9,10,10,11,11]
d2_3 = [11,11,12,12,13,13]
# Individual 3
d3_1 = [1,2,3,4,5,6]
d3_2 = [4,5,6,7,8,9]
d3_3 = [10,11,12,13,14,15]
# Individual 4
d4_1 = [1,1,2,2,3,3]
d4_2 = [9,9,10,10,11,11]
d4_3 = [10,11,12,13,14,15]
# --- Combining your data:
data_group1 = [d1_1, d1_2, d1_3]
data_group2 = [d2_1, d2_2, d2_3]
data_group3 = [d3_1, d3_2, d3_3]
data_group4 = [d4_1, d4_2, d4_3]
colors = ['pink', 'lightblue', 'lightgreen', 'violet']
# we compare the performances of the 4 individuals within the same set of 3 settings
data_groups = [data_group1, data_group2, data_group3, data_group4]
# --- Labels for your data:
labels_list = ['a','b', 'c']
width = 1/len(labels_list)
xlocations = [ x*((1+ len(data_groups))*width) for x in range(len(data_group1)) ]
symbol = 'r+'
ymin = min ( [ val for dg in data_groups for data in dg for val in data ] )
ymax = max ( [ val for dg in data_groups for data in dg for val in data ])
ax = pl.gca()
ax.set_ylim(ymin,ymax)
ax.grid(True, linestyle='dotted')
ax.set_axisbelow(True)
pl.xlabel('X axis label')
pl.ylabel('Y axis label')
pl.title('title')
space = len(data_groups)/2
offset = len(data_groups)/2
# --- Offset the positions per group:
group_positions = []
for num, dg in enumerate(data_groups):
_off = (0 - space + (0.5+num))
print(_off)
group_positions.append([x+_off*(width+0.01) for x in xlocations])
for dg, pos, c in zip(data_groups, group_positions, colors):
boxes = ax.boxplot(dg,
sym=symbol,
labels=['']*len(labels_list),
# labels=labels_list,
positions=pos,
widths=width,
boxprops=dict(facecolor=c),
# capprops=dict(color=c),
# whiskerprops=dict(color=c),
# flierprops=dict(color=c, markeredgecolor=c),
medianprops=dict(color='grey'),
# notch=False,
# vert=True,
# whis=1.5,
# bootstrap=None,
# usermedians=None,
# conf_intervals=None,
patch_artist=True,
)
ax.set_xticks( xlocations )
ax.set_xticklabels( labels_list, rotation=0 )
pl.show()
Just to add to the conversation, I have found a more elegant way to change the color of the box plot by iterating over the dictionary of the object itself
import numpy as np
import matplotlib.pyplot as plt
def color_box(bp, color):
# Define the elements to color. You can also add medians, fliers and means
elements = ['boxes','caps','whiskers']
# Iterate over each of the elements changing the color
for elem in elements:
[plt.setp(bp[elem][idx], color=color) for idx in xrange(len(bp[elem]))]
return
a = np.random.uniform(0,10,[100,5])
bp = plt.boxplot(a)
color_box(bp, 'red')
Cheers!
Here's a function I wrote that takes Molly's code and some other code I've found on the internet to make slightly fancier grouped boxplots:
import numpy as np
import matplotlib.pyplot as plt
def custom_legend(colors, labels, linestyles=None):
""" Creates a list of matplotlib Patch objects that can be passed to the legend(...) function to create a custom
legend.
:param colors: A list of colors, one for each entry in the legend. You can also include a linestyle, for example: 'k--'
:param labels: A list of labels, one for each entry in the legend.
"""
if linestyles is not None:
assert len(linestyles) == len(colors), "Length of linestyles must match length of colors."
h = list()
for k,(c,l) in enumerate(zip(colors, labels)):
clr = c
ls = 'solid'
if linestyles is not None:
ls = linestyles[k]
patch = patches.Patch(color=clr, label=l, linestyle=ls)
h.append(patch)
return h
def grouped_boxplot(data, group_names=None, subgroup_names=None, ax=None, subgroup_colors=None,
box_width=0.6, box_spacing=1.0):
""" Draws a grouped boxplot. The data should be organized in a hierarchy, where there are multiple
subgroups for each main group.
:param data: A dictionary of length equal to the number of the groups. The key should be the
group name, the value should be a list of arrays. The length of the list should be
equal to the number of subgroups.
:param group_names: (Optional) The group names, should be the same as data.keys(), but can be ordered.
:param subgroup_names: (Optional) Names of the subgroups.
:param subgroup_colors: A list specifying the plot color for each subgroup.
:param ax: (Optional) The axis to plot on.
"""
if group_names is None:
group_names = data.keys()
if ax is None:
ax = plt.gca()
plt.sca(ax)
nsubgroups = np.array([len(v) for v in data.values()])
assert len(np.unique(nsubgroups)) == 1, "Number of subgroups for each property differ!"
nsubgroups = nsubgroups[0]
if subgroup_colors is None:
subgroup_colors = list()
for k in range(nsubgroups):
subgroup_colors.append(np.random.rand(3))
else:
assert len(subgroup_colors) == nsubgroups, "subgroup_colors length must match number of subgroups (%d)" % nsubgroups
def _decorate_box(_bp, _d):
plt.setp(_bp['boxes'], lw=0, color='k')
plt.setp(_bp['whiskers'], lw=3.0, color='k')
# fill in each box with a color
assert len(_bp['boxes']) == nsubgroups
for _k,_box in enumerate(_bp['boxes']):
_boxX = list()
_boxY = list()
for _j in range(5):
_boxX.append(_box.get_xdata()[_j])
_boxY.append(_box.get_ydata()[_j])
_boxCoords = zip(_boxX, _boxY)
_boxPolygon = plt.Polygon(_boxCoords, facecolor=subgroup_colors[_k])
ax.add_patch(_boxPolygon)
# draw a black line for the median
for _k,_med in enumerate(_bp['medians']):
_medianX = list()
_medianY = list()
for _j in range(2):
_medianX.append(_med.get_xdata()[_j])
_medianY.append(_med.get_ydata()[_j])
plt.plot(_medianX, _medianY, 'k', linewidth=3.0)
# draw a black asterisk for the mean
plt.plot([np.mean(_med.get_xdata())], [np.mean(_d[_k])], color='w', marker='*',
markeredgecolor='k', markersize=12)
cpos = 1
label_pos = list()
for k in group_names:
d = data[k]
nsubgroups = len(d)
pos = np.arange(nsubgroups) + cpos
label_pos.append(pos.mean())
bp = plt.boxplot(d, positions=pos, widths=box_width)
_decorate_box(bp, d)
cpos += nsubgroups + box_spacing
plt.xlim(0, cpos-1)
plt.xticks(label_pos, group_names)
if subgroup_names is not None:
leg = custom_legend(subgroup_colors, subgroup_names)
plt.legend(handles=leg)
You can use the function(s) like this:
data = { 'A':[np.random.randn(100), np.random.randn(100) + 5],
'B':[np.random.randn(100)+1, np.random.randn(100) + 9],
'C':[np.random.randn(100)-3, np.random.randn(100) -5]
}
grouped_boxplot(data, group_names=['A', 'B', 'C'], subgroup_names=['Apples', 'Oranges'], subgroup_colors=['#D02D2E', '#D67700'])
plt.show()
Grouped boxplots, towards subtle academic publication styling... (source)
(Left) Python 2.7.12 Matplotlib v1.5.3. (Right) Python 3.7.3. Matplotlib v3.1.0.
Code:
import numpy as np
import matplotlib.pyplot as plt
# --- Your data, e.g. results per algorithm:
data1 = [5,5,4,3,3,5]
data2 = [6,6,4,6,8,5]
data3 = [7,8,4,5,8,2]
data4 = [6,9,3,6,8,4]
# --- Combining your data:
data_group1 = [data1, data2]
data_group2 = [data3, data4]
# --- Labels for your data:
labels_list = ['a','b']
xlocations = range(len(data_group1))
width = 0.3
symbol = 'r+'
ymin = 0
ymax = 10
ax = plt.gca()
ax.set_ylim(ymin,ymax)
ax.set_xticklabels( labels_list, rotation=0 )
ax.grid(True, linestyle='dotted')
ax.set_axisbelow(True)
ax.set_xticks(xlocations)
plt.xlabel('X axis label')
plt.ylabel('Y axis label')
plt.title('title')
# --- Offset the positions per group:
positions_group1 = [x-(width+0.01) for x in xlocations]
positions_group2 = xlocations
plt.boxplot(data_group1,
sym=symbol,
labels=['']*len(labels_list),
positions=positions_group1,
widths=width,
# notch=False,
# vert=True,
# whis=1.5,
# bootstrap=None,
# usermedians=None,
# conf_intervals=None,
# patch_artist=False,
)
plt.boxplot(data_group2,
labels=labels_list,
sym=symbol,
positions=positions_group2,
widths=width,
# notch=False,
# vert=True,
# whis=1.5,
# bootstrap=None,
# usermedians=None,
# conf_intervals=None,
# patch_artist=False,
)
plt.savefig('boxplot_grouped.png')
plt.savefig('boxplot_grouped.pdf') # when publishing, use high quality PDFs
#plt.show() # uncomment to show the plot.
I used the code given by Kuzeko and it worked well, but I found that the boxes in each group were being drawn in the reverse order. I changed ...x-_off... to ...x+_off... in the following line (just above the last for loop) which fixes it for me:
group_positions.append([x+_off*(width+0.01) for x in xlocations])
A boxplot above was modified to obtain group boxplots with 3 data types.
import matplotlib.pyplot as plt
import numpy as np
ord = [[16.9423,
4.0410,
19.1185],
[18.5134,
17.8048,
19.2669],
[18.7286,
18.0576,
19.1717],
[18.8998,
18.8469,
19.0005],
[18.8126,
18.7870,
18.8393],
[18.7770,
18.7511,
18.8022],
[18.7409,
18.7075,
18.7747],
[18.6866,
18.6624,
18.7093
],
[18.6748],
[18.9069,
18.6752,
19.0769],
[19.0012,
18.9783,
19.0202
],
[18.9448,
18.9134,
18.9813],
[19.1242,
18.8256,
19.3185],
[19.2118,
19.1661,
19.2580],
[19.2505,
19.1231,
19.3526]]
seq = [[17.8092,
4.0410,
19.6653],
[18.7266,
18.2556,
19.3739],
[18.6051,
18.0589,
19.0557],
[18.6467,
18.5629,
18.7566],
[18.5307,
18.4999,
18.5684],
[18.4732,
18.4484,
18.4985],
[18.5234,
18.5027,
18.4797,
18.4573],
[18.3987,
18.3636,
18.4544],
[18.3593],
[18.7234,
18.7092,
18.7598],
[18.7438,
18.7224,
18.7677],
[18.7304,
18.7111,
18.6880,
18.6913,
18.6678],
[18.8926,
18.5902,
19.2003],
[19.1059,
19.0835,
19.0601,
19.0373,
19.0147],
[19.1925,
19.0177,
19.2588]]
apd=[[17.0331,
4.0410,
18.5670],
[17.6124,
17.1975,
18.0755],
[17.3956,
17.1572,
17.9140],
[17.8295,
17.6514,
18.1466],
[18.0665,
17.9144,
18.2157],
[18.1518,
18.0382,
18.2722],
[18.1975,
18.0956,
18.2987],
[18.2219,
18.1293,
18.3062],
[18.2870,
18.2215,
18.3513],
[18.3047,
18.2363,
18.3950],
[18.3580,
18.2923,
18.4205],
[18.3830,
18.3250,
18.4381],
[18.4135,
18.3645,
18.4753],
[18.4580,
18.4095,
18.5170],
[18.4900,
18.4430,
18.5435]
]
ticks = [120,
240,
360,
516,
662,
740,
874,
1022,
1081,
1201,
1320,
1451,
1562,
1680,
1863]
def set_box_color(bp, color):
plt.setp(bp['boxes'], color=color)
plt.setp(bp['whiskers'], color=color)
plt.setp(bp['caps'], color=color)
plt.setp(bp['medians'], color=color)
plt.figure()
bpl = plt.boxplot(ord, positions=np.array(range(len(ord)))*3.0-0.3, sym='', widths=0.6)
bpr = plt.boxplot(seq, positions=np.array(range(len(seq)))*3.0+0.3, sym='', widths=0.6)
bpg = plt.boxplot(apd, positions=np.array(range(len(apd)))*3.0+0.9, sym='', widths=0.6)
set_box_color(bpl, '#D7191C') # colors are from http://colorbrewer2.org/
set_box_color(bpr, '#2C7BB6')
set_box_color(bpg, '#99d8c9')
# draw temporary red and blue lines and use them to create a legend
plt.plot([], c='#D7191C', label='ORD')
plt.plot([], c='#2C7BB6', label='SEQ')
plt.plot([], c='#99d8c9', label='APD')
plt.legend()
plt.xticks(range(0, len(ticks) * 3, 3), ticks)
plt.xlim(-2, len(ticks)*3)
plt.ylim(0, 20)
plt.tight_layout()
plt.show()
plt.savefig('boxcompare.png')

increasing width of figure in matplotlib according to number of x values

I was trying to draw a barchart using matplotlib.The number of items to be plotted can vary . I cannot set the figure.set_size_inches(w,h) or set_figwidth(w) with constants(like 6.,8. or 8.,12. etc),since I cannot tell in advance what the value of w or h should be.I want the width of figure to increase as the number of items to be plotted increases.Can someone tell me how I can do this?
import pylab
def create_barchart(map):
xvaluenames = map.keys()
xvaluenames.sort()
yvalues = map.values()
max_yvalue = get_max_yvalue(yvalues)
xdata = range(len(xvaluenames))
ydata = [map[x] for x in xvaluenames]
splitxdata = [x.split('-',1) for x in xvaluenames]
xlabels = [x[0] for x in splitxdata]
figure = pylab.figure()
ax = figure.add_subplot(1,1,1)
figsize = figure.get_size_inches()
print 'figure size1=',figsize,'width=',figsize[0],'height=',figsize[1]
barwidth = .25
ystep = max_yvalue/5
pylab.grid(True)
if xdata and ydata:
ax.bar(xdata, ydata, width=barwidth,align='center',color='orange')
ax.set_xlabel('xvalues',color='green')
ax.set_ylabel('yvalues',color='green')
ax.set_xticks(xdata)
ax.set_xlim([min(xdata) - 0.5, max(xdata) + 0.5])
ax.set_xticklabels(xlabels)
ax.set_yticks(range(0,max_yvalue+ystep,ystep))
ax.set_ylim(0,max(ydata)+ystep)
figure.autofmt_xdate(rotation=30)
figure.savefig('mybarplot',format="png")
print 'figure size2=',figure.get_size_inches()
pylab.show()
def get_max_yvalue(yvals):
return max(yvals) if yvals else 0
if I try with a small set of items,I get
if __name__=='__main__':
datamap = dict(mark=39,jim=40, simon=20,dan=33)
print datamap
create_barchart(datamap)
but if I use a larger set
datamap = dict(mark=39,jim=40, simon=20,dan=33)
additional_values= dict(jon=34,ray=23,bert=45,kevin=35,ned=31,bran=11,tywin=56,tyrion=30,jaime=36,griffin=25,viserys=25)
datamap.update(additional_values)
create_barchart(datamap)
This looks awful,
I am wondering if there is a way to increase the width of figure,according to the number of items to be plotted,keeping the width of bars in both cases same
You can set the width when you initialize the figure:
# default scale is 1 in your original case, scales with other cases:
widthscale = len(yvalues)/4
figsize = (8*widthscale,6) # fig size in inches (width,height)
figure = pylab.figure(figsize = figsize) # set the figsize
Replace the figure = pylab.figure() line with the above three lines and you get what your asking for.

Categories

Resources