Python Matplotlib: Color graph bygroup [duplicate] - python

Here is needed to plot CDF for 8 different functions in one plot. The problem that it gives just 7 different colors and the 8 one gives just first blue color again. How to make 8 different colors?
Here is the script:
locerror_2d=[Scan_Around[1],Triangle_Around[1],M_shape_Around[1],Hilbert_Around[1],Scan_SbS[1],Triangle_SbS[1],M_shape_SbS[1],Hilbert_SbS[1]]
# N = len(locerror_2d[0]) #same for all ( here, I hope so... )
# N1=len(locerror_2d[2])
H_cent,h_cent1 = np.histogram( locerror_2d[0], bins = 10, normed = True ) # Random Walk Centroid
hy_cent = np.cumsum(H_cent)*(h_cent1[1] - h_cent1[0])
H_1st,h_1st = np.histogram( locerror_2d[1], bins = 10, normed = True ) # Random Walk Weighterd
hy_1st = np.cumsum(H_1st)*(h_1st[1] - h_1st[0])
H_2nd,h_2nd = np.histogram( locerror_2d[2], bins = 10, normed = True ) # Circle Walk Centroid
hy_2nd = np.cumsum(H_2nd)*(h_2nd[1] - h_2nd[0])
H_3rd,h_3rd = np.histogram( locerror_2d[3], bins = 10, normed = True ) # Circle Walk Weighterd
hy_3rd = np.cumsum(H_3rd)*(h_3rd[1] - h_3rd[0])
H_mm,h_mm = np.histogram( locerror_2d[4], bins = 10, normed = True ) # G Walk Centroid
hy_mm = np.cumsum(H_mm)*(h_mm[1] - h_mm[0])
H_shr,h_shr = np.histogram( locerror_2d[5], bins = 10, normed = True ) # G Walk Weighterd
hy_shr = np.cumsum(H_shr)*(h_shr[1] - h_shr[0])
H_s,h_s = np.histogram( locerror_2d[6], bins = 10, normed = True ) # G Walk Weighterd
hy_s = np.cumsum(H_s)*(h_s[1] - h_s[0])
H_sh,h_sh = np.histogram( locerror_2d[7], bins = 10, normed = True ) # G Walk Weighterd
hy_sh = np.cumsum(H_sh)*(h_sh[1] - h_sh[0])
plt.hold(True)
ddd_hist_cent, = plt.plot(h_cent1[1:], hy_cent,label="Scan_Around") # centroid
ddd_hist_1st, = plt.plot(h_1st[1:], hy_1st,label='Triangle_Around') #Gradient
ddd_circ_cent, = plt.plot(h_2nd[1:], hy_cent,label="M_shape_around") # centroid
ddd_circ_wei, = plt.plot(h_3rd[1:], hy_1st,label='Hilbert_Around') #Gradient
ddd_g_cent, = plt.plot(h_mm[1:], hy_cent,label="Scan_SbS") # centroid
ddd_g_wei, = plt.plot(h_shr[1:], hy_1st,label='Triangle_SbS') #Gradient
ddd_g_w, = plt.plot(h_s[1:], hy_cent,label='M_shape_SbS')
ddd_g_we, = plt.plot(h_sh[1:], hy_1st,label='Hilbert_SbS')
plt.hold(False)
plt.rc('legend',**{'fontsize':10})
plt.legend(handles=[ddd_hist_cent, ddd_hist_1st, ddd_circ_cent, ddd_circ_wei, ddd_g_cent,ddd_g_wei, ddd_g_w],loc='center left', bbox_to_anchor=(0.75, 0.18)) #no trilateration here
plt.ylabel('Probability')
plt.xlabel('Localization Error, m')
plt.ylim(ymax = 1.1, ymin = 0)
plt.title('Path Planning Algorithms')
plt.grid()
plt.show()
Thank you

I love to read my colors directly from a colormap with this code
def getColor(c, N, idx):
import matplotlib as mpl
cmap = mpl.cm.get_cmap(c)
norm = mpl.colors.Normalize(vmin=0.0, vmax=N - 1)
return cmap(norm(idx))
Here, c is the name of the colormap (see https://matplotlib.org/examples/color/colormaps_reference.html for a list), N is the number of colors you want in total, and idx is just an index that will yield the specific color.
Then when calling the plot function, just add the color=getColor(c, N, idx) option.

ok. I got it. In the end of plot I just need to show the color.
ddd_hist_cent, = plt.plot(h_cent1[1:], hy_cent,label="Scan_Around", c='yellow')

Easiest solution: Give the last curve a different color:
plt.plot(h_sh[1:], hy_1st,label='Hilbert_SbS', color="orange")
Matplotlib version 1.5 or below has 7 different colors in its color cycle, while matplotlib 2.0 has 10 different colors. Hence, updating matplotlib is another option.
In general, you may of course define your own color cycle which has as many colors as you wish.
Build a cycler from a colormap, as shown in this question:
import matplotlib.pyplot as plt
from cycler import cycler
import numpy as np
N = 8 # number of colors
plt.rcParams["axes.prop_cycle"] = cycler('color', plt.cm.jet(np.linspace(0,1,N)) )
Build a cycler from a list of colors:
import matplotlib.pyplot as plt
from cycler import cycler
colors=["aquamarine","crimson","gold","indigo",
"lime","orange","orchid","sienna"]
plt.rcParams["axes.prop_cycle"] = cycler('color',colors)

Related

How to ignore a color or alpha when using clusters

I am trying to find the dominant color of an image using Pil and cluster. My problem is that my images has a transparent background because these are .png and so i always get black as the dominant color. I'd like to ignore the first dominant color and pick the second most dominant color.
Is there a way to ignore alpha color or just remove it from the result?
I am afraid that by just removing the first most dominant color, i would sometimes remove the actual dominant color in case of the background being a really small part of the image.
Here is my code :
from PIL import Image
import numpy
import math
import matplotlib.pyplot as plot
from sklearn.cluster import MiniBatchKMeans
imgfile = Image.open("images/abra.png")
numarray = numpy.array(imgfile.getdata(), numpy.uint8)
X = []
Y = []
fig, axes = plot.subplots(nrows=5, ncols=2, figsize=(20,25))
xaxis = 0
yaxis = 0
cluster_count = 3
clusters = MiniBatchKMeans(n_clusters = cluster_count)
clusters.fit(numarray)
npbins = numpy.arange(0, cluster_count + 1)
histogram = numpy.histogram(clusters.labels_, bins=npbins)
labels = numpy.unique(clusters.labels_)
barlist = axes[xaxis, yaxis].bar(labels, histogram[0])
if(yaxis == 0):
yaxis = 1
else:
xaxis = xaxis + 1
yaxis = 0
for i in range(cluster_count):
barlist[i].set_color('#%02x%02x%02x' % (
math.ceil(clusters.cluster_centers_[i][0]),
math.ceil(clusters.cluster_centers_[i][1]),
math.ceil(clusters.cluster_centers_[i][2])))
plot.show()
Here is en example of my current code :
Image given :
Returned values :
You could avoid passing transparent pixels into the classifier like this, if that's what you mean:
#!/usr/bin/env python3
from PIL import Image
import numpy as np
import math
import matplotlib.pyplot as plot
from sklearn.cluster import MiniBatchKMeans
# Open image
imgfile = Image.open("abra.png")
# Only pass through non-transparent pixels, i.e. those where A!=0 in the RGBA quad
na = np.array([f for f in imgfile.getdata() if f[3] !=0], np.uint8)
X = []
Y = []
fig, axes = plot.subplots(nrows=5, ncols=2, figsize=(20,25))
xaxis = 0
yaxis = 0
cluster_count = 3
clusters = MiniBatchKMeans(n_clusters = cluster_count)
clusters.fit(na)
npbins = np.arange(0, cluster_count + 1)
histogram = np.histogram(clusters.labels_, bins=npbins)
labels = np.unique(clusters.labels_)
barlist = axes[xaxis, yaxis].bar(labels, histogram[0])
if(yaxis == 0):
yaxis = 1
else:
xaxis = xaxis + 1
yaxis = 0
for i in range(cluster_count):
barlist[i].set_color('#%02x%02x%02x' % (
math.ceil(clusters.cluster_centers_[i][0]),
math.ceil(clusters.cluster_centers_[i][1]),
math.ceil(clusters.cluster_centers_[i][2])))
plot.show()

simple animation with matplotlib

I am trying to make a simple animation for a stochastic process (just black and white dots randomly changing their colors). To simulate that, I have basically plotted the dots over a grid. However, the important parameter for me being the rate of black dots, I'd like to draw under this grid a progressive bar showing the rate #blackdots/#totaldots looking approximately like this : [///////////////////////_____] 70% (simply just like a power charge bar).
I tried this but the bars are overlaid, and I don't think that Slider is meant to do such animations.
import numpy as np
import matplotlib.pyplot as plt
import random
from matplotlib.widgets import Slider
t=1500
d=5
n=10
raws = [i for i in range(n)]
config = [[2*random.randrange(2)-1 for i in range(n)] for i in range(n)]
def color(op):
if op == 1:
return 'white'
return 'black'
nbblack = 0
for i in config:
for j in i :
nbblack += (j==-1)
blackrate = nbblack/(n**2)
plt.subplots_adjust(bottom=0.25)
for line in range(n):
colors = [color(config[line][raw]) for raw in raws]
plt.scatter([line]*n,raws,c=colors,edgecolors='black',s=50)
plt.title('t=0',fontdict={'size': 16},x=-0.20,y=25)
samp = Slider(axamp, 'Rate', 0, 1, valinit=blackrate,color='black')
for step in range(t):
plt.pause(0.001)
xpick = random.randrange(n)
ypick = random.randrange(n)
opinion_picked = config[xpick][ypick]
for j in range(d) :
neighboor = random.randrange(n),random.randrange(n)
opinion_neig = config[neighboor[0]][neighboor[1]]
if opinion_neig == opinion_picked :
break
elif j == d-1 :
config[xpick][ypick]=-config[xpick][ypick]
nbblack-=config[xpick][ypick]
blackrate = nbblack/(n**2)
plt.title('t={}'.format(step),fontdict={'size': 16},x=-0.20,y=25)
for line in range(n):
colors = [color(config[line][raw]) for raw in raws]
plt.scatter([line]*n,raws,c=colors,edgecolors='black',s=50)
axamp = plt.axes([0.28, 0.15, 0.48, 0.03])
samp = Slider(axamp, 'Rate', 0, 1, valinit=blackrate,color='black')
plt.show()
I am not very familiar with maplot so please let me know if there is a better way to do things and thanks a lot for your help !
I don't think that Slider is meant to do such animations ... please let me know if there is a better way to do things ...
Maybe using a custom colorbar would work. I adapted from the Discrete Intervals colorbar example.
The following uses the percentage of black dots to decide which portion of the color bar should be black or white.
Here is an example without animation: five successive plots drawn by a loop. I tried to keep it as close to your example as possible.
import matplotlib as mpl
from matplotlib import pyplot as plt
import random
t = 1500
d = 5
n = 10
raws = [i for i in range(n)]
def f(t=t, d=d, n=n, raws=raws):
# try to get more skew in the data
mode = random.random()
config = [[random.triangular(mode=mode) > 0.5 for i in range(n)] for i in range(n)]
config = [[int(item) or -1 for item in row] for row in config]
# config = [[2*random.randrange(2)-1 for i in range(n)] for i in range(n)]
def color(op):
if op == 1:
return "white"
return "black"
nbblack = 0
for i in config:
for j in i:
nbblack += j == -1
blackrate = nbblack / (n ** 2)
fig, ax = plt.subplots()
fig.subplots_adjust(bottom=0.25)
# plt.subplots_adjust(bottom=0.25)
for line in range(n):
colors = [color(config[line][raw]) for raw in raws]
plt.scatter([line] * n, raws, c=colors, edgecolors="black", s=50)
plt.title("t=0", fontdict={"size": 16}, x=-0.20, y=25)
cmap = mpl.colors.ListedColormap(["black", "white"])
bounds = [0, int(blackrate * 100), 100]
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
fig.colorbar(
mpl.cm.ScalarMappable(cmap=cmap, norm=norm),
# cax=ax,
# boundaries=[0] + bounds + [13], # Adding values for extensions.
# extend='both',
ticks=bounds,
spacing="proportional",
orientation="horizontal",
label="Percentage Black",
)
plt.show()
plt.close()
for _ in range(5):
f()
BoundaryNorm determines how the colors are distributed. The example uses two colors, black/white, and defines two bins between 0 and 100 using the percentage of black dots for the bin edge.
The spacing="proportional" argument to Figure.colorbar ensures the black/white area is proportional to the bins.
The Matplotlib Tutorials are worth investing time in.

"Exploding" wedges of pie chart when plotting them on a map (Python, matplotlib)

So I've successfully plotted pie charts on a map as markers using ax.scatter, but I'm having trouble with some of the wedges "exploding" out of the pie chart. I can't seem to find the reason for this in my code, and have been unable to find an explanation anywhere online. This code is based on the example here , which a colleague has also used and resulted in normal, uniform pie charts. Between us we can't find the issue, and no errors occur.
The code:
import numpy as np
import math
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap, cm
australia_data = np.zeros((24,12))
colors = ['red','yellow','blue','mediumorchid']
#pie chart locations
xlon=[146.7,166,101.6,137.4,145.1,113.6,169.7,113.3,176.0,139.6,148.9,124.2,132.4,142.0,129.6,148.0,116.5,142.8,141.7,128.0,113.6,120.7,128.3,148.6]
ylat=[-42.2,-19.2,-0.5,-3.5,-34.4,-8.7,-45.1,-1.0,-38.6,-26.7,-29.1,-20.0,-14.4,-18.9,-31.3,-6.6,-23.8,-3.4,-7.5,-25.6,3.8,-3.1,-1.9,-23.2]
#function to draw pie charts on map
def draw_pie(ax,X=0, Y=0, size = 1500):
xy = []
start = 0.17
ratios=[1/12.]*12
for ratio in ratios:
x = [0] + np.cos(np.linspace(2*math.pi*start,2*math.pi*(start+ratio))).tolist() #30
y = [0] + np.sin(np.linspace(2*math.pi*start,2*math.pi*(start+ratio))).tolist() #30
xy1=(zip(x,y))
xy.append(xy1)
start -= ratio
piecolors = []
for lt in range(12):
c = australia_data[b,lt]-1
c=int(c)
piecolors.append(colors[c])
for i, xyi in enumerate(xy):
ax.scatter([X],[Y] , marker=(xyi,0), s=size, facecolor=piecolors[i],linewidth=0.5,alpha=.7)
australia_data[:,11] = 1
australia_data[:,4] = 3
australia_data[:,1] = 2
fig = plt.figure()
ax = fig.add_axes([.05,.01,.79,.95])
x1 = 90 #left
x2 = 180 #right
y1 = -50 #bottom
y2 = 10 #top
#Create the map
m = Basemap(resolution='l',projection='merc', llcrnrlat=y1,urcrnrlat=y2,llcrnrlon=x1,urcrnrlon=x2,lat_ts=0) #,lat_ts=(x1+x2)/2
m.drawcoastlines()
#plots pie charts:
for b in range(24):
X,Y=m(xlon[b],ylat[b])
draw_pie(ax,X, Y,size=400)
plt.savefig('australia_pies.png',dpi=400)
Any ideas as to why this is happening (and how to fix it!) would be greatly appreciated!
Edit: it seems to be an issue with the number of wedges in the pie chart - reducing this to 6 results in uniform pies, but 7+ causes some wedges to "explode".
Looking at the scatter piecharts example, you forgot to adjust the size of the pie wedges according to the maximum distance from 0 to the the arc of the wedge. This is necessary because markers normalize the path given before drawing it, hence different wedges need different sizes in order to appear with the same size in the final plot.
import numpy as np
import matplotlib.pyplot as plt
#function to draw pie charts on map
def draw_pie(ax,X=0, Y=0, size = 1500):
xy = []; s=[]
start = 0.0
ratios=[1/12.]*12
for ratio in ratios:
x = [0] + np.cos(np.linspace(2*np.pi*start,2*np.pi*(start+ratio))).tolist() #30
y = [0] + np.sin(np.linspace(2*np.pi*start,2*np.pi*(start+ratio))).tolist() #30
xy1 = np.column_stack([x, y])
s1 = np.abs(xy1).max()
xy.append(xy1)
s.append(s1)
start -= ratio
for xyi, si in zip(xy,s):
ax.scatter([X],[Y] , marker=(xyi,0), s=size*si**2, edgecolor="k")
fig, ax = plt.subplots()
X,Y=166,50
draw_pie(ax,X, Y,size=3000)
plt.show()

Histogram bars overlapping matplotlib

I am able to build the histogram I need. However, the bars overlap over one another.
As you can see I changed the width of the bars to 0.2 but it still overlaps. What is the mistake I am doing?
from matplotlib import pyplot as plt
import numpy as np
from matplotlib.font_manager import FontProperties
from random import randrange
color = ['r', 'b', 'g','c','m','y','k','darkgreen', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred']
label = ['2','6','10','14','18','22','26','30','34','38','42','46']
file_names = ['a','b','c']
diff = [[randrange(10) for a in range(0, len(label))] for a in range(0, len(file_names))]
print diff
x = diff
name = file_names
y = zip(*x)
pos = np.arange(len(x))
width = 1. / (1 + len(x))
fig, ax = plt.subplots()
for idx, (serie, color,label) in enumerate(zip(y, color,label)):
ax.bar(pos + idx * width, serie, width, color=color, label=label)
ax.set_xticks(pos + width)
plt.xlabel('foo')
plt.ylabel('bar')
ax.set_xticklabels(name)
ax.legend()
plt.savefig("final" + '.eps', bbox_inches='tight', pad_inches=0.5,dpi=100,format="eps")
plt.clf()
Here is the graph:
As you can see in the below example, you can easily get non-overlapping bars using a heavily simplified version of your plotting code. I'd suggest you to have a closer look at whether x and y really are what you expect them to be. (And that you try to simplify your code as much as possible when you are looking for an error in the code.)
Also have a look at the computation of the width of the bars. You appear to use the number of subjects for this, while it should be the number of bars per subject instead.
Have a look at this example:
import numpy as np
import matplotlib.pyplot as plt
subjects = ('Tom', 'Dick', 'Harry', 'Sally', 'Sue')
# number of bars per subject
n = 5
# y-data per subject
y = np.random.rand(n, len(subjects))
# x-positions for the bars
x = np.arange(len(subjects))
# plot bars
width = 1./(1+n) # <-- n.b., use number of bars, not number of subjects
for i, yi in enumerate(y):
plt.bar(x+i*width, yi, width)
# add labels
plt.xticks(x+n/2.*width, subjects)
plt.show()
This is the result image:
For reference:
http://matplotlib.org/examples/api/barchart_demo.html
http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.bar
The problem is that the width of your bars is calculated from the three subjects, not the twelve bars per subject. That means you're placing multiple bars at each x-position. Try swapping in these lines where appropriate to fix that:
n = len(x[0]) # New variable with the right length to calculate bar width
width = 1. / (1 + n)
ax.set_xticks(pos + n/2. * width)

matplotlib: Group boxplots

Is there a way to group boxplots in matplotlib?
Assume we have three groups "A", "B", and "C" and for each we want to create a boxplot for both "apples" and "oranges". If a grouping is not possible directly, we can create all six combinations and place them linearly side by side. What would be to simplest way to visualize the groupings? I'm trying to avoid setting the tick labels to something like "A + apples" since my scenario involves much longer names than "A".
How about using colors to differentiate between "apples" and "oranges" and spacing to separate "A", "B" and "C"?
Something like this:
from pylab import plot, show, savefig, xlim, figure, \
hold, ylim, legend, boxplot, setp, axes
# function for setting the colors of the box plots pairs
def setBoxColors(bp):
setp(bp['boxes'][0], color='blue')
setp(bp['caps'][0], color='blue')
setp(bp['caps'][1], color='blue')
setp(bp['whiskers'][0], color='blue')
setp(bp['whiskers'][1], color='blue')
setp(bp['fliers'][0], color='blue')
setp(bp['fliers'][1], color='blue')
setp(bp['medians'][0], color='blue')
setp(bp['boxes'][1], color='red')
setp(bp['caps'][2], color='red')
setp(bp['caps'][3], color='red')
setp(bp['whiskers'][2], color='red')
setp(bp['whiskers'][3], color='red')
setp(bp['fliers'][2], color='red')
setp(bp['fliers'][3], color='red')
setp(bp['medians'][1], color='red')
# Some fake data to plot
A= [[1, 2, 5,], [7, 2]]
B = [[5, 7, 2, 2, 5], [7, 2, 5]]
C = [[3,2,5,7], [6, 7, 3]]
fig = figure()
ax = axes()
hold(True)
# first boxplot pair
bp = boxplot(A, positions = [1, 2], widths = 0.6)
setBoxColors(bp)
# second boxplot pair
bp = boxplot(B, positions = [4, 5], widths = 0.6)
setBoxColors(bp)
# thrid boxplot pair
bp = boxplot(C, positions = [7, 8], widths = 0.6)
setBoxColors(bp)
# set axes limits and labels
xlim(0,9)
ylim(0,9)
ax.set_xticklabels(['A', 'B', 'C'])
ax.set_xticks([1.5, 4.5, 7.5])
# draw temporary red and blue lines and use them to create a legend
hB, = plot([1,1],'b-')
hR, = plot([1,1],'r-')
legend((hB, hR),('Apples', 'Oranges'))
hB.set_visible(False)
hR.set_visible(False)
savefig('boxcompare.png')
show()
Here is my version. It stores data based on categories.
import matplotlib.pyplot as plt
import numpy as np
data_a = [[1,2,5], [5,7,2,2,5], [7,2,5]]
data_b = [[6,4,2], [1,2,5,3,2], [2,3,5,1]]
ticks = ['A', 'B', 'C']
def set_box_color(bp, color):
plt.setp(bp['boxes'], color=color)
plt.setp(bp['whiskers'], color=color)
plt.setp(bp['caps'], color=color)
plt.setp(bp['medians'], color=color)
plt.figure()
bpl = plt.boxplot(data_a, positions=np.array(xrange(len(data_a)))*2.0-0.4, sym='', widths=0.6)
bpr = plt.boxplot(data_b, positions=np.array(xrange(len(data_b)))*2.0+0.4, sym='', widths=0.6)
set_box_color(bpl, '#D7191C') # colors are from http://colorbrewer2.org/
set_box_color(bpr, '#2C7BB6')
# draw temporary red and blue lines and use them to create a legend
plt.plot([], c='#D7191C', label='Apples')
plt.plot([], c='#2C7BB6', label='Oranges')
plt.legend()
plt.xticks(xrange(0, len(ticks) * 2, 2), ticks)
plt.xlim(-2, len(ticks)*2)
plt.ylim(0, 8)
plt.tight_layout()
plt.savefig('boxcompare.png')
I am short of reputation so I cannot post an image to here.
You can run it and see the result. Basically it's very similar to what Molly did.
Note that, depending on the version of python you are using, you may need to replace xrange with range
A simple way would be to use pandas.
I adapted an example from the plotting documentation:
In [1]: import pandas as pd, numpy as np
In [2]: df = pd.DataFrame(np.random.rand(12,2), columns=['Apples', 'Oranges'] )
In [3]: df['Categories'] = pd.Series(list('AAAABBBBCCCC'))
In [4]: pd.options.display.mpl_style = 'default'
In [5]: df.boxplot(by='Categories')
Out[5]:
array([<matplotlib.axes.AxesSubplot object at 0x51a5190>,
<matplotlib.axes.AxesSubplot object at 0x53fddd0>], dtype=object)
Mock data:
df = pd.DataFrame({'Group':['A','A','A','B','C','B','B','C','A','C'],\
'Apple':np.random.rand(10),'Orange':np.random.rand(10)})
df = df[['Group','Apple','Orange']]
Group Apple Orange
0 A 0.465636 0.537723
1 A 0.560537 0.727238
2 A 0.268154 0.648927
3 B 0.722644 0.115550
4 C 0.586346 0.042896
5 B 0.562881 0.369686
6 B 0.395236 0.672477
7 C 0.577949 0.358801
8 A 0.764069 0.642724
9 C 0.731076 0.302369
You can use the Seaborn library for these plots. First melt the dataframe to format data and then create the boxplot of your choice.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
dd=pd.melt(df,id_vars=['Group'],value_vars=['Apple','Orange'],var_name='fruits')
sns.boxplot(x='Group',y='value',data=dd,hue='fruits')
The accepted answer uses pylab and works for 2 groups. What if we have more?
Here is the flexible generic solution with matplotlib
import matplotlib.pyplot as pl
# there are 4 individuals, each one tested under 3 different settings
# --- Random data, e.g. results per algorithm:
# Invidual 1
d1_1 = [1,1,2,2,3,3]
d1_2 = [3,3,4,4,5,5]
d1_3 = [5,5,6,6,7,7]
# Individual 2
d2_1 = [7,7,8,8,9,9]
d2_2 = [9,9,10,10,11,11]
d2_3 = [11,11,12,12,13,13]
# Individual 3
d3_1 = [1,2,3,4,5,6]
d3_2 = [4,5,6,7,8,9]
d3_3 = [10,11,12,13,14,15]
# Individual 4
d4_1 = [1,1,2,2,3,3]
d4_2 = [9,9,10,10,11,11]
d4_3 = [10,11,12,13,14,15]
# --- Combining your data:
data_group1 = [d1_1, d1_2, d1_3]
data_group2 = [d2_1, d2_2, d2_3]
data_group3 = [d3_1, d3_2, d3_3]
data_group4 = [d4_1, d4_2, d4_3]
colors = ['pink', 'lightblue', 'lightgreen', 'violet']
# we compare the performances of the 4 individuals within the same set of 3 settings
data_groups = [data_group1, data_group2, data_group3, data_group4]
# --- Labels for your data:
labels_list = ['a','b', 'c']
width = 1/len(labels_list)
xlocations = [ x*((1+ len(data_groups))*width) for x in range(len(data_group1)) ]
symbol = 'r+'
ymin = min ( [ val for dg in data_groups for data in dg for val in data ] )
ymax = max ( [ val for dg in data_groups for data in dg for val in data ])
ax = pl.gca()
ax.set_ylim(ymin,ymax)
ax.grid(True, linestyle='dotted')
ax.set_axisbelow(True)
pl.xlabel('X axis label')
pl.ylabel('Y axis label')
pl.title('title')
space = len(data_groups)/2
offset = len(data_groups)/2
# --- Offset the positions per group:
group_positions = []
for num, dg in enumerate(data_groups):
_off = (0 - space + (0.5+num))
print(_off)
group_positions.append([x+_off*(width+0.01) for x in xlocations])
for dg, pos, c in zip(data_groups, group_positions, colors):
boxes = ax.boxplot(dg,
sym=symbol,
labels=['']*len(labels_list),
# labels=labels_list,
positions=pos,
widths=width,
boxprops=dict(facecolor=c),
# capprops=dict(color=c),
# whiskerprops=dict(color=c),
# flierprops=dict(color=c, markeredgecolor=c),
medianprops=dict(color='grey'),
# notch=False,
# vert=True,
# whis=1.5,
# bootstrap=None,
# usermedians=None,
# conf_intervals=None,
patch_artist=True,
)
ax.set_xticks( xlocations )
ax.set_xticklabels( labels_list, rotation=0 )
pl.show()
Just to add to the conversation, I have found a more elegant way to change the color of the box plot by iterating over the dictionary of the object itself
import numpy as np
import matplotlib.pyplot as plt
def color_box(bp, color):
# Define the elements to color. You can also add medians, fliers and means
elements = ['boxes','caps','whiskers']
# Iterate over each of the elements changing the color
for elem in elements:
[plt.setp(bp[elem][idx], color=color) for idx in xrange(len(bp[elem]))]
return
a = np.random.uniform(0,10,[100,5])
bp = plt.boxplot(a)
color_box(bp, 'red')
Cheers!
Here's a function I wrote that takes Molly's code and some other code I've found on the internet to make slightly fancier grouped boxplots:
import numpy as np
import matplotlib.pyplot as plt
def custom_legend(colors, labels, linestyles=None):
""" Creates a list of matplotlib Patch objects that can be passed to the legend(...) function to create a custom
legend.
:param colors: A list of colors, one for each entry in the legend. You can also include a linestyle, for example: 'k--'
:param labels: A list of labels, one for each entry in the legend.
"""
if linestyles is not None:
assert len(linestyles) == len(colors), "Length of linestyles must match length of colors."
h = list()
for k,(c,l) in enumerate(zip(colors, labels)):
clr = c
ls = 'solid'
if linestyles is not None:
ls = linestyles[k]
patch = patches.Patch(color=clr, label=l, linestyle=ls)
h.append(patch)
return h
def grouped_boxplot(data, group_names=None, subgroup_names=None, ax=None, subgroup_colors=None,
box_width=0.6, box_spacing=1.0):
""" Draws a grouped boxplot. The data should be organized in a hierarchy, where there are multiple
subgroups for each main group.
:param data: A dictionary of length equal to the number of the groups. The key should be the
group name, the value should be a list of arrays. The length of the list should be
equal to the number of subgroups.
:param group_names: (Optional) The group names, should be the same as data.keys(), but can be ordered.
:param subgroup_names: (Optional) Names of the subgroups.
:param subgroup_colors: A list specifying the plot color for each subgroup.
:param ax: (Optional) The axis to plot on.
"""
if group_names is None:
group_names = data.keys()
if ax is None:
ax = plt.gca()
plt.sca(ax)
nsubgroups = np.array([len(v) for v in data.values()])
assert len(np.unique(nsubgroups)) == 1, "Number of subgroups for each property differ!"
nsubgroups = nsubgroups[0]
if subgroup_colors is None:
subgroup_colors = list()
for k in range(nsubgroups):
subgroup_colors.append(np.random.rand(3))
else:
assert len(subgroup_colors) == nsubgroups, "subgroup_colors length must match number of subgroups (%d)" % nsubgroups
def _decorate_box(_bp, _d):
plt.setp(_bp['boxes'], lw=0, color='k')
plt.setp(_bp['whiskers'], lw=3.0, color='k')
# fill in each box with a color
assert len(_bp['boxes']) == nsubgroups
for _k,_box in enumerate(_bp['boxes']):
_boxX = list()
_boxY = list()
for _j in range(5):
_boxX.append(_box.get_xdata()[_j])
_boxY.append(_box.get_ydata()[_j])
_boxCoords = zip(_boxX, _boxY)
_boxPolygon = plt.Polygon(_boxCoords, facecolor=subgroup_colors[_k])
ax.add_patch(_boxPolygon)
# draw a black line for the median
for _k,_med in enumerate(_bp['medians']):
_medianX = list()
_medianY = list()
for _j in range(2):
_medianX.append(_med.get_xdata()[_j])
_medianY.append(_med.get_ydata()[_j])
plt.plot(_medianX, _medianY, 'k', linewidth=3.0)
# draw a black asterisk for the mean
plt.plot([np.mean(_med.get_xdata())], [np.mean(_d[_k])], color='w', marker='*',
markeredgecolor='k', markersize=12)
cpos = 1
label_pos = list()
for k in group_names:
d = data[k]
nsubgroups = len(d)
pos = np.arange(nsubgroups) + cpos
label_pos.append(pos.mean())
bp = plt.boxplot(d, positions=pos, widths=box_width)
_decorate_box(bp, d)
cpos += nsubgroups + box_spacing
plt.xlim(0, cpos-1)
plt.xticks(label_pos, group_names)
if subgroup_names is not None:
leg = custom_legend(subgroup_colors, subgroup_names)
plt.legend(handles=leg)
You can use the function(s) like this:
data = { 'A':[np.random.randn(100), np.random.randn(100) + 5],
'B':[np.random.randn(100)+1, np.random.randn(100) + 9],
'C':[np.random.randn(100)-3, np.random.randn(100) -5]
}
grouped_boxplot(data, group_names=['A', 'B', 'C'], subgroup_names=['Apples', 'Oranges'], subgroup_colors=['#D02D2E', '#D67700'])
plt.show()
Grouped boxplots, towards subtle academic publication styling... (source)
(Left) Python 2.7.12 Matplotlib v1.5.3. (Right) Python 3.7.3. Matplotlib v3.1.0.
Code:
import numpy as np
import matplotlib.pyplot as plt
# --- Your data, e.g. results per algorithm:
data1 = [5,5,4,3,3,5]
data2 = [6,6,4,6,8,5]
data3 = [7,8,4,5,8,2]
data4 = [6,9,3,6,8,4]
# --- Combining your data:
data_group1 = [data1, data2]
data_group2 = [data3, data4]
# --- Labels for your data:
labels_list = ['a','b']
xlocations = range(len(data_group1))
width = 0.3
symbol = 'r+'
ymin = 0
ymax = 10
ax = plt.gca()
ax.set_ylim(ymin,ymax)
ax.set_xticklabels( labels_list, rotation=0 )
ax.grid(True, linestyle='dotted')
ax.set_axisbelow(True)
ax.set_xticks(xlocations)
plt.xlabel('X axis label')
plt.ylabel('Y axis label')
plt.title('title')
# --- Offset the positions per group:
positions_group1 = [x-(width+0.01) for x in xlocations]
positions_group2 = xlocations
plt.boxplot(data_group1,
sym=symbol,
labels=['']*len(labels_list),
positions=positions_group1,
widths=width,
# notch=False,
# vert=True,
# whis=1.5,
# bootstrap=None,
# usermedians=None,
# conf_intervals=None,
# patch_artist=False,
)
plt.boxplot(data_group2,
labels=labels_list,
sym=symbol,
positions=positions_group2,
widths=width,
# notch=False,
# vert=True,
# whis=1.5,
# bootstrap=None,
# usermedians=None,
# conf_intervals=None,
# patch_artist=False,
)
plt.savefig('boxplot_grouped.png')
plt.savefig('boxplot_grouped.pdf') # when publishing, use high quality PDFs
#plt.show() # uncomment to show the plot.
I used the code given by Kuzeko and it worked well, but I found that the boxes in each group were being drawn in the reverse order. I changed ...x-_off... to ...x+_off... in the following line (just above the last for loop) which fixes it for me:
group_positions.append([x+_off*(width+0.01) for x in xlocations])
A boxplot above was modified to obtain group boxplots with 3 data types.
import matplotlib.pyplot as plt
import numpy as np
ord = [[16.9423,
4.0410,
19.1185],
[18.5134,
17.8048,
19.2669],
[18.7286,
18.0576,
19.1717],
[18.8998,
18.8469,
19.0005],
[18.8126,
18.7870,
18.8393],
[18.7770,
18.7511,
18.8022],
[18.7409,
18.7075,
18.7747],
[18.6866,
18.6624,
18.7093
],
[18.6748],
[18.9069,
18.6752,
19.0769],
[19.0012,
18.9783,
19.0202
],
[18.9448,
18.9134,
18.9813],
[19.1242,
18.8256,
19.3185],
[19.2118,
19.1661,
19.2580],
[19.2505,
19.1231,
19.3526]]
seq = [[17.8092,
4.0410,
19.6653],
[18.7266,
18.2556,
19.3739],
[18.6051,
18.0589,
19.0557],
[18.6467,
18.5629,
18.7566],
[18.5307,
18.4999,
18.5684],
[18.4732,
18.4484,
18.4985],
[18.5234,
18.5027,
18.4797,
18.4573],
[18.3987,
18.3636,
18.4544],
[18.3593],
[18.7234,
18.7092,
18.7598],
[18.7438,
18.7224,
18.7677],
[18.7304,
18.7111,
18.6880,
18.6913,
18.6678],
[18.8926,
18.5902,
19.2003],
[19.1059,
19.0835,
19.0601,
19.0373,
19.0147],
[19.1925,
19.0177,
19.2588]]
apd=[[17.0331,
4.0410,
18.5670],
[17.6124,
17.1975,
18.0755],
[17.3956,
17.1572,
17.9140],
[17.8295,
17.6514,
18.1466],
[18.0665,
17.9144,
18.2157],
[18.1518,
18.0382,
18.2722],
[18.1975,
18.0956,
18.2987],
[18.2219,
18.1293,
18.3062],
[18.2870,
18.2215,
18.3513],
[18.3047,
18.2363,
18.3950],
[18.3580,
18.2923,
18.4205],
[18.3830,
18.3250,
18.4381],
[18.4135,
18.3645,
18.4753],
[18.4580,
18.4095,
18.5170],
[18.4900,
18.4430,
18.5435]
]
ticks = [120,
240,
360,
516,
662,
740,
874,
1022,
1081,
1201,
1320,
1451,
1562,
1680,
1863]
def set_box_color(bp, color):
plt.setp(bp['boxes'], color=color)
plt.setp(bp['whiskers'], color=color)
plt.setp(bp['caps'], color=color)
plt.setp(bp['medians'], color=color)
plt.figure()
bpl = plt.boxplot(ord, positions=np.array(range(len(ord)))*3.0-0.3, sym='', widths=0.6)
bpr = plt.boxplot(seq, positions=np.array(range(len(seq)))*3.0+0.3, sym='', widths=0.6)
bpg = plt.boxplot(apd, positions=np.array(range(len(apd)))*3.0+0.9, sym='', widths=0.6)
set_box_color(bpl, '#D7191C') # colors are from http://colorbrewer2.org/
set_box_color(bpr, '#2C7BB6')
set_box_color(bpg, '#99d8c9')
# draw temporary red and blue lines and use them to create a legend
plt.plot([], c='#D7191C', label='ORD')
plt.plot([], c='#2C7BB6', label='SEQ')
plt.plot([], c='#99d8c9', label='APD')
plt.legend()
plt.xticks(range(0, len(ticks) * 3, 3), ticks)
plt.xlim(-2, len(ticks)*3)
plt.ylim(0, 20)
plt.tight_layout()
plt.show()
plt.savefig('boxcompare.png')

Categories

Resources