Change the facecolor of boxplot in pandas - python

I need to change the colors of the boxplot drawn using pandas utility function. I can change most properties using the color argument but can't figure out how to change the facecolor of the box. Someone knows how to do it?
import pandas as pd
import numpy as np
data = np.random.randn(100, 4)
labels = list("ABCD")
df = pd.DataFrame(data, columns=labels)
props = dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray")
df.plot.box(color=props)

While I still recommend seaborn and raw matplotlib over the plotting interface in pandas, it turns out that you can pass patch_artist=True as a kwarg to df.plot.box, which will pass it as a kwarg to df.plot, which will pass is as a kwarg to matplotlib.Axes.boxplot.
import pandas as pd
import numpy as np
data = np.random.randn(100, 4)
labels = list("ABCD")
df = pd.DataFrame(data, columns=labels)
props = dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray")
df.plot.box(color=props, patch_artist=True)

As suggested, I ended up creating a function to plot this, using raw matplotlib.
def plot_boxplot(data, ax):
bp = ax.boxplot(data.values, patch_artist=True)
for box in bp['boxes']:
box.set(color='DarkGreen')
box.set(facecolor='DarkGreen')
for whisker in bp['whiskers']:
whisker.set(color="DarkOrange")
for cap in bp['caps']:
cap.set(color="Gray")
for median in bp['medians']:
median.set(color="white")
ax.axhline(0, color="DarkBlue", linestyle=":")
ax.set_xticklabels(data.columns)

I suggest using df.plot.box with patch_artist=True and return_type='both' (which returns the matplotlib axes the boxplot is drawn on and a dictionary whose values are the matplotlib Lines of the boxplot) in order to have the best customization possibilities.
For example, given this data:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(
data=np.random.randn(100, 4),
columns=list("ABCD")
)
you can set a specific color for all the boxes:
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch in props['boxes']:
patch.set_facecolor('lime')
plt.show()
you can set a specific color for each box:
colors = ['green','blue','yellow','red']
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch,color in zip(props['boxes'],colors):
patch.set_facecolor(color)
plt.show()
you can easily integrate a colormap:
colors = np.random.randint(0,10, 4)
cm = plt.cm.get_cmap('rainbow')
colors_cm = [cm((c-colors.min())/(colors.max()-colors.min())) for c in colors]
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch,color in zip(props['boxes'],colors_cm):
patch.set_facecolor(color)
# to add colorbar
fig.colorbar(plt.cm.ScalarMappable(
plt.cm.colors.Normalize(min(colors),max(colors)),
cmap='rainbow'
), ax=ax, cmap='rainbow')
plt.show()

Related

Seaborn - Display Last Value / Label

I would like create an plot with to display the last value on line. But i can not create the plot with the last value on chart. Do you have an idea for to resolve my problem, thanks you !
Input :
DataFrame
Plot
Output :
Cross = Last Value In columns
Output Final
# import eikon as ek
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import seaborn as sns; sns.set()
import pylab
from scipy import *
from pylab import *
fichier = "P:/GESTION_RPSE/GES - Gestion Epargne Salariale/Dvp Python/Florian/Absolute
Performance/PLOT.csv"
df = pd.read_csv(fichier)
df = df.drop(columns=['Unnamed: 0'])
# sns.set()
plt.figure(figsize=(16, 10))
df = df.melt('Date', var_name='Company', value_name='Value')
#palette = sns.color_palette("husl",12)
ax = sns.lineplot(x="Date", y="Value", hue='Company', data=df).set_title("LaLaLa")
plt.show()
Do you just want to put an 'X' at the end of your lines?
If so, you could pass markerevery=[-1] to the call to lineplot(). However there are a few caveats:
You have to use style= instead of hue= otherwise, there are no markers drawn
Filled markers work better than unfilled markers (like "x"). You can just use markers=True to use the default markers, or pass a list markers=['s','d','o',etc...]
code:
fmri = sns.load_dataset("fmri")
fig, ax = plt.subplots()
ax = sns.lineplot(x="timepoint", y="signal",
style="event", data=fmri, ci=None, markers=True, markevery=[-1], markersize=10)

How to show label names in pandas groupby histogram plot

I can plot multiple histograms in a single plot using pandas but there are few things missing:
How to give the label.
I can only plot one figure, how to change it to layout=(3,1) or something else.
Also, in figure 1, all the bins are filled with solid colors, and its kind of difficult to know which is which, how to fill then with different markers (eg. crosses,slashes,etc)?
Here is the MWE:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')
df.groupby('species')['sepal_length'].hist(alpha=0.7,label='species')
plt.legend()
Output:
To change layout I can use by keyword, but can't give them colors
HOW TO GIVE DIFFERENT COLORS?
df.hist('sepal_length',by='species',layout=(3,1))
plt.tight_layout()
Gives:
You can resolve to groupby:
fig,ax = plt.subplots()
hatches = ('\\', '//', '..') # fill pattern
for (i, d),hatch in zip(df.groupby('species'), hatches):
d['sepal_length'].hist(alpha=0.7, ax=ax, label=i, hatch=hatch)
ax.legend()
Output:
In pandas version 1.1.0 you can simply set the legend keyword to true.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')
df.groupby('species')['sepal_length'].hist(alpha=0.7, legend = True)
output image
It's more code, but using pure matplotlib will always give you more control over the plots. For your second case:
import matplotlib.pyplot as plt
import numpy as np
from itertools import zip_longest
# Dictionary of color for each species
color_d = dict(zip_longest(df.species.unique(),
plt.rcParams['axes.prop_cycle'].by_key()['color']))
# Use the same bins for each
xmin = df.sepal_length.min()
xmax = df.sepal_length.max()
bins = np.linspace(xmin, xmax, 20)
# Set up correct number of subplots, space them out.
fig, ax = plt.subplots(nrows=df.species.nunique(), figsize=(4,8))
plt.subplots_adjust(hspace=0.4)
for i, (lab, gp) in enumerate(df.groupby('species')):
ax[i].hist(gp.sepal_length, ec='k', bins=bins, color=color_d[lab])
ax[i].set_title(lab)
# same xlim for each so we can see differences
ax[i].set_xlim(xmin, xmax)

How can I plot slice of certain DataFrame for each row with different color?

I would like to plot certain slices of my Pandas Dataframe for each rows (based on row indexes) with different colors.
My data look like the following:
I already tried with the help of this tutorial to find a way but I couldn't - probably due to a lack of skills.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.read_csv("D:\SOF10.csv" , header=None)
df.head()
#Slice interested data
C = df.iloc[:, 2::3]
#Plot Temp base on row index colorfully
C.apply(lambda x: plt.scatter(x.index, x, c='g'))
plt.show()
Following is my expected plot:
I was also wondering if I could displace the mean of each row of the sliced data which contains 480 values somewhere in the plot or in the legend beside of plot! Is it feasible (like the following picture) to calculate the mean and displaced somewhere in the legend or by using small font size displace next to its own data in graph ?
Data sample: data
This gives the plot without legend
C = df.iloc[:,2::3].stack().reset_index()
C.columns = ['level_0', 'level_1', 'Temperature']
fig, ax = plt.subplots(1,1)
C.plot('level_0', 'Temperature',
ax=ax, kind='scatter',
c='level_0', colormap='tab20',
colorbar=False, legend=True)
ax.set_xlabel('Cycles')
plt.show()
Edit to reflect modified question:
stack() transform your (sliced) dataframe to a series with index (row, col)
reset_index() reset the double-level index above to level_0 (row), level_1 (col).
set_xlabel sets the label of x-axis to what you want.
Edit 2: The following produces scatter with legend:
CC = df.iloc[:,2::3]
fig, ax = plt.subplots(1,1, figsize=(16,9))
labels = CC.mean(axis=1)
for i in CC.index:
ax.scatter([i]*len(CC.columns[1:]), CC.iloc[i,1:], label=labels[i])
ax.legend()
ax.set_xlabel('Cycles')
ax.set_ylabel('Temperature')
plt.show()
This may be an approximate answer. scatter(c=, cmap= can be used for desired coloring.
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import itertools
df = pd.DataFrame({'a':[34,22,1,34]})
fig, subplot_axes = plt.subplots(1, 1, figsize=(20, 10)) # width, height
colors = ['red','green','blue','purple']
cmap=matplotlib.colors.ListedColormap(colors)
for col in df.columns:
subplot_axes.scatter(df.index, df[col].values, c=df.index, cmap=cmap, alpha=.9)

making colorbar values integer in a heatmap matplotlib seaborn

I'm trying to make my colourbar have integer values instead of decimals, but coding this is a lot harder than anticipated.
my initial code
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
#sns.set()
# read data
revels_data = pd.read_csv("revels2.txt")
rd = revels_data
revels = rd.pivot("Flavour", "Packet number", "Contents")
# orders flavours
revels.index = pd.CategoricalIndex(revels.index, categories=["orange", "toffee", "chocolate", "malteser", "raisin", "coffee"])
revels.sortlevel(level=0, inplace=True)
# Draw a heatmap with the numeric values in each cell
ax = sns.heatmap(revels, annot=True, fmt="d", linewidths=0.4, cmap="YlOrRd")
ax.set_title('REVELS PACKET COUNT HEATMAP', weight="bold")
plt.show()
which produces
Trying to reverse engineer one of the answers from here
by adding the following code
cmap = plt.get_cmap("YlOrRd", np.max(rd.Contents)-np.min(rd.Contents)+1)
plt.get_cmap("YlOrRd", np.max(rd.Contents)-np.min(rd.Contents)+1)
# set limits .5 outside true range
mat = plt.matshow(rd.Contents, cmap=cmap, vmin = np.min(rd.Contents)-.5, vmax = np.max(rd.Contents)+.5)
plt.matshow(rd.Contents ,cmap=cmap, vmin = np.min(rd.Contents)-.5, vmax = np.max(rd.Contents)+.5)
#tell the colorbar to tick at integers
cax = plt.colorbar(mat, ticks=np.arange(np.min(rd.Contents),np.max(rd.Contents)+1))
plt.colorbar(mat, ticks=np.arange(np.min(rd.Contents),np.max(rd.Contents)+1))
but getting errors, namely ValueError: not enough values to unpack.
I think I may have applied the code wrong, would appreciate any help.
Here is a full working example, which creates a discrete colorbar for a seaborn heatmap plot with integer values as colorbar ticks.
import pandas as pd
import numpy as np; np.random.seed(8)
import matplotlib.pyplot as plt
import seaborn.apionly as sns
plt.rcParams["figure.figsize"] = 10,5.5
flavours=["orange", "toffee", "chocolate", "malteser", "raisin", "coffee"]
num = np.arange(0, 6*36).astype(int) % 36
flavs = np.random.choice(flavours, size=len(num))
conts = np.random.randint(0,6, len(num)).astype(int)
df = pd.DataFrame({"Packet number":num ,"Flavour":flavs,"Contents" : conts})
revels = pd.pivot_table(df, index=["Flavour"], columns=["Packet number"], values="Contents", aggfunc=np.sum)
revels.index = pd.CategoricalIndex(revels.index, categories=flavours)
revels.sortlevel(level=0, inplace=True)
revels= revels.fillna(0)
ticks=np.arange(revels.values.min(),revels.values.max()+1 )
boundaries = np.arange(revels.values.min()-.5,revels.values.max()+1.5 )
cmap = plt.get_cmap("YlOrRd", revels.values.max()-revels.values.min()+1)
ax = sns.heatmap(revels, annot=True, linewidths=0.4, cmap=cmap,
cbar_kws={"ticks":ticks, "boundaries":boundaries})
ax.set_title('REVELS PACKET COUNT HEATMAP', weight="bold")
plt.tight_layout()
plt.show()

Multiple histograms in Pandas

I would like to create the following histogram (see image below) taken from the book "Think Stats". However, I cannot get them on the same plot. Each DataFrame takes its own subplot.
I have the following code:
import nsfg
import matplotlib.pyplot as plt
df = nsfg.ReadFemPreg()
preg = nsfg.ReadFemPreg()
live = preg[preg.outcome == 1]
first = live[live.birthord == 1]
others = live[live.birthord != 1]
#fig = plt.figure()
#ax1 = fig.add_subplot(111)
first.hist(column = 'prglngth', bins = 40, color = 'teal', \
alpha = 0.5)
others.hist(column = 'prglngth', bins = 40, color = 'blue', \
alpha = 0.5)
plt.show()
The above code does not work when I use ax = ax1 as suggested in: pandas multiple plots not working as hists nor this example does what I need: Overlaying multiple histograms using pandas. When I use the code as it is, it creates two windows with histograms. Any ideas how to combine them?
Here's an example of how I'd like the final figure to look:
As far as I can tell, pandas can't handle this situation. That's ok since all of their plotting methods are for convenience only. You'll need to use matplotlib directly. Here's how I do it:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas
#import seaborn
#seaborn.set(style='ticks')
np.random.seed(0)
df = pandas.DataFrame(np.random.normal(size=(37,2)), columns=['A', 'B'])
fig, ax = plt.subplots()
a_heights, a_bins = np.histogram(df['A'])
b_heights, b_bins = np.histogram(df['B'], bins=a_bins)
width = (a_bins[1] - a_bins[0])/3
ax.bar(a_bins[:-1], a_heights, width=width, facecolor='cornflowerblue')
ax.bar(b_bins[:-1]+width, b_heights, width=width, facecolor='seagreen')
#seaborn.despine(ax=ax, offset=10)
And that gives me:
In case anyone wants to plot one histogram over another (rather than alternating bars) you can simply call .hist() consecutively on the series you want to plot:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas
np.random.seed(0)
df = pandas.DataFrame(np.random.normal(size=(37,2)), columns=['A', 'B'])
df['A'].hist()
df['B'].hist()
This gives you:
Note that the order you call .hist() matters (the first one will be at the back)
A quick solution is to use melt() from pandas and then plot with seaborn.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# make dataframe
df = pd.DataFrame(np.random.normal(size=(200,2)), columns=['A', 'B'])
# plot melted dataframe in a single command
sns.histplot(df.melt(), x='value', hue='variable',
multiple='dodge', shrink=.75, bins=20);
Setting multiple='dodge' makes it so the bars are side-by-side, and shrink=.75 makes it so the pair of bars take up 3/4 of the whole bin.
To help understand what melt() did, these are the dataframes df and df.melt():
From the pandas website (http://pandas.pydata.org/pandas-docs/stable/visualization.html#visualization-hist):
df4 = pd.DataFrame({'a': np.random.randn(1000) + 1, 'b': np.random.randn(1000),
'c': np.random.randn(1000) - 1}, columns=['a', 'b', 'c'])
plt.figure();
df4.plot(kind='hist', alpha=0.5)
You make two dataframes and one matplotlib axis
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
df1 = pd.DataFrame({
'data1': np.random.randn(10),
'data2': np.random.randn(10)
})
df2 = df1.copy()
fig, ax = plt.subplots()
df1.hist(column=['data1'], ax=ax)
df2.hist(column=['data2'], ax=ax)
Here is the snippet, In my case I have explicitly specified bins and range as I didn't handle outlier removal as the author of the book.
fig, ax = plt.subplots()
ax.hist([first.prglngth, others.prglngth], 10, (27, 50), histtype="bar", label=("First", "Other"))
ax.set_title("Histogram")
ax.legend()
Refer Matplotlib multihist plot with different sizes example.
this could be done with brevity
plt.hist([First, Other], bins = 40, color =('teal','blue'), label=("First", "Other"))
plt.legend(loc='best')
Note that as the number of bins increase, it may become a visual burden.
You could also try to check out the pandas.DataFrame.plot.hist() function which will plot the histogram of each column of the dataframe in the same figure.
Visibility is limited though but you can check out if it helps!
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.hist.html

Categories

Resources