Normalizing height / mode of kdeplot to be 1 - python

I am using the FacetGrid example from seaborn [Overlapping densities (‘ridge plot’)]. However, instead of normalizing the integral of each kdeplot, I want to normalize the heights, so that the mode of every curve is 1. Does anyone have an idea how to achieve this?
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws tiled over the groups A-J, each group
# shifted by the code point of its letter so the ridges are offset along x.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object (note: `g` is rebound from the group
# array to the grid here)
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps: the filled curve first, then a white
# outline on top of it
g.map(sns.kdeplot, "x",
      bw_adjust=.5, clip_on=False,
      fill=True, alpha=1, linewidth=1.5)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.5)

# passing color=None to refline() uses the hue mapping
g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, in ``color``, near the left edge of the current axes.

    ``x`` is accepted because ``g.map`` passes the mapped column, but it is
    not used. The parameter names ``color`` and ``label`` must stay as they
    are: ``FacetGrid.map`` supplies both as keyword arguments.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.figure.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[], ylabel="")
g.despine(bottom=True, left=True)
So far, I have searched for a comparable approach that has been used with histplot. However, I have only found solutions that normalize the integral.

For just one kdeplot:
First define a helper, normalize(), that linearly rescales the values to a target range -
def normalize(arr, t_min, t_max):
    """Linearly rescale the values of ``arr`` onto the range [t_min, t_max].

    The smallest input value maps to ``t_min`` and the largest to ``t_max``.

    Parameters
    ----------
    arr : iterable of numbers
        Values to rescale (a list or a 1-D array).
    t_min, t_max : number
        Lower and upper bound of the target range.

    Returns
    -------
    list
        The rescaled values, in input order. An empty input gives an empty
        list; an input whose values are all equal gives ``t_min`` for every
        element, because there is no spread to stretch.
    """
    values = list(arr)
    if not values:
        return []

    # Compute the extremes once instead of on every loop iteration.
    lowest = min(values)
    spread = max(values) - lowest
    if spread == 0:
        # Constant input: avoid dividing by zero.
        return [t_min for _ in values]

    target_width = t_max - t_min
    return [((v - lowest) * target_width) / spread + t_min for v in values]
If fill=False
tips = sns.load_dataset("tips")
ax = sns.kdeplot(data=tips, x="total_bill")
# Without fill, the density is the first line on the axes; rescale its
# y-values onto [0, 1].
curve = ax.lines[0]
rescaled = normalize(curve.get_ydata(), 0, 1)
curve.set_ydata(rescaled)
# Leave a little headroom above the peak.
ax.set_ylim(0, 1.05)
ax.autoscale_view()
If fill=True
tips = sns.load_dataset("tips")
ax = sns.kdeplot(data=tips, x="total_bill", fill=True)
# With fill=True the density is the first collection on the axes; rescale
# the y-coordinates of its first path in place.
outline = ax.collections[0].get_paths()[0]
outline.vertices[:, 1] = normalize(outline.vertices[:, 1], 0, 1)
# Leave a little headroom above the peak.
ax.set_ylim(0, 1.05)
ax.autoscale_view()
Now, if you want to use a FacetGrid, most of your problems can probably be solved just by passing sharey=True, so that all rows share one y-axis scale -
# NOTE(review): sharey=True appears to be FacetGrid's default already, so
# spelling it out may not change the plot — confirm against the seaborn docs.
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal, sharey=True)
But if you still need to normalize each curve, then
define a wrapper function around sns.kdeplot -
def kdeplot(data, **kwargs):
    """Draw ``sns.kdeplot`` and rescale the new density to span 0 to 1.

    Intended as a drop-in replacement for ``sns.kdeplot``, both for direct
    calls and for ``FacetGrid.map``.

    Parameters
    ----------
    data
        Passed to ``sns.kdeplot`` as its first positional argument.
    **kwargs
        Forwarded unchanged to ``sns.kdeplot``. A truthy ``fill`` selects
        the filled-area branch below.

    Returns
    -------
    The axes object returned by ``sns.kdeplot``, so callers can keep
    working with it (``ax = kdeplot(...)``).

    Notes
    -----
    Only the most recently added artist is rescaled, so earlier curves on
    the same axes are left alone. This calls ``normalize`` from this file.
    """
    ax = sns.kdeplot(data, **kwargs)
    if kwargs.get("fill"):
        # A filled density is a collection: rescale the y-coordinates of
        # its path in place.
        outline = ax.collections[-1].get_paths()[0]
        outline.vertices[:, 1] = normalize(outline.vertices[:, 1], 0, 1)
    else:
        # An unfilled density is a line.
        curve = ax.lines[-1]
        curve.set_ydata(normalize(curve.get_ydata(), 0, 1))
    # Leave a little headroom above the peak.
    ax.set_ylim(0, 1.05)
    ax.autoscale_view()
    return ax
then -
# Try the wrapper on the tips dataset: once filled, then once as a plain
# line with lw=4.
tips = sns.load_dataset("tips")
ax = kdeplot(data=tips, x="total_bill",fill=True)
ax = kdeplot(data=tips, x="total_bill",fill=False, lw=4)
Now you can just use kdeplot instead of sns.kdeplot -
# Same two drawing steps as the ridge-plot example, with the wrapper
# `kdeplot` substituted for `sns.kdeplot`: the filled curve, then a white
# outline.
g.map(kdeplot, "x",bw_adjust=.5, clip_on=False,
fill=True, alpha=1, linewidth=1.5)
g.map(kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.5)

Related

How to solve "AttributeError: 'FacetGrid' object has no attribute 'refline'" in Python [duplicate]

If I copy paste the example given on Seaborn website to make a "Ridge Plot", the code fails in two different points:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws tiled over the groups A-J, each group
# shifted by the code point of its letter.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object (`g` is rebound to the grid here)
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps
g.map(sns.kdeplot, "x",
      bw_adjust=.5, clip_on=False,
      fill=True, alpha=1, linewidth=1.5)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.5)

# passing color=None to refline() uses the hue mapping
g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, in ``color``, near the left edge of the current axes.

    ``x`` is accepted because ``g.map`` passes the mapped column, but it is
    not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.figure.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[], ylabel="")
g.despine(bottom=True, left=True)
line
g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)
and line
g.figure.subplots_adjust(hspace=-.25)
fail with this error:
AttributeError: 'FacetGrid' object has no attribute 'figure'
I am on
numpy: 1.19.1
pandas: 1.2.4
seaborn: 0.11.1
You can replace g.figure with g.fig; g.figure is intended to be the new name for the same attribute. refline() is new in seaborn 0.11.2 (the website assumes you are running the latest published version). You can replace the call to g.refline() with g.map(plt.axhline, y=0, linewidth=2, linestyle="-", color=None, clip_on=False).
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws tiled over the groups A-J, each group
# shifted by the code point of its letter.
rs = np.random.RandomState(2022)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
df["x"] += df["g"].map(ord)

# Initialize the FacetGrid object (`g` is rebound to the grid here)
pal = sns.cubehelix_palette(10, start=1, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps
g.map(sns.kdeplot, "x",
      bw_adjust=.5, clip_on=False,
      fill=True, alpha=1, linewidth=1.5)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.5)

# Stand-in for g.refline(y=0, ...), which only exists from seaborn 0.11.2:
# map plt.axhline over the facets instead, with the same keyword arguments.
g.map(plt.axhline, y=0, linewidth=2, linestyle="-", color=None, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, in ``color``, near the left edge of the current axes.

    ``x`` is accepted because ``g.map`` passes the mapped column, but it is
    not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap (g.fig rather than g.figure, for older seaborn)
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[], xlabel="", ylabel="")
g.despine(bottom=True, left=True)
plt.show()
Here is another example, using the flights dataset:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

flights = sns.load_dataset('flights')

# One palette entry and one facet row per distinct year
pal = sns.cubehelix_palette(len(flights["year"].unique()), start=1.4, rot=-.25, light=.7, dark=.4)
g = sns.FacetGrid(flights, row="year", hue="year", aspect=20, height=.5, palette=pal)

# Filled density first, then a white outline, then a baseline at y=0
g.map(sns.kdeplot, "passengers", bw_adjust=.6, cut=5, clip_on=False, fill=True, alpha=1, linewidth=1.5)
g.map(sns.kdeplot, "passengers", bw_adjust=.6, cut=5, clip_on=False, color="w", lw=2)
g.map(plt.axhline, y=0, linewidth=2, linestyle="-", color=None, clip_on=False)


def label(x, color, label):
    """Write ``label`` in bold, in ``color``, near the left edge of the current axes.

    ``x`` is accepted because ``g.map`` passes the mapped column, but it is
    not used.
    """
    ax = plt.gca()
    ax.text(0, .1, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "year")

# Overlap the rows and strip the axes details that clash with the overlap
g.fig.subplots_adjust(hspace=-.7)
g.set(yticks=[], xlabel="", ylabel="", xlim=(None, 680), title="")
g.despine(bottom=True, left=True)
plt.show()

Add y-axis label to ridgeline plot with seaborn

I want to add a y-axis label to a density ridgeline plot using seaborn in python. To make the ridgeline plot, I am following the code from the seaborn gallery. For convenience, I have copied their code snippet below. How should I modify this to label the y-axis in a manner that does not overlap the density curves?
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws tiled over the groups A-J, each group
# shifted by the code point of its letter.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object (`g` is rebound to the grid here)
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps.
# `fill=` and `bw_method=` replace the deprecated `shade=` and `bw=`
# keywords; the values are unchanged.
g.map(sns.kdeplot, "x", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, in ``color``, near the left edge of the current axes.

    ``x`` is accepted because ``g.map`` passes the mapped column, but it is
    not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
To be clear, I'd like the plot to look something like:
You can add a general label as figure-level text with the following line: g.fig.text(0.04, 0.5, 'Y axis label', va='center', rotation='vertical'), which gives the following result:
I think replacing g.fig.subplots_adjust(hspace=-.25) with g.fig.subplots_adjust(hspace=.1) should do the trick.

Labels and plot overlapping

Is there a way to keep the plot from overlapping with long y-axis labels in a ridge plot (seaborn)? I am using the code of the seaborn example (slightly edited) to show what I mean.
Thanks for any help.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws tiled over ten long category names
rs = np.random.RandomState(1979)
x = rs.randn(500)
names = ['15--Specialist COP',
         '16--Specialist other fieldcrops',
         '20--Specialist horticulture',
         '35--Specialist wine',
         '36--Specialist orchards - fruits',
         '37--Specialist olives',
         '38--Permanent crops combined',
         '45--Specialist milk',
         '48--Specialist sheep and goats',
         '49--Specialist cattle']
# Categories left out of this example:
# 'Specialist granivores',
# 'Mixed crops',
# 'Mixed livestock',
# 'Mixed crops and livestock']
g = np.tile(names, 50)
df = pd.DataFrame(dict(x=x, g=g))

# Initialize the FacetGrid object (`g` is rebound to the grid here)
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps.
# `fill=` and `bw_method=` replace the deprecated `shade=` and `bw=`
# keywords; the values are unchanged.
g.map(sns.kdeplot, "x", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, in ``color``, near the left edge of the current axes.

    ``x`` is accepted because ``g.map`` passes the mapped column, but it is
    not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
plt.show()

Adding y-axis on right side to Ridge Plot in Seaborn

I would like to add a sample y-axis tick on the right side of the Ridge plot, to know what is the range of values of all the plots. Preferably I would like to add it only to one of the subplots and not to all of them.
My plot is based on the seaborn 'ridge plot' example at: https://seaborn.pydata.org/examples/kde_ridgeplot.html
I've tried the following code with no luck:
# The asker's attempt, reported as not working: set two y ticks, then try
# to move and style the y label through the grid object `g`.
g.set(yticks=[0,200])
g.set_y_label_position("right")
g.set_ylabels('[Range]',fontsize=9,fontweight="normal")
If you want to modify one particular axes from a FacetGrid, you can get a reference from the list g.axes
Here is how I would go about it
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws tiled over the groups A-J, each group
# shifted by the code point of its letter.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object (`g` is rebound to the grid here)
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps.
# `fill=` and `bw_method=` replace the deprecated `shade=` and `bw=`
# keywords; the values are unchanged.
g.map(sns.kdeplot, "x", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, in ``color``, near the left edge of the current axes.

    ``x`` is accepted because ``g.map`` passes the mapped column, but it is
    not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

#
# Changes from seaborn example below this point
#

# Set the subplots to overlap, leaving room on the right for the y-axis
g.fig.subplots_adjust(hspace=-.25, right=0.9)

# Remove axes details that don't play well with overlap
g.set_titles("")
# g.set(yticks=[]) is deliberately not called: the y ticks must stay.
g.despine(bottom=True, left=True, right=False, top=True, offset=5)

for ax in g.axes.ravel():
    # Axes.is_first_row() was removed in Matplotlib 3.6, so query the
    # SubplotSpec instead. Use .is_last_row() to show the spine on the
    # bottom plot instead.
    if ax.get_subplotspec().is_first_row():
        ax.yaxis.tick_right()
        ax.yaxis.set_label_position("right")
        ax.set_ylabel("MW")
    else:
        ax.spines['right'].set_visible(False)
        # necessary because y-axes are shared
        for tick_label in ax.get_yticklabels():
            tick_label.set_visible(False)

frequency trail in matplotlib

I'm looking into outliers detection. Brendan Gregg has a really nice article and I'm especially intrigued by his visualizations. One of the methods he uses are frequency trails.
I'm trying to reproduce this in matplotlib using this example. Which looks like this:
And the plot is based on this answer: https://stackoverflow.com/a/4152016/948369
Now my issue is, like described by Brendan, that I have a continuous line that masks the outlier (I simplified the input values so you can still see them):
Any help on making the line "non-continuous" for non existent values?
Seaborn also provides a very neat example:
They call it a joy/ridge plot however: https://seaborn.pydata.org/examples/kde_ridgeplot.html
#!/usr/bin/python
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws tiled over the groups A-J, each group
# shifted by the code point of its letter.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object (`g` is rebound to the grid here).
# `height=` replaces the old `size=` keyword; the value is unchanged.
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps.
# `fill=` and `bw_method=` replace the deprecated `shade=` and `bw=`
# keywords; the values are unchanged.
g.map(sns.kdeplot, "x", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, in ``color``, near the left edge of the current axes.

    ``x`` is accepted because ``g.map`` passes the mapped column, but it is
    not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
I would stick with a flat 2D plot and displace each level by a set vertical amount. You'll have to play with the spacing between the levels (in the code below I called it displace) to properly see the outliers, but this does a pretty good job of replicating your target image. The key, I think, is to set the "zero" values to None so pylab does not draw them.
import itertools

import matplotlib.pyplot as plt
import numpy as np

k = 20  # number of stacked levels
X = np.linspace(0, 20, 500)
Y = np.zeros((k, X.size))

# Add some fake data: one bell-shaped bump per level
MU = np.random.random(k)
for n in range(k):
    # `//` keeps the integer division that `/` performed on ints in the
    # Python 2 original.
    Y[n] += np.exp(-(X - MU[n] * n) ** 2 / (1 + n // 3))
Y *= 50

# Add some outliers for show
Y += 2 * np.random.random(Y.shape)

# Vertical offset between consecutive levels
displace = Y.max() / 4

# Add a cutoff: blank out the "zero" values so they are not drawn.
# NaN is what assigning None to a float array stores.
Y[Y < 1.0] = np.nan

face_colors = itertools.cycle(["#D3D820", "#C9CC54",
                               "#D7DA66", "#FDFE42"])

fig = plt.figure()
# `facecolor` replaces the removed `axisbg` keyword.
ax = fig.add_subplot(111, facecolor='black')
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)

for n, y in enumerate(Y):
    # Vertically displace each plot
    y0 = np.ones(y.shape) * n * displace
    y1 = y + n * displace
    # Lower levels get a higher zorder, so they are drawn on top.
    plt.fill_between(X, y0, y1, lw=1,
                     facecolor=next(face_colors),
                     zorder=len(Y) - n)
plt.show()

Categories

Resources