I have a dataframe hour_dist that shows the hour a customer showed up to a particular location.
hour_dist.sample(5)
Location Hour
88131 1233000000000000 21
111274 1233000000000000 0
81126 2991000000000000 23
104181 1232000000000000 22
55719 1232000000000000 15
I'm trying to plot this data with Seaborn to visualize a ridgeline plot (https://seaborn.pydata.org/examples/kde_ridgeplot.html).
It should essentially show the hour distribution by each location. Here's an example of what it looks like:
With hour_dist, I've been trying, unsuccessfully, to plot the locations on the y axis and the hour on the x axis.
For me it works after changing g to Location and x to Hour; note that if there are many unique Location values there will be many subplots when run on the real data:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# NOTE(review): `df` is presumably the question's `hour_dist` dataframe
# (columns "Location" and "Hour") -- confirm before running.

# Initialize the FacetGrid object: one row and one colour per location.
# The palette is sized from the data so that every location gets its own
# colour even when there are more than ten of them.
pal = sns.cubehelix_palette(df["Location"].nunique(), rot=-.25, light=.7)
g = sns.FacetGrid(df, row="Location", hue="Location", aspect=15, height=.5, palette=pal)

# If you need to plot by percentage:
#df['pct'] = df['Location'].div(df.groupby('Hour')['Location'].transform('sum'))
#g = sns.FacetGrid(df, row="pct", hue="pct", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps: filled curve, white outline, baseline.
# `fill`/`bw_method` replace the deprecated `shade`/`bw` keywords (seaborn >= 0.11).
g.map(sns.kdeplot, "Hour", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "Hour", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write `label` in bold, coloured `color`, at axes position (0, 0.2).

    `x` is the data column handed over by `FacetGrid.map`; it is not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "Hour")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
Related
If I copy paste the example given on Seaborn website to make a "Ridge Plot", the code fails in two different points:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws, shifted per group by the group letter's
# character code so that the ten distributions are offset from each other.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps
g.map(sns.kdeplot, "x",
      bw_adjust=.5, clip_on=False,
      fill=True, alpha=1, linewidth=1.5)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.5)

# passing color=None to refline() uses the hue mapping
# NOTE: refline() requires seaborn >= 0.11.2.
g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write `label` in bold, coloured `color`, at axes position (0, 0.2).

    `x` is the data column handed over by `FacetGrid.map`; it is not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
# NOTE: `g.figure` raises AttributeError on seaborn 0.11.1; use `g.fig` there.
g.figure.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[], ylabel="")
g.despine(bottom=True, left=True)
line
g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)
and line
g.figure.subplots_adjust(hspace=-.25)
fail with this error:
AttributeError: 'FacetGrid' object has no attribute 'figure'
I am on
numpy: 1.19.1
pandas: 1.2.4
seaborn: 0.11.1
You can replace g.figure with g.fig; g.figure is intended to be the new name for the same attribute. refline() is new in seaborn 0.11.2 (the website assumes you are running the latest published version). You can replace the call to g.refline() with g.map(plt.axhline, y=0, linewidth=2, linestyle="-", color=None, clip_on=False).
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws, shifted per group by the group letter's
# character code so that the ten distributions are offset from each other.
rs = np.random.RandomState(2022)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
df["x"] += df["g"].map(ord)

# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, start=1, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps
g.map(sns.kdeplot, "x",
      bw_adjust=.5, clip_on=False,
      fill=True, alpha=1, linewidth=1.5)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.5)

# Baseline for every facet. On seaborn >= 0.11.2 this can be written as
#   g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)
# Mapping plt.axhline is the replacement for older versions; color=None
# keeps the hue-mapped colour.
g.map(plt.axhline, y=0, linewidth=2, linestyle="-", color=None, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write `label` in bold, coloured `color`, at axes position (0, 0.2).

    `x` is the data column handed over by `FacetGrid.map`; it is not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[], xlabel="", ylabel="")
g.despine(bottom=True, left=True)

plt.show()
Here is another example, using the flights dataset:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

flights = sns.load_dataset('flights')

# One palette colour and one facet row per distinct year.
pal = sns.cubehelix_palette(flights["year"].nunique(), start=1.4, rot=-.25, light=.7, dark=.4)
g = sns.FacetGrid(flights, row="year", hue="year", aspect=20, height=.5, palette=pal)

# Filled density, then a white outline on top, then a baseline at y=0.
g.map(sns.kdeplot, "passengers", bw_adjust=.6, cut=5, clip_on=False, fill=True, alpha=1, linewidth=1.5)
g.map(sns.kdeplot, "passengers", bw_adjust=.6, cut=5, clip_on=False, color="w", lw=2)
g.map(plt.axhline, y=0, linewidth=2, linestyle="-", color=None, clip_on=False)


def label(x, color, label):
    """Write `label` in bold, coloured `color`, at axes position (0, 0.1).

    `x` is the data column handed over by `FacetGrid.map`; it is not used.
    """
    ax = plt.gca()
    ax.text(0, .1, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "year")

# Negative hspace makes the facets overlap.
g.fig.subplots_adjust(hspace=-.7)
g.set(yticks=[], xlabel="", ylabel="", xlim=(None, 680), title="")
g.despine(bottom=True, left=True)

plt.show()
I have copied the RidgePlot example and adapted it to my needs. I have included the working script and some example data to test.
Image:
Some of my data have y-values equal to 0.0. I would like to emphasise that in my plot. I'm not sure which part of the plot, the fill_between or just every x-axis can be changed to for example remove the mapped colour on those places where "aandeel" == 0.0.
Any ideas on how to remove the colours here? Is it simply possible to remove the x-axes/set width to 0/change color to different?
example data (csv): https://gist.github.com/willemvanopstal/c2892e68d6eb94194acd371e49d949bd
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
sns.set(style="whitegrid", rc={"axes.facecolor": (0, 0, 0, 0)})

data = pd.read_csv('stats_together_data.csv', delimiter=';')
df = data

# Initialize the FacetGrid object: one row and one colour per process.
pal = sns.color_palette("Set2")
g = sns.FacetGrid(df, row="process", hue="process", aspect=10, height=0.6, palette=pal)

# Draw the curves in a few steps: coloured line, filled area, white outline,
# then a baseline at y=0.
g.map(sns.lineplot, "region", "aandeel", clip_on=False, alpha=1, lw=1.5)
g.map(plt.fill_between, "region", "aandeel", interpolate=True)
g.map(sns.lineplot, "region", "aandeel", clip_on=False, color="w", lw=2)
g.map(plt.axhline, y=0, lw=1, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write `label` in bold, coloured `color`, at axes position (0, 0.2).

    `x` is the data column handed over by `FacetGrid.map`; it is not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "region")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
# One x tick per entry of `region` (the [0::1] slice keeps every element).
g.set(xticks=df.region[0::1])
g.despine(bottom=True, left=True)

plt.show()
First, I would replace the 0.0 values with NaN. But that's not sufficient, because a large part of your problem is the horizontal lines that are added to replace the bottom spine: they hide the fact that the curves stop at different points. If you remove those, then you're left with:
Personally, I'm not overly fond of the thick white line, which makes the narrow regions on the left and right very thin. I would tend to remove it:
full code:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
sns.set(style="whitegrid", rc={"axes.facecolor": (0, 0, 0, 0)})

df = pd.read_csv('https://gist.githubusercontent.com/willemvanopstal/c2892e68d6eb94194acd371e49d949bd/raw/642c4261198f0cacdf32aad21aebca5953c6cd75/stats_together_data.csv', delimiter=';')

# Turn exact zeros into NaN so nothing is drawn or filled at those points.
df.loc[df["aandeel"] == 0, "aandeel"] = np.nan

# Initialize the FacetGrid object: one row and one colour per process.
pal = sns.color_palette("Set2")
g = sns.FacetGrid(df, row="process", hue="process", aspect=10, height=0.6, palette=pal)

# Draw the curves in a few steps. There is deliberately no plt.axhline
# baseline here: the horizontal lines would hide where each curve stops.
g.map(sns.lineplot, "region", "aandeel", clip_on=False, alpha=1, lw=1.5)
g.map(plt.fill_between, "region", "aandeel", interpolate=True)
# White outline pass left disabled: it makes the narrow regions look very thin.
#g.map(sns.lineplot, "region", "aandeel", clip_on=False, color="w", lw=2)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write `label` in bold, coloured `color`, at axes position (0, 0.2).

    `x` is the data column handed over by `FacetGrid.map`; it is not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "region")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
# One x tick per entry of `region` (the [0::1] slice keeps every element).
g.set(xticks=df.region[0::1])
g.despine(bottom=True, left=True)

plt.show()
I want to add a y-axis label to a density ridgeline plot using seaborn in python. To make the ridgeline plot, I am following the code from the seaborn gallery. For convenience, I have copied their code snippet below. How should I modify this to label the y-axis in a manner that does not overlap the density curves?
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws, shifted per group by the group letter's
# character code so that the ten distributions are offset from each other.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps: filled curve, white outline, baseline.
# `fill`/`bw_method` replace the deprecated `shade`/`bw` keywords (seaborn >= 0.11).
g.map(sns.kdeplot, "x", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write `label` in bold, coloured `color`, at axes position (0, 0.2).

    `x` is the data column handed over by `FacetGrid.map`; it is not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
To be clear, I'd like the plot to look something like:
You can add a general label as figure text with the following line of code: g.fig.text(0.04, 0.5, 'Y axis label', va='center', rotation='vertical'). You will obtain the following:
I think replacing g.fig.subplots_adjust(hspace=-.25) with g.fig.subplots_adjust(hspace=.1) should do the trick.
Is there a way to keep the plot from overlapping long y-axis labels in a ridge plot (seaborn)? I am using the seaborn example code (slightly edited) to show what I mean.
Thanks for any help.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws spread evenly over the category names.
rs = np.random.RandomState(1979)
x = rs.randn(500)
names = ['15--Specialist COP',
         '16--Specialist other fieldcrops',
         '20--Specialist horticulture',
         '35--Specialist wine',
         '36--Specialist orchards - fruits',
         '37--Specialist olives',
         '38--Permanent crops combined',
         '45--Specialist milk',
         '48--Specialist sheep and goats',
         '49--Specialist cattle']
# 'Specialist granivores',
# 'Mixed crops',
# 'Mixed livestock',
# 'Mixed crops and livestock']
g = np.tile(names, 50)
df = pd.DataFrame(dict(x=x, g=g))

# Initialize the FacetGrid object; the palette has one colour per name.
pal = sns.cubehelix_palette(len(names), rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps: filled curve, white outline, baseline.
# `fill`/`bw_method` replace the deprecated `shade`/`bw` keywords (seaborn >= 0.11).
g.map(sns.kdeplot, "x", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write `label` in bold, coloured `color`, at axes position (0, 0.2).

    `x` is the data column handed over by `FacetGrid.map`; it is not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)

plt.show()
I would like to add a sample y-axis tick on the right side of the Ridge plot, to know what is the range of values of all the plots. Preferably I would like to add it only to one of the subplots and not to all of them.
My plot is based on the seaborn 'ridge plot' example at: https://seaborn.pydata.org/examples/kde_ridgeplot.html
I've tried the following code with no luck:
# Attempt from the question that did not give the desired result.
g.set(yticks=[0,200])
# NOTE(review): FacetGrid does not appear to provide set_y_label_position --
# confirm; on a single matplotlib axes this is ax.yaxis.set_label_position.
g.set_y_label_position("right")
g.set_ylabels('[Range]',fontsize=9,fontweight="normal")
If you want to modify one particular axes from a FacetGrid, you can get a reference from the list g.axes
Here is how I would go about it
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal draws, shifted per group by the group letter's
# character code so that the ten distributions are offset from each other.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps: filled curve, white outline, baseline.
# `fill`/`bw_method` replace the deprecated `shade`/`bw` keywords (seaborn >= 0.11).
g.map(sns.kdeplot, "x", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write `label` in bold, coloured `color`, at axes position (0, 0.2).

    `x` is the data column handed over by `FacetGrid.map`; it is not used.
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

#
# Changes from seaborn example below this point
#

# Set the subplots to overlap; right=0.9 leaves room for the y-axis on the right
g.fig.subplots_adjust(hspace=-.25, right=0.9)

# Remove axes details that don't play well with overlap
g.set_titles("")
#g.set(yticks=[])
g.despine(bottom=True, left=True, right=False, top=True, offset=5)

axes = g.axes.ravel()
# Only one facet carries the sample y-axis. The grid has a single column, so
# axes[0] is the top row; use axes[-1] to show it on the bottom plot instead.
# (An identity check is used instead of ax.is_first_row(), which newer
# matplotlib releases no longer provide on the axes object.)
scale_ax = axes[0]
for ax in axes:
    if ax is scale_ax:
        ax.yaxis.tick_right()
        ax.yaxis.set_label_position("right")
        ax.set_ylabel("MW")
    else:
        ax.spines['right'].set_visible(False)
        # necessary because y-axes are shared
        for tick_label in ax.get_yticklabels():
            tick_label.set_visible(False)