I have copied the RidgePlot example and adapted it to my needs. I have included the working script and some example data to test.
Image:
Some of my data have y-values equal to 0.0. I would like to emphasise that in my plot. I'm not sure which part of the plot should be changed (the fill_between, or the x-axis line of each facet) to, for example, remove the mapped colour in those places where "aandeel" == 0.0.
Any ideas on how to remove the colours here? Is it possible to simply remove the x-axes, set their width to 0, or change their colour to something different?
example data (csv): https://gist.github.com/willemvanopstal/c2892e68d6eb94194acd371e49d949bd
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
# Transparent axes background, so overlapping facets do not hide each other.
sns.set(style="whitegrid", rc={"axes.facecolor": (0, 0, 0, 0)})
data = pd.read_csv('stats_together_data.csv', delimiter=';')
df = data

# Initialize the FacetGrid object: one row and one colour per "process"
pal = sns.color_palette("Set2")
g = sns.FacetGrid(df, row="process", hue="process", aspect=10, height=0.6, palette=pal)

# Draw the curves in a few steps: coloured line, filled area, white outline,
# and a horizontal line at y=0 that stands in for the removed bottom spine.
g.map(sns.lineplot, "region", "aandeel", clip_on=False, alpha=1, lw=1.5)
g.map(plt.fill_between, "region", "aandeel", interpolate=True)
g.map(sns.lineplot, "region", "aandeel", clip_on=False, color="w", lw=2)
g.map(plt.axhline, y=0, lw=1, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, coloured ``color``, on the current axes.

    Meant to be passed to ``g.map``. ``x`` (the mapped column) is accepted
    but not used. The text is left-aligned at axes coordinates (0, 0.2).
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "region")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.set(xticks=df.region[0::1])
g.despine(bottom=True, left=True)
plt.show()
First, I would replace the 0.0 values with NaN. But that's not sufficient, because a large part of your problem is the horizontal lines that are added to replace the bottom spine: they hide the fact that the curves stop at different points. If you remove those, then you're left with:
Personally, I'm not overly fond of the thick white line, which makes the narrow regions on the left and right very thin. I would tend to remove it:
full code:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
# Transparent axes background, so overlapping facets do not hide each other.
sns.set(style="whitegrid", rc={"axes.facecolor": (0, 0, 0, 0)})
df = pd.read_csv('https://gist.githubusercontent.com/willemvanopstal/c2892e68d6eb94194acd371e49d949bd/raw/642c4261198f0cacdf32aad21aebca5953c6cd75/stats_together_data.csv', delimiter=';')

# Replace exact zeros with NaN so that nothing is drawn where "aandeel" is 0.
df.loc[df["aandeel"] == 0, "aandeel"] = np.nan

# Initialize the FacetGrid object: one row and one colour per "process"
pal = sns.color_palette("Set2")
g = sns.FacetGrid(df, row="process", hue="process", aspect=10, height=0.6, palette=pal)

# Draw the curves in a few steps
g.map(sns.lineplot, "region", "aandeel", clip_on=False, alpha=1, lw=1.5)
g.map(plt.fill_between, "region", "aandeel", interpolate=True)
# The white outline pass is intentionally disabled: it makes the narrow
# regions on the left and right very thin.
#g.map(sns.lineplot, "region", "aandeel", clip_on=False, color="w", lw=2)
# No plt.axhline pass either: those horizontal lines would hide the fact that
# the curves stop at different points.


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, coloured ``color``, on the current axes.

    Meant to be passed to ``g.map``. ``x`` (the mapped column) is accepted
    but not used. The text is left-aligned at axes coordinates (0, 0.2).
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "region")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.set(xticks=df.region[0::1])
g.despine(bottom=True, left=True)
plt.show()
Related
I want to add a y-axis label to a density ridgeline plot using seaborn in python. To make the ridgeline plot, I am following the code from the seaborn gallery. For convenience, I have copied their code snippet below. How should I modify this to label the y-axis in a manner that does not overlap the density curves?
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Transparent axes background, so overlapping facets do not hide each other.
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal samples split over groups "A".."J", each group
# shifted by the character code of its letter.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object (note: rebinds ``g`` from the group array)
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps
# NOTE: newer seaborn releases deprecate `shade`/`bw` in favour of
# `fill`/`bw_method`; kept as-is to match the seaborn version used here.
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, coloured ``color``, on the current axes.

    Meant to be passed to ``g.map``. ``x`` (the mapped column) is accepted
    but not used. The text is left-aligned at axes coordinates (0, 0.2).
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
To be clear, I'd like the plot to look something like:
You can add a general label as figure text with the following line of code: g.fig.text(0.04, 0.5, 'Y axis label', va='center', rotation='vertical'), and you will obtain the following:
I think replacing g.fig.subplots_adjust(hspace=-.25) with g.fig.subplots_adjust(hspace=.1) should do the trick.
Is there a way to keep the plot from overlapping with long y-axis labels in a ridge plot (seaborn)? I am using the code of the seaborn example (slightly edited) to illustrate what I mean.
Thanks for any help.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Transparent axes background, so overlapping facets do not hide each other.
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal samples spread over the long group names below
rs = np.random.RandomState(1979)
x = rs.randn(500)
names = ['15--Specialist COP',
         '16--Specialist other fieldcrops',
         '20--Specialist horticulture',
         '35--Specialist wine',
         '36--Specialist orchards - fruits',
         '37--Specialist olives',
         '38--Permanent crops combined',
         '45--Specialist milk',
         '48--Specialist sheep and goats',
         '49--Specialist cattle']
# 'Specialist granivores',
# 'Mixed crops',
# 'Mixed livestock',
# 'Mixed crops and livestock']
g = np.tile(names, 50)
df = pd.DataFrame(dict(x=x, g=g))

# Initialize the FacetGrid object (note: rebinds ``g`` from the group array)
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps
# NOTE: newer seaborn releases deprecate `shade`/`bw` in favour of
# `fill`/`bw_method`; kept as-is to match the seaborn version used here.
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, coloured ``color``, on the current axes.

    Meant to be passed to ``g.map``. ``x`` (the mapped column) is accepted
    but not used. The text is left-aligned at axes coordinates (0, 0.2).
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
plt.show()
I would like to add a sample y-axis tick on the right side of the Ridge plot, to know what is the range of values of all the plots. Preferably I would like to add it only to one of the subplots and not to all of them.
My plot is based on the seaborn 'ridge plot' example at: https://seaborn.pydata.org/examples/kde_ridgeplot.html
I've tried the following code with no luck:
# Attempt from the question; reported above as not giving the desired
# right-hand y-axis tick. ``g`` is the FacetGrid of the ridge plot.
g.set(yticks=[0,200])
# NOTE(review): set_y_label_position does not look like a FacetGrid method;
# confirm. The per-axes equivalent is ax.yaxis.set_label_position("right").
g.set_y_label_position("right")
g.set_ylabels('[Range]',fontsize=9,fontweight="normal")
If you want to modify one particular axes from a FacetGrid, you can get a reference from the list g.axes
Here is how I would go about it
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Transparent axes background, so overlapping facets do not hide each other.
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal samples split over groups "A".."J", each group
# shifted by the character code of its letter.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object (note: rebinds ``g`` from the group array)
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps
# NOTE: newer seaborn releases deprecate `shade`/`bw` in favour of
# `fill`/`bw_method`; kept as-is to match the seaborn version used here.
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, coloured ``color``, on the current axes.

    Meant to be passed to ``g.map``. ``x`` (the mapped column) is accepted
    but not used. The text is left-aligned at axes coordinates (0, 0.2).
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

#
# Changes from seaborn example below this point
#

# Set the subplots to overlap; the subplots' right edge is placed at 0.9
g.fig.subplots_adjust(hspace=-.25, right=0.9)

# Remove axes details that don't play well with overlap
g.set_titles("")
# The y ticks are kept here so that one facet can show them.
#g.set(yticks=[])
g.despine(bottom=True, left=True, right=False, top=True, offset=5)

# g.axes is a 2-D array of axes, so the row index identifies the first row
# without relying on Axes.is_first_row(), which current Matplotlib no longer
# provides.
for row_index, row_axes in enumerate(g.axes):
    # use `row_index == len(g.axes) - 1` to show the spine on the bottom plot instead
    show_scale = row_index == 0
    for ax in row_axes:
        if show_scale:
            ax.yaxis.tick_right()
            ax.yaxis.set_label_position("right")
            ax.set_ylabel("MW")
        else:
            ax.spines['right'].set_visible(False)
            # necessary because y-axes are shared
            for tick_label in ax.get_yticklabels():
                tick_label.set_visible(False)
I have a dataframe hour_dist that shows the hour a customer showed up to a particular location.
hour_dist.sample(5)
Location Hour
88131 1233000000000000 21
111274 1233000000000000 0
81126 2991000000000000 23
104181 1232000000000000 22
55719 1232000000000000 15
I'm trying to plot this data with Seaborn to visualize a ridgeline plot (https://seaborn.pydata.org/examples/kde_ridgeplot.html).
It should essentially show the hour distribution by each location. Here's an example of what it looks like:
With hour_dist, I've been trying, unsuccessfully, to plot the locations on the y axis and the hour on the x axis.
For me it works if I change g to Location and x to Hour; note that with real data, if there are many unique Location values, there will be many plots:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Transparent axes background, so overlapping facets do not hide each other.
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Initialize the FacetGrid object: one row and one colour per Location
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="Location", hue="Location", aspect=15, height=.5, palette=pal)

# If you need to plot by percentage instead:
#df['pct'] = df['Location'].div(df.groupby('Hour')['Location'].transform('sum'))
#g = sns.FacetGrid(df, row="pct", hue="pct", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps
# NOTE: newer seaborn releases deprecate `shade`/`bw` in favour of
# `fill`/`bw_method`; kept as-is to match the seaborn version used here.
g.map(sns.kdeplot, "Hour", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "Hour", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, coloured ``color``, on the current axes.

    Meant to be passed to ``g.map``. ``x`` (the mapped column) is accepted
    but not used. The text is left-aligned at axes coordinates (0, 0.2).
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "Hour")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
I'm looking into outlier detection. Brendan Gregg has a really nice article and I'm especially intrigued by his visualizations. One of the methods he uses is frequency trails.
I'm trying to reproduce this in matplotlib using this example. Which looks like this:
And the plot is based on this answer: https://stackoverflow.com/a/4152016/948369
Now my issue is, like described by Brendan, that I have a continuous line that masks the outlier (I simplified the input values so you can still see them):
Any help on making the line "non-continuous" for non existent values?
Seaborn also provides a very neat example:
They call it a joy/ridge plot however: https://seaborn.pydata.org/examples/kde_ridgeplot.html
#!/usr/bin/python
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Transparent axes background, so overlapping facets do not hide each other.
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 500 normal samples split over groups "A".."J", each group
# shifted by the character code of its letter.
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m

# Initialize the FacetGrid object (note: rebinds ``g`` from the group array).
# `height` is the current name of the parameter formerly called `size`.
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps
# NOTE: newer seaborn releases deprecate `shade`/`bw` in favour of
# `fill`/`bw_method`; kept as-is to match the seaborn version used here.
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold, coloured ``color``, on the current axes.

    Meant to be passed to ``g.map``. ``x`` (the mapped column) is accepted
    but not used. The text is left-aligned at axes coordinates (0, 0.2).
    """
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
I would stick with a flat 2D plot and displace each level by a set vertical amount. You'll have to play with the levels (in the code below I called it displace) to properly see the outliers, but this does a pretty good job at replicating your target image. The key, I think, is to set the "zero" values to None so pylab does not draw them.
import numpy as np
import pylab as plt
import itertools
# Frequency-trail style plot: k curves over a common X grid, each one shifted
# upwards by a fixed amount, with low values blanked out so that isolated
# outliers show up as separate patches.
k = 20
X = np.linspace(0, 20, 500)
Y = np.zeros((k, X.size))

# Add some fake data
MU = np.random.random(k)
for n in range(k):
    # `//` keeps the integer division that Python 2 performed for `n/3`.
    Y[n] += np.exp(-(X - MU[n] * n) ** 2 / (1 + n // 3))
Y *= 50

# Add some outliers for show
Y += 2 * np.random.random(Y.shape)
displace = Y.max() / 4

# Add a cutoff: values below it become NaN so that they are not drawn
Y[Y < 1.0] = np.nan

face_colors = itertools.cycle(["#D3D820", "#C9CC54",
                               "#D7DA66", "#FDFE42"])

fig = plt.figure()
# `facecolor` replaces the `axisbg` keyword that Matplotlib has removed.
ax = fig.add_subplot(111, facecolor='black')
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)

for n, y in enumerate(Y):
    # Vertically displace each plot
    y0 = np.ones(y.shape) * n * displace
    y1 = y + n * displace
    # Decreasing zorder puts lower-index curves on top of higher-index ones.
    plt.fill_between(X, y0, y1, lw=1,
                     facecolor=next(face_colors),
                     zorder=len(Y) - n)

plt.show()