How to shift quartile lines in seaborn grouped violin plots?

How to shift quartile lines in seaborn grouped violin plots? - python

Consider the following seaborn grouped violinplot with split violins, where I inserted a small space inbetween.
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="whitegrid")
tips = sns.load_dataset("tips")
fig, ax = plt.subplots()
sns.violinplot(
data=tips, x="day", y="total_bill", hue="smoker", split=True, inner="quart", linewidth=1,
palette={"Yes": "b", "No": ".85"}, ax=ax
)
sns.despine(left=True)
delta = 0.025
for ii, item in enumerate(ax.collections):
if isinstance(item, matplotlib.collections.PolyCollection):
path, = item.get_paths()
vertices = path.vertices
if ii % 2: # -> to right
vertices[:, 0] += delta
else: # -> to left
vertices[:, 0] -= delta
plt.show()
How can I shift the quartile (and median) indicating dotted (and dashed) lines back inside the violins?

You can do it exactly the same way as you did with the violins:
for i, line in enumerate(ax.get_lines()):
line.get_path().vertices[:, 0] += delta if i // 3 % 2 else -delta

Related

matplotlib matshow xtick labels on top and bottom

I'm trying to visualize a name co-occurrence matrix. This version works okay:
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
n = 10
names = ['Long Name ' + suffix for suffix in string.ascii_uppercase[:n]]
df = pd.DataFrame(np.random.randint(0, 100, size=(n,n)),
columns=names, index=names)
fig = plt.figure()
ax = plt.gca()
im = ax.matshow(df, interpolation='none')
fig.colorbar(im)
ax.set_xticks(np.arange(n))
ax.set_xticklabels(names)
ax.set_yticks(np.arange(n))
ax.set_yticklabels(names)
ax.xaxis.set_ticks_position("bottom")
plt.setp(ax.get_xticklabels(), rotation=45,
ha="right", rotation_mode="anchor")
for (i,j), z in np.ndenumerate(df):
if z != 0:
ax.text(j, i, str(z), ha="center", va="center")
ax.set_title("Name Co-Occurrences")
fig.tight_layout()
plt.show()
The problem is that the actual matrix I have is fairly large, so I would like to display the names both on the top and the bottom. I've tried to do so by using twiny:
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
n = 10
names = ['Long Name ' + suffix for suffix in string.ascii_uppercase[:n]]
df = pd.DataFrame(np.random.randint(0, 100, size=(n,n)),
columns=names, index=names)
fig = plt.figure()
botax = plt.gca()
im = botax.matshow(df, interpolation='none')
fig.colorbar(im)
topax = botax.twiny()
for ax, ha, pos in zip([topax, botax], ["left", "right"], ["top", "bottom"]):
ax.set_xticks(np.arange(n))
ax.set_xticklabels(names)
ax.set_yticks(np.arange(n))
ax.set_yticklabels(names)
ax.xaxis.set_ticks_position(pos)
plt.setp(ax.get_xticklabels(), rotation=45,
ha=ha, va="center", rotation_mode="anchor")
for (i,j), z in np.ndenumerate(df):
if z != 0:
botax.text(j, i, str(z), ha="center", va="center")
botax.set_title("Name Co-Occurrences")
fig.tight_layout()
plt.show()
Unfortunately the top labels aren't aligned correctly and I don't know why:

In order to label both, bottom and top of an axes, there is no need for a twin axes. This may make this all a bit easier. You can instead just turn the bottom and top ticks and labels on, and then rotate and align them separately.
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
n = 10
names = ['Long Name ' + suffix for suffix in string.ascii_uppercase[:n]]
df = pd.DataFrame(np.random.randint(0, 100, size=(n,n)),
columns=names, index=names)
fig = plt.figure()
ax = plt.gca()
im = ax.matshow(df, interpolation='none')
fig.colorbar(im)
ax.set_xticks(np.arange(n))
ax.set_xticklabels(names)
ax.set_yticks(np.arange(n))
ax.set_yticklabels(names)
# Set ticks on both sides of axes on
ax.tick_params(axis="x", bottom=True, top=True, labelbottom=True, labeltop=True)
# Rotate and align bottom ticklabels
plt.setp([tick.label1 for tick in ax.xaxis.get_major_ticks()], rotation=45,
ha="right", va="center", rotation_mode="anchor")
# Rotate and align top ticklabels
plt.setp([tick.label2 for tick in ax.xaxis.get_major_ticks()], rotation=45,
ha="left", va="center",rotation_mode="anchor")
ax.set_title("Name Co-Occurrences", pad=55)
fig.tight_layout()
plt.show()

You will have to first set the aspect ratio of the upper x-axis to be the same as that of the lower x-axis. How to do this has been answered here. Then you can use y=1.3 to lift the title a bit upward so that it does not overlap with the upper x-axis tick labels.
topax = botax.twiny()
aspect0 = botax.get_aspect()
if aspect0 == 'equal':
aspect0 = 1.0
dy = np.abs(np.diff(botax.get_ylim()))
dx = np.abs(np.diff(botax.get_xlim()))
aspect = aspect0 / (float(dy) / dx)
topax.set_aspect(aspect)
for ax, ha, pos in zip([topax, botax], ["left", "right"], ["top", "bottom"]):
ax.set_xticks(np.arange(n))
.....
.....
botax.set_title("Name Co-Occurrences", y=1.3)

Matplotlib labeling sub subplots

I want to add labels to my plot which consists of sub subplots. Here is what I want (I added the outer labels with GIMP)
And this is what I actually get:
Here is the code that produces the last plot:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
plots = 16
subplots = 9
fig = plt.figure(figsize=(8, 8))
wh_plots = int(np.sqrt(plots))
wh_subplots = int(np.sqrt(subplots))
outer_grid = gridspec.GridSpec(wh_plots, wh_plots, wspace=0.1, hspace=0.1)
for p in range(plots):
inner_grid = gridspec.GridSpecFromSubplotSpec(wh_subplots, wh_subplots, subplot_spec=outer_grid[p], wspace=0.05, hspace=0.05)
for s in range(subplots):
ax = plt.Subplot(fig, inner_grid[s])
ax.imshow(np.random.rand(10,10), cmap="magma", interpolation="none")
ax.set_xticks([])
ax.set_yticks([])
fig.add_subplot(ax)
if (p+1) > 12 and s == 7:
ax.set_xlabel("sub_xlabel")
if (p) % 4 == 0 and s == 3:
ax.set_ylabel("sub_ylabel")
all_axes = fig.get_axes()
plt.show()
My questions:
How can I get the "xlabel" and "ylabel" as seen in the first plot?
Is there a better way to label the subplots (sub_xlabel / sub_ylabel)
compared to what I did?
if (p+1) > 12 and s == 7:
ax.set_xlabel("sub_xlabel")
if (p) % 4 == 0 and s == 3:
ax.set_ylabel("sub_ylabel")
It works, but it doesn't look right.

You can add these lines before plt.show():
fig.text(0.5, 0.04, 'xlabel', ha='center', fontsize=18)
fig.text(0.04, 0.5, 'ylabel', va='center', rotation='vertical', fontsize=18)

Labeling boxplot in seaborn with median value

How can I label each boxplot in a seaborn plot with the median value?
E.g.
import seaborn as sns
sns.set_style("whitegrid")
tips = sns.load_dataset("tips")
ax = sns.boxplot(x="day", y="total_bill", data=tips)
How do I label each boxplot with the median or average value?

I love when people include sample datasets!
import seaborn as sns
sns.set_style("whitegrid")
tips = sns.load_dataset("tips")
box_plot = sns.boxplot(x="day",y="total_bill",data=tips)
medians = tips.groupby(['day'])['total_bill'].median()
vertical_offset = tips['total_bill'].median() * 0.05 # offset from median for display
for xtick in box_plot.get_xticks():
box_plot.text(xtick,medians[xtick] + vertical_offset,medians[xtick],
horizontalalignment='center',size='x-small',color='w',weight='semibold')

Based on ShikharDua's approach, I created a version which works independent of tick positions. This comes in handy when dealing with grouped data in seaborn (i.e. hue=parameter). Additionally, I added a flier- and orientation-detection.
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
def add_median_labels(ax, fmt='.1f'):
lines = ax.get_lines()
boxes = [c for c in ax.get_children() if type(c).__name__ == 'PathPatch']
lines_per_box = int(len(lines) / len(boxes))
for median in lines[4:len(lines):lines_per_box]:
x, y = (data.mean() for data in median.get_data())
# choose value depending on horizontal or vertical plot orientation
value = x if (median.get_xdata()[1] - median.get_xdata()[0]) == 0 else y
text = ax.text(x, y, f'{value:{fmt}}', ha='center', va='center',
fontweight='bold', color='white')
# create median-colored border around white text for contrast
text.set_path_effects([
path_effects.Stroke(linewidth=3, foreground=median.get_color()),
path_effects.Normal(),
])
tips = sns.load_dataset("tips")
ax = sns.boxplot(data=tips, x='day', y='total_bill', hue="sex")
add_median_labels(ax)
plt.show()

This can also be achieved by deriving median from the plot itself without exclusively computing median from data
box_plot = sns.boxplot(x="day", y="total_bill", data=tips)
ax = box_plot.axes
lines = ax.get_lines()
categories = ax.get_xticks()
for cat in categories:
# every 4th line at the interval of 6 is median line
# 0 -> p25 1 -> p75 2 -> lower whisker 3 -> upper whisker 4 -> p50 5 -> upper extreme value
y = round(lines[4+cat*6].get_ydata()[0],1)
ax.text(
cat,
y,
f'{y}',
ha='center',
va='center',
fontweight='bold',
size=10,
color='white',
bbox=dict(facecolor='#445A64'))
box_plot.figure.tight_layout()

frequency trail in matplotlib

I'm looking into outliers detection. Brendan Gregg has a really nice article and I'm especially intrigued by his visualizations. One of the methods he uses are frequency trails.
I'm trying to reproduce this in matplotlib using this example. Which looks like this:
And the plot is based on this answer: https://stackoverflow.com/a/4152016/948369
Now my issue is, like described by Brendan, that I have a continuous line that masks the outlier (I simplified the input values so you can still see them):
Any help on making the line "non-continuous" for non existent values?

Seaborn also provides a very neat example:
They call it a joy/ridge plot however: https://seaborn.pydata.org/examples/kde_ridgeplot.html
#!/usr/bin/python
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
# Create the data
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, size=.5, palette=pal)
# Draw the densities in a few steps
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, fontweight="bold", color=color,
ha="left", va="center", transform=ax.transAxes)
g.map(label, "x")
# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)
# Remove axes details that don't play will with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)

I would stick with a flat 2D plot and displace each level by a set vertical amount. You'll have to play the the levels (in the code below I called it displace) to properly see the outliers, but this does a pretty good job at replicating your target image. The key, I think, is to set the "zero" values to None so pylab does not draw them.
import numpy as np
import pylab as plt
import itertools
k = 20
X = np.linspace(0, 20, 500)
Y = np.zeros((k,X.size))
# Add some fake data
MU = np.random.random(k)
for n in xrange(k):
Y[n] += np.exp(-(X-MU[n]*n)**2 / (1+n/3))
Y *= 50
# Add some outliers for show
Y += 2*np.random.random(Y.shape)
displace = Y.max()/4
# Add a cutoff
Y[Y<1.0] = None
face_colors = itertools.cycle(["#D3D820", "#C9CC54",
"#D7DA66", "#FDFE42"])
fig = plt.figure()
ax = fig.add_subplot(111, axisbg='black')
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
for n,y in enumerate(Y):
# Vertically displace each plot
y0 = np.ones(y.shape) * n * displace
y1 = y + n*displace
plt.fill_between(X, y0,y1,lw=1,
facecolor=face_colors.next(),
zorder=len(Y)-n)
plt.show()

How can I produce student-style graphs using matplotlib?

I am experimenting with matplotlib at the moment. Some time ago I used Excel VBA code to produce images such as the one attached.
You will notice it is not presented in a scientific/research style but rather as if produced by a school-student on graph paper - with three different grid-line styles.
Is there a fairly straightforward way to achieve this sort of thing with matplotlib?

Yes, you can use spines for this.
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
import numpy as np
fig = plt.figure(1)
ax = fig.add_subplot(111)
# set up axis
ax.spines['left'].set_position('zero')
ax.spines['right'].set_color('none')
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# draw curve
x = np.arange(-2.5,2.5,0.01)
line, = ax.plot(x, x**2)
#set bounds
ax.set_ybound(-1,7)
# create grid
#ax.xaxis.set_major_locator(MultipleLocator(1))
#ax.xaxis.set_minor_locator(MultipleLocator(0.2))
#ax.yaxis.set_major_locator(MultipleLocator(1))
#ax.yaxis.set_minor_locator(MultipleLocator(0.2))
#ax.xaxis.grid(True,'minor')
#ax.yaxis.grid(True,'minor')
#ax.xaxis.grid(True,'major',linewidth=2)
#ax.yaxis.grid(True,'major',linewidth=2)
#adjust grid on the 2s
#for idx,loc in enumerate(ax.xaxis.get_majorticklocs()):
#if loc !=0 and loc % 2 == 0: ax.get_xgridlines()[idx].set_c('r')
#for idx,loc in enumerate(ax.yaxis.get_majorticklocs()):
#if loc !=0 and loc % 2 == 0: ax.get_ygridlines()[idx].set_c('r')
## THIS IS THE EDIT
ax.xaxis.set_minor_locator(MultipleLocator(0.2))
ax.yaxis.set_minor_locator(MultipleLocator(0.2))
ax.xaxis.grid(True,'minor',linewidth=2)
ax.yaxis.grid(True,'minor',linewidth=2)
minor_grid_lines = [tick.gridline for tick in ax.xaxis.get_minor_ticks()]
for idx,loc in enumerate(ax.xaxis.get_minorticklocs()):
if loc % 2.0 == 0: minor_grid_lines[idx].set_c('r' )
elif loc % 1.0 == 0: minor_grid_lines[idx].set_c('g' )
else: minor_grid_lines[idx].set_c( 'b' )
plt.show()

This is a modified version of the accepted answer above.
Maybe somebody will find this helpful
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
import numpy as np
from matplotlib.ticker import FormatStrFormatter
_fontsize_legend = 10
_fontsize = 15
DP = 2
fig = plt.figure(figsize=(12, 12), dpi=100, facecolor='w', edgecolor='k')
##fig = plt.figure()
fig.canvas.draw()
ax = plt.gca()
# set up axis
ax.spines['left'].set_position('zero')
ax.spines['right'].set_color('none')
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# draw curve
x = np.arange(-2.5,2.5,0.01)
line, = ax.plot(x, x**2)
#set bounds
ax.set_ybound(-1,7)
## THIS IS THE EDIT
ax.xaxis.set_major_locator(MultipleLocator(1/4))
ax.yaxis.set_major_locator(MultipleLocator(1/4))
ax.xaxis.grid(True,'major',linewidth=2/DP,linestyle='-',color='#d7d7d7',zorder=0)
ax.yaxis.grid(True,'major',linewidth=2/DP,linestyle='-',color='#d7d7d7')
ax.xaxis.set_minor_locator(MultipleLocator( (1/4) / 5 ))
ax.yaxis.set_minor_locator(MultipleLocator( (1/4) / 5 ))
ax.xaxis.grid(True,'minor',linewidth=0.5/DP,linestyle='-',color='#d7d7d7')
ax.yaxis.grid(True,'minor',linewidth=0.5/DP,linestyle='-',color='#d7d7d7')
ax.set_axisbelow(True)
ax.set_aspect('equal')
##ax.axhline(linewidth=0)
##ax.axvline(linewidth=0)
ax.xaxis.set_major_formatter(FormatStrFormatter('%i'))
xticks = ax.xaxis.get_major_ticks()
for i,l in enumerate(xticks):
if not (i - 1) % 4 == 0:
xticks[i].label1.set_visible(False)
else:
xticks[i].label1.set_fontsize(_fontsize)
ax.yaxis.set_major_formatter(FormatStrFormatter('%i'))
yticks = ax.yaxis.get_major_ticks()
for i,l in enumerate(yticks):
if not (i - 1) % 4 == 0:
yticks[i].label1.set_visible(False)
else:
yticks[i].label1.set_fontsize(_fontsize)
figManager = plt.get_current_fig_manager()
figManager.window.showMaximized()
plt.show()

Just another thought - I have also tried to do it all with the minor gridlines (apart from anything else it will help my understanding), but it's not enumerating properly, no doubt due to the get_minorticklocs and ax.get_xgridlines. Sorry, and thanks in advance...
Geddes
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
import numpy as np
fig = plt.figure(1)
ax = fig.add_subplot(111)
# set up axis
ax.spines['left'].set_position('zero')
ax.spines['right'].set_color('none')
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# draw curve
x = np.arange(-2.5,2.5,0.01)
line, = ax.plot(x, x**2)
#set bounds
ax.set_ybound(-1,7)
# create grid
ax.xaxis.set_minor_locator(MultipleLocator(0.2))
ax.yaxis.set_minor_locator(MultipleLocator(0.2))
ax.xaxis.grid(True,'minor',linewidth=2)
ax.yaxis.grid(True,'minor',linewidth=2)
#adjust grid on the 2s
for idx,loc in enumerate(ax.xaxis.get_minorticklocs()):
if loc % 2 == 0: ax.get_xgridlines()[idx].set_color('r')
if loc % 1 == 0: ax.get_xgridlines()[idx].set_color('g')
if loc % 0.2 == 0: ax.get_xgridlines()[idx].set_color('b')
for idx,loc in enumerate(ax.yaxis.get_majorticklocs()):
if loc % 2 == 0: ax.get_ygridlines()[idx].set_c('b')
plt.savefig('spines3.png',dpi=300)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to shift quartile lines in seaborn grouped violin plots? - python

You can do it exactly the same way as you did with the violins: for i, line in enumerate(ax.get_lines()): line.get_path().vertices[:, 0] += delta if i // 3 % 2 else -delta

Related

matplotlib matshow xtick labels on top and bottom

Matplotlib labeling sub subplots

Labeling boxplot in seaborn with median value

frequency trail in matplotlib

How can I produce student-style graphs using matplotlib?

Categories

Resources