Addin median line to my seaborn multi columns plot - python

Can you please help me adding the median to each of the seaborn plots ? We have here one plot per specialty, and this is allowed by the following line of code:
sns.displot(data=all_df, x="exp", hue="sexe", col='specialite', kind="kde"), which gives:
Thank you very much

You could add the medians manually, e.g. using .map_dataframe(). Here is some example code using the penguins dataset:
import matplotlib.pyplot as plt
import seaborn as sns
def plot_medians(data, color):
for hue, color in zip(hue_order, palette):
plt.axvline(data[data['sex'] == hue][x_colummn].median(), color=color, ls=':')
penguins = sns.load_dataset('penguins').dropna()
hue_order = penguins['sex'].unique()
palette = ['dodgerblue', 'crimson']
x_colummn = "bill_length_mm"
g = sns.displot(data=penguins, x=x_colummn,
hue="sex", hue_order=hue_order, palette=palette,
col="species", kind="kde")
g.map_dataframe(plot_medians)
plt.show()

Related

How to add multiple custom ticks to seaborn boxplot

I generated a boxplot using seaborn. On the x axis, I would like to have, both the number of days (20, 25, 32) and the actual dates they refer to (2022-05-08, 2022-05-13, 2022-05-20).
I found a potential solution at the following link add custom tick with matplotlib. I'm trying to adapt it to my problem but I could only get the number of days or the dates, not both.
I really would appreciate any help. Thank you in advance for your time.
Please, find below my code and the desired output.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = [tick_label.get(t, ticks[i]) for i,t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()
Here is the desired output.
You can do that by adding these lines in place of ax.set_xticklabels(labels)
new_labels=["{}\n{}".format(a_, b_) for a_, b_ in zip(ticks, labels)]
ax.set_xticklabels(new_labels)
Output
Try this:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = ["{}\n".format(t)+tick_label.get(t, ticks[i]) for i, t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()

Customize Seaborn Hue Legend for Boxplot

When I tried to plot this boxplot figure , legend of age group was shown as below.
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
plt.figure(figsize=(14,7))
sns.set(style="white", palette="Blues", color_codes=True)
f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df)
plt.savefig("out.png",pad_inches=0.5)
plt.show()
But when I tried to customize the legend my code is
plt.figure(figsize=(14,7))
sns.set(style="white", palette="Blues", color_codes=True)
f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df)
f.set_xlabel("Sex")
f.set_ylabel("Systolic Blood Pressure")
legend_label = ["(18, 30)", "(30, 40)", "(40, 50)", "(50, 60)", "(60, 70)", "(70, 80)"]
f.legend(title="Age Group", labels=legend_label)
plt.savefig("out.png",pad_inches=0.5)
plt.show()
This f.legend(title="Age Group", labels=legend_label) line was able to customize the title and labels but it caused errors in the markers. I need to set the markers to the color pallet as it was in the previous figure.
As of seaborn 0.10.1, the legend label is stored in ax.legend_.texts[0], where ax is the matplotlib Axes returned by sns.boxplot(). This means that you can edit the legend label without changing anything else about the legend as follows.
g = sns.boxplot(...)
new_legend_label = 'Age Group'
g.legend_.texts[0].set_text(new_legend_label)
Depending on what version of seaborn you're using, the method might be different. See these answers from 2017 and 2019 for slightly different syntax with older versions.
Thank You Emerson Harkin. Your solution was useful. I just iterate over list of labels to update all. Here is my updated code and figure:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
plt.figure(figsize=(14,7))
sns.set(style="white", palette="Blues", color_codes=True)
f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df)
f.set_xlabel("Sex")
f.set_ylabel("Systolic Blood Pressure")
legend_label = ["(18, 30)", "(30, 40)", "(40, 50)", "(50, 60)", "(60, 70)", "(70, 80)"]
f.legend(title="Age Group")
n = 0
for i in legend_label:
f.legend_.texts[n].set_text(i)
n += 1
plt.show()
Updated Figure

Fix x-axis scale seaborn factorplot

I'm attempting to make a figure that shows two plots, with each plot separated based on a set of categorical data. However, although I can make the graph, I cant figure out how to get the x-axis to be properly spaced.
I want the x-axis to start before the first value (want axis to start at 60 [first value = 63]) and end after the last (want axis to end at 95 [last value = 92.1]), with xticks going up in 5's.
Any help is much appreciated! Thanks in advance!
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.axes
import seaborn as sns
Temperature = [63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1,63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1]
Derivative = [0.0495,0.0507,0.0525,0.0548,0.0570,0.0579,0.0579,0.0574,0.0574,0.0576,0.0581,0.0587,0.0593,0.0592,0.0584,0.0580,0.0579,0.0580,0.0582,0.0588,0.0592,0.0594,0.0588,0.0581,0.0578,0.0579,0.0580,0.0579,0.0582,0.0581,0.0579,0.0574,0.0571,0.0563,0.0548,0.0538,0.0536,0.0540,0.0544,0.0551,0.0556,0.0551,0.0542,0.0535,0.0536,0.0542,0.0564,0.0623,0.0748,0.0982,0.1360,0.1897,0.2550,0.3228,0.3807,0.4177,0.4248,0.3966,0.3365,0.2558,0.1713,0.0971,0.0438,0.0140,0.0034,0.0028,0.0048,0.0058,0.0057,0.0050,0.0042,0.0038,0.0039,0.0041,0.0038,0.0031,0.0023,0.0017,0.0014,0.0012,0.0015,0.0019,0.0020,0.0018,0.0017,0.0015,0.0014,0.0014,0.0015,0.0014,0.0013,0.0011,0.0007,0.0004,0.0011,0.0105,0.0100,0.0096,0.0090,0.0084,0.0081,0.0077,0.0071,0.0066,0.0063,0.0064,0.0060,0.0057,0.0055,0.0054,0.0051,0.0047,0.0046,0.0042,0.0037,0.0035,0.0040,0.0043,0.0039,0.0032,0.0028,0.0028,0.0027,0.0029,0.0034,0.0038,0.0034,0.0027,0.0024,0.0021,0.0017,0.0015,0.0016,0.0015,0.0011,0.0008,0.0012,0.0019,0.0025,0.0027,0.0026,0.0019,0.0012,0.0010,0.0014,0.0016,0.0014,0.0010,0.0007,0.0007,0.0010,0.0017,0.0021,0.0020,0.0013,0.0012,0.0013,0.0014,0.0015,0.0018,0.0017,0.0012,0.0013,0.0018,0.0028,0.0031,0.0033,0.0027,0.0022,0.0015,0.0016,0.0022,0.0026,0.0026,0.0019,0.0012,0.0006,0.0007,0.0011,0.0016,0.0014,0.0010,0.0009,0.0012,0.0015,0.0014,0.0008,0.0001,-0.0003,0.0002]
Category = ["a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b"]
df = pd.DataFrame({"Temperature": Temperature,
"Derivative": Derivative,
"Category" : Category})
g = sns.factorplot(x="Temperature", y="Derivative", data=df, col="Category")
g.set_xticklabels(step=10)
All the desired feature you describe suggest that using a factorplot here is absolutely the wrong choice. Instead use a normal matplotlib plot and then set the limits as usual, plt.xlim(60,95).
import pandas as pd
import matplotlib.pyplot as plt
Temperature = [63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1,63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1]
Derivative = [0.0495,0.0507,0.0525,0.0548,0.0570,0.0579,0.0579,0.0574,0.0574,0.0576,0.0581,0.0587,0.0593,0.0592,0.0584,0.0580,0.0579,0.0580,0.0582,0.0588,0.0592,0.0594,0.0588,0.0581,0.0578,0.0579,0.0580,0.0579,0.0582,0.0581,0.0579,0.0574,0.0571,0.0563,0.0548,0.0538,0.0536,0.0540,0.0544,0.0551,0.0556,0.0551,0.0542,0.0535,0.0536,0.0542,0.0564,0.0623,0.0748,0.0982,0.1360,0.1897,0.2550,0.3228,0.3807,0.4177,0.4248,0.3966,0.3365,0.2558,0.1713,0.0971,0.0438,0.0140,0.0034,0.0028,0.0048,0.0058,0.0057,0.0050,0.0042,0.0038,0.0039,0.0041,0.0038,0.0031,0.0023,0.0017,0.0014,0.0012,0.0015,0.0019,0.0020,0.0018,0.0017,0.0015,0.0014,0.0014,0.0015,0.0014,0.0013,0.0011,0.0007,0.0004,0.0011,0.0105,0.0100,0.0096,0.0090,0.0084,0.0081,0.0077,0.0071,0.0066,0.0063,0.0064,0.0060,0.0057,0.0055,0.0054,0.0051,0.0047,0.0046,0.0042,0.0037,0.0035,0.0040,0.0043,0.0039,0.0032,0.0028,0.0028,0.0027,0.0029,0.0034,0.0038,0.0034,0.0027,0.0024,0.0021,0.0017,0.0015,0.0016,0.0015,0.0011,0.0008,0.0012,0.0019,0.0025,0.0027,0.0026,0.0019,0.0012,0.0010,0.0014,0.0016,0.0014,0.0010,0.0007,0.0007,0.0010,0.0017,0.0021,0.0020,0.0013,0.0012,0.0013,0.0014,0.0015,0.0018,0.0017,0.0012,0.0013,0.0018,0.0028,0.0031,0.0033,0.0027,0.0022,0.0015,0.0016,0.0022,0.0026,0.0026,0.0019,0.0012,0.0006,0.0007,0.0011,0.0016,0.0014,0.0010,0.0009,0.0012,0.0015,0.0014,0.0008,0.0001,-0.0003,0.0002]
Category = ["a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b"]
df = pd.DataFrame({"Temperature": Temperature,
"Derivative": Derivative,
"Category" : Category})
for n, data in df.groupby("Category"):
plt.plot(data["Temperature"],data["Derivative"] , marker="o", label=n)
plt.xlim(60,95)
plt.legend()
plt.show()
Or if subplots are desired,
fig,axes = plt.subplots(ncols=len(df["Category"].unique()), sharey=True)
for ax,(n, data) in zip(axes,df.groupby("Category")):
ax.plot(data["Temperature"],data["Derivative"] , marker="o", label=n)
ax.set_title("Category {}".format(n))
ax.set_xlim(60,95)
plt.show()
Finally, you may use a seaborn FacetGrid onto which you plot your data with a plot:
g = sns.FacetGrid(df, col="Category")
g.map(plt.plot, "Temperature", "Derivative",marker="o",)
for ax in g.axes.flat:
ax.set_xlim(60,95)
plt.show()

Is it possible to annotate a seaborn violin plot with number of observations in each group?

I would like to annotate my violin plot with the number of observations in each group. So the question is essentially the same as this one, except:
python instead of R,
seaborn instead of ggplot, and
violin plots instead of boxplots
Lets take this example from Seaborn API documentation:
import seaborn as sns
sns.set_style("whitegrid")
tips = sns.load_dataset("tips")
ax = sns.violinplot(x="day", y="total_bill", data=tips)
I'd like to have n=62, n=19, n=87, and n=76 on top of the violins. Is this doable?
In this situation, I like to precompute the annotated values and incorporate them into the categorical axis. In other words, precompute e.g., "Thurs, N = xxx"
That looks like this:
import seaborn as sns
sns.set_style("whitegrid")
ax= (
sns.load_dataset("tips")
.assign(count=lambda df: df['day'].map(df.groupby(by=['day'])['total_bill'].count()))
.assign(grouper=lambda df: df['day'].astype(str) + '\nN = ' + df['count'].astype(str))
.sort_values(by='day')
.pipe((sns.violinplot, 'data'), x="grouper", y="total_bill")
.set(xlabel='Day of the Week', ylabel='Total Bill (USD)')
)
You first need to store all values of y positions and x positions (using your dataset for that) in order to use ax.text, then a simple for loop can write everything in the positions desired:
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset("tips")
ax = sns.violinplot(x="day", y="total_bill", data=tips)
yposlist = tips.groupby(['day'])['total_bill'].median().tolist()
xposlist = range(len(yposlist))
stringlist = ['n = 62','n = 19','n = 87','n = 76']
for i in range(len(stringlist)):
ax.text(xposlist[i], yposlist[i], stringlist[i])
plt.show()

making colorbar values integer in a heatmap matplotlib seaborn

I'm trying to make my colourbar have integer values instead of decimals, but coding this is a lot harder than anticipated.
my initial code
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
#sns.set()
# read data
revels_data = pd.read_csv("revels2.txt")
rd = revels_data
revels = rd.pivot("Flavour", "Packet number", "Contents")
# orders flavours
revels.index = pd.CategoricalIndex(revels.index, categories=["orange", "toffee", "chocolate", "malteser", "raisin", "coffee"])
revels.sortlevel(level=0, inplace=True)
# Draw a heatmap with the numeric values in each cell
ax = sns.heatmap(revels, annot=True, fmt="d", linewidths=0.4, cmap="YlOrRd")
ax.set_title('REVELS PACKET COUNT HEATMAP', weight="bold")
plt.show()
which produces
Trying to reverse engineer one of the answers from here
by adding the following code
cmap = plt.get_cmap("YlOrRd", np.max(rd.Contents)-np.min(rd.Contents)+1)
plt.get_cmap("YlOrRd", np.max(rd.Contents)-np.min(rd.Contents)+1)
# set limits .5 outside true range
mat = plt.matshow(rd.Contents, cmap=cmap, vmin = np.min(rd.Contents)-.5, vmax = np.max(rd.Contents)+.5)
plt.matshow(rd.Contents ,cmap=cmap, vmin = np.min(rd.Contents)-.5, vmax = np.max(rd.Contents)+.5)
#tell the colorbar to tick at integers
cax = plt.colorbar(mat, ticks=np.arange(np.min(rd.Contents),np.max(rd.Contents)+1))
plt.colorbar(mat, ticks=np.arange(np.min(rd.Contents),np.max(rd.Contents)+1))
but getting errors, namely ValueError: not enough values to unpack.
I think I may have applied the code wrong, would appreciate any help.
Here is a full working example, which creates a discrete colorbar for a seaborn heatmap plot with integer values as colorbar ticks.
import pandas as pd
import numpy as np; np.random.seed(8)
import matplotlib.pyplot as plt
import seaborn.apionly as sns
plt.rcParams["figure.figsize"] = 10,5.5
flavours=["orange", "toffee", "chocolate", "malteser", "raisin", "coffee"]
num = np.arange(0, 6*36).astype(int) % 36
flavs = np.random.choice(flavours, size=len(num))
conts = np.random.randint(0,6, len(num)).astype(int)
df = pd.DataFrame({"Packet number":num ,"Flavour":flavs,"Contents" : conts})
revels = pd.pivot_table(df, index=["Flavour"], columns=["Packet number"], values="Contents", aggfunc=np.sum)
revels.index = pd.CategoricalIndex(revels.index, categories=flavours)
revels.sortlevel(level=0, inplace=True)
revels= revels.fillna(0)
ticks=np.arange(revels.values.min(),revels.values.max()+1 )
boundaries = np.arange(revels.values.min()-.5,revels.values.max()+1.5 )
cmap = plt.get_cmap("YlOrRd", revels.values.max()-revels.values.min()+1)
ax = sns.heatmap(revels, annot=True, linewidths=0.4, cmap=cmap,
cbar_kws={"ticks":ticks, "boundaries":boundaries})
ax.set_title('REVELS PACKET COUNT HEATMAP', weight="bold")
plt.tight_layout()
plt.show()

Categories

Resources