Adding a median line to my seaborn multi-column plot - python
Can you please help me add the median to each of the seaborn plots? We have one plot per specialty, which is produced by the following line of code:
sns.displot(data=all_df, x="exp", hue="sexe", col='specialite', kind="kde"), which gives:
Thank you very much
You could add the medians manually, e.g. using .map_dataframe(). Here is some example code using the penguins dataset:
import matplotlib.pyplot as plt
import seaborn as sns
def plot_medians(data, color):
    """Draw a dotted vertical line at the median of each hue group.

    Written to be handed to ``g.map_dataframe(plot_medians)``; the lines are
    drawn with ``plt.axvline``, i.e. on whichever axes is current when the
    function is called.

    Parameters
    ----------
    data : DataFrame
        Rows to summarise. Must contain a ``'sex'`` column and the column
        named by the module-level ``x_colummn``.
    color : any
        Not used. Line colors come from the module-level ``palette``; the
        parameter is kept so the signature stays unchanged for the caller
        (presumably the facet grid supplies it -- confirm against the
        seaborn documentation).

    Notes
    -----
    Relies on the module-level ``hue_order`` and ``palette`` being aligned,
    because they are paired positionally with ``zip``.
    """
    # Use distinct loop names so the ``color`` parameter is not shadowed.
    for hue_value, hue_color in zip(hue_order, palette):
        # Select the x column of the rows belonging to this hue level in a
        # single indexing step, then reduce it to its median.
        median = data.loc[data['sex'] == hue_value, x_colummn].median()
        plt.axvline(median, color=hue_color, ls=':')
# Example data: the seaborn "penguins" dataset with incomplete rows removed.
penguins = sns.load_dataset('penguins').dropna()

# Plot configuration shared with plot_medians(): the column shown on the
# x axis, one color per hue level, and the order of the hue levels.
x_colummn = "bill_length_mm"
palette = ['dodgerblue', 'crimson']
hue_order = penguins['sex'].unique()

# One KDE panel per species, colored by sex.
g = sns.displot(
    data=penguins,
    x=x_colummn,
    hue="sex",
    hue_order=hue_order,
    palette=palette,
    col="species",
    kind="kde",
)

# Overlay the per-sex median lines on every panel, then display the figure.
g.map_dataframe(plot_medians)
plt.show()
Related
How to add multiple custom ticks to seaborn boxplot
I generated a boxplot using seaborn. On the x axis, I would like to have, both the number of days (20, 25, 32) and the actual dates they refer to (2022-05-08, 2022-05-13, 2022-05-20). I found a potential solution at the following link add custom tick with matplotlib. I'm trying to adapt it to my problem but I could only get the number of days or the dates, not both. I really would appreciate any help. Thank you in advance for your time. Please, find below my code and the desired output. import pandas as pd import matplotlib.pyplot as plt import seaborn as sns df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32], 'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'], 'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]}) df['Dates'] = df['Dates'].apply(pd.to_datetime) tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label #Plot fig,ax = plt.subplots(figsize=(6,6)) ax = sns.boxplot(x='nb_days',y='score',data=df,color=None) # iterate over boxes to change color for i,box in enumerate(ax.artists): box.set_edgecolor('red') box.set_facecolor('white') sns.stripplot(x='nb_days',y='score',data=df,color='black') ticks = sorted(df['nb_days'].unique()) labels = [tick_label.get(t, ticks[i]) for i,t in enumerate(ticks)] ax.set_xticklabels(labels) plt.tight_layout() plt.show() plt.close() Here is the desired output.
You can do that by adding these lines in place of ax.set_xticklabels(labels) new_labels=["{}\n{}".format(a_, b_) for a_, b_ in zip(ticks, labels)] ax.set_xticklabels(new_labels) Output
Try this: import pandas as pd import matplotlib.pyplot as plt import seaborn as sns df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32], 'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'], 'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]}) df['Dates'] = df['Dates'].apply(pd.to_datetime) tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label #Plot fig,ax = plt.subplots(figsize=(6,6)) ax = sns.boxplot(x='nb_days',y='score',data=df,color=None) # iterate over boxes to change color for i,box in enumerate(ax.artists): box.set_edgecolor('red') box.set_facecolor('white') sns.stripplot(x='nb_days',y='score',data=df,color='black') ticks = sorted(df['nb_days'].unique()) labels = ["{}\n".format(t)+tick_label.get(t, ticks[i]) for i, t in enumerate(ticks)] ax.set_xticklabels(labels) plt.tight_layout() plt.show() plt.close()
Customize Seaborn Hue Legend for Boxplot
When I tried to plot this boxplot figure, the legend for the age group was shown as below. %matplotlib inline import matplotlib.pyplot as plt import seaborn as sns import pandas as pd plt.figure(figsize=(14,7)) sns.set(style="white", palette="Blues", color_codes=True) f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df) plt.savefig("out.png",pad_inches=0.5) plt.show() But when I tried to customize the legend, my code was plt.figure(figsize=(14,7)) sns.set(style="white", palette="Blues", color_codes=True) f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df) f.set_xlabel("Sex") f.set_ylabel("Systolic Blood Pressure") legend_label = ["(18, 30)", "(30, 40)", "(40, 50)", "(50, 60)", "(60, 70)", "(70, 80)"] f.legend(title="Age Group", labels=legend_label) plt.savefig("out.png",pad_inches=0.5) plt.show() This f.legend(title="Age Group", labels=legend_label) line was able to customize the title and labels, but it caused errors in the markers. I need the markers to use the color palette, as they did in the previous figure.
As of seaborn 0.10.1, the legend label is stored in ax.legend_.texts[0], where ax is the matplotlib Axes returned by sns.boxplot(). This means that you can edit the legend label without changing anything else about the legend as follows. g = sns.boxplot(...) new_legend_label = 'Age Group' g.legend_.texts[0].set_text(new_legend_label) Depending on what version of seaborn you're using, the method might be different. See these answers from 2017 and 2019 for slightly different syntax with older versions.
Thank You Emerson Harkin. Your solution was useful. I just iterate over list of labels to update all. Here is my updated code and figure: %matplotlib inline import matplotlib.pyplot as plt import seaborn as sns import pandas as pd plt.figure(figsize=(14,7)) sns.set(style="white", palette="Blues", color_codes=True) f = sns.boxplot(x="RIAGENDRtxt", y="BPXSY1", hue="agegrp", data=df) f.set_xlabel("Sex") f.set_ylabel("Systolic Blood Pressure") legend_label = ["(18, 30)", "(30, 40)", "(40, 50)", "(50, 60)", "(60, 70)", "(70, 80)"] f.legend(title="Age Group") n = 0 for i in legend_label: f.legend_.texts[n].set_text(i) n += 1 plt.show() Updated Figure
Fix x-axis scale seaborn factorplot
I'm attempting to make a figure that shows two plots, with each plot separated based on a set of categorical data. However, although I can make the graph, I cant figure out how to get the x-axis to be properly spaced. I want the x-axis to start before the first value (want axis to start at 60 [first value = 63]) and end after the last (want axis to end at 95 [last value = 92.1]), with xticks going up in 5's. Any help is much appreciated! Thanks in advance! import pandas as pd import matplotlib.pyplot as plt import matplotlib.axes import seaborn as sns Temperature = [63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1,63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1] Derivative = 
[0.0495,0.0507,0.0525,0.0548,0.0570,0.0579,0.0579,0.0574,0.0574,0.0576,0.0581,0.0587,0.0593,0.0592,0.0584,0.0580,0.0579,0.0580,0.0582,0.0588,0.0592,0.0594,0.0588,0.0581,0.0578,0.0579,0.0580,0.0579,0.0582,0.0581,0.0579,0.0574,0.0571,0.0563,0.0548,0.0538,0.0536,0.0540,0.0544,0.0551,0.0556,0.0551,0.0542,0.0535,0.0536,0.0542,0.0564,0.0623,0.0748,0.0982,0.1360,0.1897,0.2550,0.3228,0.3807,0.4177,0.4248,0.3966,0.3365,0.2558,0.1713,0.0971,0.0438,0.0140,0.0034,0.0028,0.0048,0.0058,0.0057,0.0050,0.0042,0.0038,0.0039,0.0041,0.0038,0.0031,0.0023,0.0017,0.0014,0.0012,0.0015,0.0019,0.0020,0.0018,0.0017,0.0015,0.0014,0.0014,0.0015,0.0014,0.0013,0.0011,0.0007,0.0004,0.0011,0.0105,0.0100,0.0096,0.0090,0.0084,0.0081,0.0077,0.0071,0.0066,0.0063,0.0064,0.0060,0.0057,0.0055,0.0054,0.0051,0.0047,0.0046,0.0042,0.0037,0.0035,0.0040,0.0043,0.0039,0.0032,0.0028,0.0028,0.0027,0.0029,0.0034,0.0038,0.0034,0.0027,0.0024,0.0021,0.0017,0.0015,0.0016,0.0015,0.0011,0.0008,0.0012,0.0019,0.0025,0.0027,0.0026,0.0019,0.0012,0.0010,0.0014,0.0016,0.0014,0.0010,0.0007,0.0007,0.0010,0.0017,0.0021,0.0020,0.0013,0.0012,0.0013,0.0014,0.0015,0.0018,0.0017,0.0012,0.0013,0.0018,0.0028,0.0031,0.0033,0.0027,0.0022,0.0015,0.0016,0.0022,0.0026,0.0026,0.0019,0.0012,0.0006,0.0007,0.0011,0.0016,0.0014,0.0010,0.0009,0.0012,0.0015,0.0014,0.0008,0.0001,-0.0003,0.0002] Category = 
["a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b"] df = pd.DataFrame({"Temperature": Temperature, "Derivative": Derivative, "Category" : Category}) g = sns.factorplot(x="Temperature", y="Derivative", data=df, col="Category") g.set_xticklabels(step=10)
All the desired feature you describe suggest that using a factorplot here is absolutely the wrong choice. Instead use a normal matplotlib plot and then set the limits as usual, plt.xlim(60,95). import pandas as pd import matplotlib.pyplot as plt Temperature = [63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1,63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1] Derivative = 
[0.0495,0.0507,0.0525,0.0548,0.0570,0.0579,0.0579,0.0574,0.0574,0.0576,0.0581,0.0587,0.0593,0.0592,0.0584,0.0580,0.0579,0.0580,0.0582,0.0588,0.0592,0.0594,0.0588,0.0581,0.0578,0.0579,0.0580,0.0579,0.0582,0.0581,0.0579,0.0574,0.0571,0.0563,0.0548,0.0538,0.0536,0.0540,0.0544,0.0551,0.0556,0.0551,0.0542,0.0535,0.0536,0.0542,0.0564,0.0623,0.0748,0.0982,0.1360,0.1897,0.2550,0.3228,0.3807,0.4177,0.4248,0.3966,0.3365,0.2558,0.1713,0.0971,0.0438,0.0140,0.0034,0.0028,0.0048,0.0058,0.0057,0.0050,0.0042,0.0038,0.0039,0.0041,0.0038,0.0031,0.0023,0.0017,0.0014,0.0012,0.0015,0.0019,0.0020,0.0018,0.0017,0.0015,0.0014,0.0014,0.0015,0.0014,0.0013,0.0011,0.0007,0.0004,0.0011,0.0105,0.0100,0.0096,0.0090,0.0084,0.0081,0.0077,0.0071,0.0066,0.0063,0.0064,0.0060,0.0057,0.0055,0.0054,0.0051,0.0047,0.0046,0.0042,0.0037,0.0035,0.0040,0.0043,0.0039,0.0032,0.0028,0.0028,0.0027,0.0029,0.0034,0.0038,0.0034,0.0027,0.0024,0.0021,0.0017,0.0015,0.0016,0.0015,0.0011,0.0008,0.0012,0.0019,0.0025,0.0027,0.0026,0.0019,0.0012,0.0010,0.0014,0.0016,0.0014,0.0010,0.0007,0.0007,0.0010,0.0017,0.0021,0.0020,0.0013,0.0012,0.0013,0.0014,0.0015,0.0018,0.0017,0.0012,0.0013,0.0018,0.0028,0.0031,0.0033,0.0027,0.0022,0.0015,0.0016,0.0022,0.0026,0.0026,0.0019,0.0012,0.0006,0.0007,0.0011,0.0016,0.0014,0.0010,0.0009,0.0012,0.0015,0.0014,0.0008,0.0001,-0.0003,0.0002] Category = 
["a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b"] df = pd.DataFrame({"Temperature": Temperature, "Derivative": Derivative, "Category" : Category}) for n, data in df.groupby("Category"): plt.plot(data["Temperature"],data["Derivative"] , marker="o", label=n) plt.xlim(60,95) plt.legend() plt.show() Or if subplots are desired, fig,axes = plt.subplots(ncols=len(df["Category"].unique()), sharey=True) for ax,(n, data) in zip(axes,df.groupby("Category")): ax.plot(data["Temperature"],data["Derivative"] , marker="o", label=n) ax.set_title("Category {}".format(n)) ax.set_xlim(60,95) plt.show() Finally, you may use a seaborn FacetGrid onto which you plot your data with a plot: g = sns.FacetGrid(df, col="Category") g.map(plt.plot, "Temperature", "Derivative",marker="o",) for ax in g.axes.flat: ax.set_xlim(60,95) plt.show()
Is it possible to annotate a seaborn violin plot with number of observations in each group?
I would like to annotate my violin plot with the number of observations in each group. So the question is essentially the same as this one, except: python instead of R, seaborn instead of ggplot, and violin plots instead of boxplots. Let's take this example from the Seaborn API documentation: import seaborn as sns sns.set_style("whitegrid") tips = sns.load_dataset("tips") ax = sns.violinplot(x="day", y="total_bill", data=tips) I'd like to have n=62, n=19, n=87, and n=76 on top of the violins. Is this doable?
In this situation, I like to precompute the annotated values and incorporate them into the categorical axis. In other words, precompute e.g., "Thurs, N = xxx" That looks like this: import seaborn as sns sns.set_style("whitegrid") ax= ( sns.load_dataset("tips") .assign(count=lambda df: df['day'].map(df.groupby(by=['day'])['total_bill'].count())) .assign(grouper=lambda df: df['day'].astype(str) + '\nN = ' + df['count'].astype(str)) .sort_values(by='day') .pipe((sns.violinplot, 'data'), x="grouper", y="total_bill") .set(xlabel='Day of the Week', ylabel='Total Bill (USD)') )
You first need to store all values of y positions and x positions (using your dataset for that) in order to use ax.text, then a simple for loop can write everything in the positions desired: import seaborn as sns import matplotlib.pyplot as plt tips = sns.load_dataset("tips") ax = sns.violinplot(x="day", y="total_bill", data=tips) yposlist = tips.groupby(['day'])['total_bill'].median().tolist() xposlist = range(len(yposlist)) stringlist = ['n = 62','n = 19','n = 87','n = 76'] for i in range(len(stringlist)): ax.text(xposlist[i], yposlist[i], stringlist[i]) plt.show()
making colorbar values integer in a heatmap matplotlib seaborn
I'm trying to make my colourbar have integer values instead of decimals, but coding this is a lot harder than anticipated. my initial code import pandas as pd import matplotlib.pyplot as plt import numpy as np import seaborn as sns #sns.set() # read data revels_data = pd.read_csv("revels2.txt") rd = revels_data revels = rd.pivot("Flavour", "Packet number", "Contents") # orders flavours revels.index = pd.CategoricalIndex(revels.index, categories=["orange", "toffee", "chocolate", "malteser", "raisin", "coffee"]) revels.sortlevel(level=0, inplace=True) # Draw a heatmap with the numeric values in each cell ax = sns.heatmap(revels, annot=True, fmt="d", linewidths=0.4, cmap="YlOrRd") ax.set_title('REVELS PACKET COUNT HEATMAP', weight="bold") plt.show() which produces Trying to reverse engineer one of the answers from here by adding the following code cmap = plt.get_cmap("YlOrRd", np.max(rd.Contents)-np.min(rd.Contents)+1) plt.get_cmap("YlOrRd", np.max(rd.Contents)-np.min(rd.Contents)+1) # set limits .5 outside true range mat = plt.matshow(rd.Contents, cmap=cmap, vmin = np.min(rd.Contents)-.5, vmax = np.max(rd.Contents)+.5) plt.matshow(rd.Contents ,cmap=cmap, vmin = np.min(rd.Contents)-.5, vmax = np.max(rd.Contents)+.5) #tell the colorbar to tick at integers cax = plt.colorbar(mat, ticks=np.arange(np.min(rd.Contents),np.max(rd.Contents)+1)) plt.colorbar(mat, ticks=np.arange(np.min(rd.Contents),np.max(rd.Contents)+1)) but getting errors, namely ValueError: not enough values to unpack. I think I may have applied the code wrong, would appreciate any help.
Here is a full working example, which creates a discrete colorbar for a seaborn heatmap plot with integer values as colorbar ticks. import pandas as pd import numpy as np; np.random.seed(8) import matplotlib.pyplot as plt import seaborn.apionly as sns plt.rcParams["figure.figsize"] = 10,5.5 flavours=["orange", "toffee", "chocolate", "malteser", "raisin", "coffee"] num = np.arange(0, 6*36).astype(int) % 36 flavs = np.random.choice(flavours, size=len(num)) conts = np.random.randint(0,6, len(num)).astype(int) df = pd.DataFrame({"Packet number":num ,"Flavour":flavs,"Contents" : conts}) revels = pd.pivot_table(df, index=["Flavour"], columns=["Packet number"], values="Contents", aggfunc=np.sum) revels.index = pd.CategoricalIndex(revels.index, categories=flavours) revels.sortlevel(level=0, inplace=True) revels= revels.fillna(0) ticks=np.arange(revels.values.min(),revels.values.max()+1 ) boundaries = np.arange(revels.values.min()-.5,revels.values.max()+1.5 ) cmap = plt.get_cmap("YlOrRd", revels.values.max()-revels.values.min()+1) ax = sns.heatmap(revels, annot=True, linewidths=0.4, cmap=cmap, cbar_kws={"ticks":ticks, "boundaries":boundaries}) ax.set_title('REVELS PACKET COUNT HEATMAP', weight="bold") plt.tight_layout() plt.show()