Altair boxplot with nested grouping by two categorical variables - python

How can I reproduce the following graph done in seaborn in altair?
import seaborn as sns
tips = sns.load_dataset("tips")
ax = sns.boxplot(x="day", y="total_bill", hue="smoker",
data=tips, palette="Set3")
This was my attempt:
import altair as alt
chart = (
alt.Chart(tips)
.mark_boxplot()
.encode(x=alt.X("day"), y=alt.Y("total_bill"), color="smoker")
.interactive()
.properties(width=300))
chart.show()
which gives me this unwanted graph:

Put smoker on the x-axis and use the column facet for the day and play a bit with the padding and spacing:
# Nested (grouped) boxplot: one facet column per day, one box per smoker
# status inside each facet. `tips` is the DataFrame loaded in the question
# via sns.load_dataset("tips").
chart = alt.Chart(tips).mark_boxplot(ticks=True).encode(
    # smoker is already encoded by colour, so its axis labels/ticks are hidden;
    # the scale padding controls the spacing of the boxes inside a facet.
    x=alt.X("smoker:O", title=None, axis=alt.Axis(labels=False, ticks=False), scale=alt.Scale(padding=1)),
    y=alt.Y("total_bill:Q"),
    color="smoker:N",
    # Explicit weekday order; headers are placed at the bottom of the facets.
    column=alt.Column('day:N', sort=['Thur','Fri','Sat','Sun'], header=alt.Header(orient='bottom'))
).properties(
    width=100
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
)
chart

Related

Stacking to 100% with `seaborn.objects`

I'm trying to make a plot with bars or areas rescaled to 100% with the new seaborn.objects interface and I can't seem to get so.Norm() to work, with or without by...
Here's what I've got so far:
import seaborn as sns
import seaborn.objects as so
tips = sns.load_dataset("tips")
# bars
(
so.Plot(tips, x="day", y="total_bill", color="time")
.add(so.Bar(), so.Agg("sum"), so.Norm(func="sum"), so.Stack())
)
#areas
(
so.Plot(tips, x="size", y="total_bill", color="time")
.add(so.Area(), so.Agg("sum"), so.Norm(func="sum"), so.Stack())
)
I think that you intend for the height of each (stacked) bar to equal 1, so you'd want to aggregate by x values when normalizing:
(
so.Plot(tips, x="day", y="total_bill", color="time")
# by=["x"] makes Norm work within each x value, which is what the
# accompanying text asks for: every stacked bar ends up with height 1.
.add(so.Bar(), so.Agg("sum"), so.Norm(func="sum", by=["x"]), so.Stack())
)

Plotly: How to display the total sum of the values at top of a stacked bar chart along with the individual bar values?

I am trying to add the total at the top of the each stacked bar along with the individual bar values in Plotly Express in Python.
import plotly.express as px
df = px.data.medals_long()
fig = px.bar(df, x="medal", y="count", color="nation", text_auto=True)
fig.show()
This gives the below result
However I want the chart as below:
Although the totals could be added as text annotations, the easiest way is to add a scatter trace in text mode on top of the bars.
import plotly.express as px
import plotly.graph_objects as go
df = px.data.medals_long()
# Per-medal totals of the numeric column only; summing the whole frame would
# also try to aggregate the string `nation` column.
totals = df.groupby('medal')['count'].sum()
fig = px.bar(df, x="medal", y="count", color="nation", text_auto=True)
# Text-only scatter trace placed at each stack's total, i.e. on top of the bar.
fig.add_trace(go.Scatter(
    x=totals.index,
    y=totals,
    text=totals,
    mode='text',
    textposition='top center',
    textfont=dict(
        size=18,
    ),
    showlegend=False
))
# Leave headroom above the tallest stack so its total label is not clipped.
fig.update_yaxes(range=[0, totals.max() * 1.15])
fig.show()

Pie chart enclosed with a black line (rectangle)

Below you can see my data and facet plot in matplotlib.
import pandas as pd
import numpy as np
# Use the fully qualified option name: the bare 'max_columns' pattern can match
# more than one pandas option, in which case set_option raises OptionError.
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import matplotlib as mpl
# Data
data = {
'type_sale': ['g_1','g_2','g_3','g_4','g_5','g_6','g_7','g_8','g_9','g_10'],
'open':[70,20,24,150,80,90,60,90,20,20],
'closed':[30,14,20,10,20,40,10,10,10,10],
}
df = pd.DataFrame(data, columns = ['type_sale',
'open',
'closed',
])
data1 = {
'type_sale': [ 'open','closed'],
'structure':[70,30],
}
df1 = pd.DataFrame(data1, columns = ['type_sale',
'structure',
])
# Ploting
labels = ['open','closed']
fig, axs = plt.subplots(2,2, figsize=(10,8))
plt.subplots_adjust(wspace=0.2, hspace=0.6)
df1.plot(x='type_sale', y='structure',labels=labels,autopct='%1.1f%%',kind='pie', title='Stacked Bar Graph by dataframe',ax=axs[0,0])
df.plot(x='type_sale', kind='bar', stacked=True, title='Stacked Bar Graph by dataframe', ax=axs[0,1])
df.plot(x='type_sale', kind='bar', stacked=True, title='Stacked Bar Graph by dataframe',ax=axs[1,0])
df.plot(x='type_sale', kind='bar', stacked=True,title='Stacked Bar Graph by dataframe', ax=axs[1,1])
plt.suptitle(t='Stacked Bar Graph by dataframe', fontsize=16)
plt.show()
If you compare the first pie plot with the others, you can spot a big difference. Namely, the first pie plot is not enclosed by a black line (rectangle), while the others are.
So can anybody help me with how to solve this problem?
After playing around myself, it seems that this is working, but I think the pie gets stretched, which doesn't look that good.
EDIT
found a better solution with set_adjustable
Below are two options for creating the pie chart; the frame and ticks differ a bit between them.
# Option 1: draw the pie directly with matplotlib on the first subplot,
# passing frame=True to pie() and setting the title by hand.
axs[0,0].pie(df1['structure'],labels=labels,autopct='%1.1f%%',frame=True,radius=10)
axs[0,0].set_title('Stacked Bar Graph by dataframe')
# Option 2: keep the pandas plotting call, then switch the axes frame on.
# set_adjustable('datalim') is the "better solution" mentioned in the text
# (presumably it avoids the stretched pie — verify visually).
df1.plot(x='type_sale', y='structure',labels=labels,autopct='%1.1f%%',kind='pie', title='Stacked Bar Graph by dataframe',ax=axs[0,0])
axs[0,0].set_frame_on(True)
axs[0,0].set_adjustable('datalim')

Seaborn title error - AttributeError: 'FacetGrid' object has no attribute 'set_title'

I created a lineplot graph to begin with using the following code:
plot = sns.lineplot(data=tips,
x="sex",
y="tip",
ci=50,
hue="day",
palette="Accent")
plot.set_title("Value of Tips Given to Waiters, by Days of the Week and Sex", fontsize=24, pad=30, fontdict={"weight": "bold"})
plot.legend("")
I have realised that it's actually a catplot chart that I need, so I amended the code to the following:
plot = sns.catplot (data=tips,
x="day",
y="tip",
kind='bar',
ci=50,
hue="sex",
palette="Accent")
plot.set_title("Value of Tips Given to Waiters, by Days of the Week and Sex", fontsize=24, pad=30, fontdict={"weight": "bold"})
plot.legend("")
However I am getting the following error message with the title: 'AttributeError: 'FacetGrid' object has no attribute 'set_title''.
Why is my title not working for the catplot chart?
When you call catplot, it returns a FacetGrid object, so to change the title and remove the legend, you have to use the legend= option inside the function call and set the title with plot.fig.suptitle():
import seaborn as sns
tips = sns.load_dataset("tips")
# catplot returns a FacetGrid, which has no set_title(); the legend is
# therefore suppressed with legend=False in the call itself.
plot = sns.catplot (data=tips,
x="day",
y="tip",
kind='bar',
ci=50,
hue="sex",
palette="Accent", legend=False)
# Title the whole figure through the grid's underlying matplotlib figure.
plot.fig.suptitle("Value of Tips Given to Waiters, by Days of the Week and Sex",
fontsize=24, fontdict={"weight": "bold"})
Same answer as @StupidWolf, but with an additional adjustment for the title position:
import seaborn as sns
tips = sns.load_dataset("tips")
# Bar catplot drawn without a legend; the result is a FacetGrid, not an Axes.
plot = sns.catplot (data=tips,
x="day",
y="tip",
kind='bar',
ci=50,
hue="sex",
palette="Accent", legend=False);
# Title-position adjustment: pull the top of the subplot area down to 0.8
# so the figure-level title has room above the plot.
plot.figure.subplots_adjust(top=0.8);
# Figure-level title (FacetGrid itself has no set_title()).
plot.figure.suptitle(
"Value of Tips Given to Waiters, by Days of the Week and Sex",
fontsize=24,
fontdict={"weight": "bold"}
);

Is it possible to annotate a seaborn violin plot with number of observations in each group?

I would like to annotate my violin plot with the number of observations in each group. So the question is essentially the same as this one, except:
python instead of R,
seaborn instead of ggplot, and
violin plots instead of boxplots
Let's take this example from the Seaborn API documentation:
import seaborn as sns
sns.set_style("whitegrid")
tips = sns.load_dataset("tips")
ax = sns.violinplot(x="day", y="total_bill", data=tips)
I'd like to have n=62, n=19, n=87, and n=76 on top of the violins. Is this doable?
In this situation, I like to precompute the annotated values and incorporate them into the categorical axis. In other words, precompute e.g., "Thurs, N = xxx"
That looks like this:
import seaborn as sns
sns.set_style("whitegrid")
# Build a frame whose category labels carry the group size, then draw the
# violins from it; `ax` is whatever sns.violinplot returns (the Axes).
ax = (
    sns.load_dataset("tips")
    # Number of observations in each row's day group.
    .assign(count=lambda df: df['day'].map(df.groupby(by=['day'])['total_bill'].count()))
    # Axis label combining day and group size, e.g. "Thur\nN = 62".
    .assign(grouper=lambda df: df['day'].astype(str) + '\nN = ' + df['count'].astype(str))
    .sort_values(by='day')
    # The (callable, 'data') tuple tells pipe to pass the frame as the `data` keyword.
    .pipe((sns.violinplot, 'data'), x="grouper", y="total_bill")
)
# Labels are set in a separate statement: chaining .set(...) inside the
# expression would bind `ax` to set()'s return value instead of the Axes.
ax.set(xlabel='Day of the Week', ylabel='Total Bill (USD)')
You first need to store all values of y positions and x positions (using your dataset for that) in order to use ax.text, then a simple for loop can write everything in the positions desired:
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset("tips")
ax = sns.violinplot(x="day", y="total_bill", data=tips)
# Per-day median (used as the label's y position) and observation count,
# computed from the data instead of being typed in by hand.
stats = tips.groupby("day")["total_bill"].agg(["median", "count"])
# As in the hand-written version, the i-th group is assumed to be drawn at
# x position i (groups and violins share the same day ordering).
for xpos, (ypos, n_obs) in enumerate(zip(stats["median"], stats["count"])):
    ax.text(xpos, ypos, f"n = {n_obs}")
plt.show()

Categories

Resources