Sort grouped barchart with plotly - python

I am trying to create a grouped bar chart, which is working with my code so far. However, I can't find a way to sort the groupings among each other, how is that possible with plotly?
Example data of bar_df:
4061 4144 4181 4331
lr 45.9089 65.0693 37.0036 47.3485
knn 64.8903 87.25 48.278 81.9212
bay_r 51.9641 63.5313 39.7762 46.4237
svr 52.7827 63.4806 37.032 46.1108
Current Code for plot:
# Grouped bar chart: one trace per algorithm, one bar group per column of bar_df.
partners = bar_df.columns
algorithms = ["lr", "knn", "bay_r", "svr"]

fig = go.Figure()
for algo in algorithms:
    # Values of the row whose index label equals this algorithm.
    row_values = bar_df[bar_df.index == algo].values[0]
    fig.add_trace(go.Bar(x=partners, y=row_values, name=algo, opacity=0.75))

fig.update_layout(
    width=1550,
    height=450,
    barmode='group',
    title=dict(text='Performance Modell-Vergleich', y=0.9, x=0.5),
    yaxis_title="MAE",
    xaxis_tickangle=-45,
)
fig.show()
Image of the result of the current code:

You have not defined your order. An approach is to use https://pandas.pydata.org/docs/reference/api/pandas.CategoricalIndex.html to be able to define the order of the categories.
import pandas as pd
import plotly.express as px
import io
# Rebuild the example frame from the question. The header row has one field
# fewer than the data rows, so the first column (algorithm name) is used as
# the index.
df = pd.read_csv(
    io.StringIO(""" 4061 4144 4181 4331
lr 45.9089 65.0693 37.0036 47.3485
knn 64.8903 87.25 48.278 81.9212
bay_r 51.9641 63.5313 39.7762 46.4237
svr 52.7827 63.4806 37.032 46.1108"""),
    sep=r"\s+",  # raw string: "\s" is an invalid escape in a plain literal
)
# use pandas categorical to sort categories
category_order = ['svr', 'bay_r', 'knn', 'lr']
df = df.set_index(
    pd.CategoricalIndex(df.index, ordered=True, categories=category_order)
).sort_index()
# create figure with px, it's simpler
# Long format: one row per (index label, column) pair, with the columns
# "index", "variable" and "value" produced by reset_index().melt().
long_df = df.reset_index().melt(id_vars="index")
fig = px.bar(long_df, color="index", x="variable", y="value")
fig.update_layout(
    # width=1550,
    height=450,
    barmode='group',
    title={'text': 'Performance Modell-Vergleich', 'y': 0.9, 'x': 0.5},
    yaxis_title="MAE",
    xaxis_tickangle=-45,
)
# Explicit show() so the figure also renders when run as a script,
# not only as the last expression of a notebook cell.
fig.show()

Related

Multiple boxplots on the same graph

I need to create multiple boxplots on the same graph. The sports are 3. I need to obtain 3 boxplots on the same graph of each sport, with a specific variable on the y-axis. I need to be able to change the variable. The variable for each student is registered various times and is given by the mean of the 3 largest numbers. I have 30 students identified with an ID (that goes from 1 to 30). Each student does only one sport. This is what I wrote but clearly it doesn't work. Can someone help? I hope my explanation made sense.
def boxplot(sport, variable):
    """Draw a single box plot of ``variable`` for ``sport``.

    For every ID from 1 to 30, the three largest ``variable`` values among the
    rows of the module-level ``df`` matching both that ID and ``sport`` are
    averaged; the 30 averages are drawn as one box.

    NOTE(review): an ID with no rows for ``sport`` gives an empty selection,
    so its average is NaN, and those NaNs are handed to ``ax.boxplot``
    unchanged.
    """
    per_student_means = [
        df[(df.ID == student_id) & (df.sport == sport)][variable]
        .nlargest(n=3)
        .mean()
        for student_id in range(1, 31)
    ]
    label = f'Boxplot for {variable} in {sport}'
    my_dict = {label: per_student_means}

    fig, ax = plt.subplots()
    ax.boxplot(my_dict.values())
    ax.set_xticklabels(my_dict.keys())
    plt.show()
Here's one way to do it.
import plotly.express as px
# Sample data set shipped with plotly express.
df = px.data.tips()
# One box per "day" on the x-axis, split into colours by "smoker".
fig = px.box(df, x="day", y="total_bill", color="smoker").update_traces(
    quartilemethod="exclusive"  # or "inclusive", or "linear" by default
)
fig.show()
If your data is not already melted (stacked into long format), you can reshape it with pandas.melt, which is documented here:
https://pandas.pydata.org/docs/reference/api/pandas.melt.html
Finally, for Matplotlib, you can do it like this.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Results of the long jump finals at two Olympic Games
results_by_event = {
    'London 2012 (Men)': [8.31, 8.16, 8.12, 8.11, 8.10, 8.07, 8.01, 7.93],
    'Rio 2016 (Men)': [8.38, 8.37, 8.29, 8.25, 8.17, 8.10, 8.06, 8.05],
    'London 2012 (Women)': [7.12, 7.07, 6.89, 6.88, 6.77, 6.76, 6.72, 6.67],
    'Rio 2016 (Women)': [7.17, 7.15, 7.08, 6.95, 6.81, 6.79, 6.74, 6.69],
}
data = pd.DataFrame(results_by_event)

# Plot
# NOTE(review): Matplotlib 3.9 deprecates `labels` in favour of `tick_labels`;
# `labels` is kept here so the snippet still runs on older releases.
bp = plt.boxplot(
    # The data frame is converted to a 2-D array before it is plotted
    data.to_numpy(),
    # The column headings of the data frame serve as labels
    labels=data.columns.tolist(),
)

# Axis details, applied to the axes the box plot was drawn on
ax = plt.gca()
ax.set_title('Long Jump Finals')
ax.set_ylabel('Distance [m]')
ax.set_xlabel('Olympics')
ax.minorticks_on()
ax.tick_params(axis='x', which='minor', bottom=False)
ax.tick_params(axis='x', which='major', labelsize='small')
plt.show()
Here is one final update. Make sure the y-axis is numeric...
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plot
# px.data.tips() already returns a pandas DataFrame whose columns are
# total_bill, tip, sex, smoker, day, time, size, so no re-wrapping or
# renaming is required.
df = px.data.tips()
print(type(df))
# Only numeric columns are passed to boxplot(); one box is drawn per column.
b_plot = df.boxplot(column=['tip', 'size', 'total_bill'])
plot.show()

Python PLOTLY I want to make the circles clearer

import plotly.express as px
import pandas as pd
data = pd.read_csv("Book1.csv")
fig = px.scatter(data, y="Category", x="Mean", color="Change")

# Titles and a fixed x-range of 2..3.
fig.update_layout(
    title="Title",
    xaxis={"title": "Title", "range": [2, 3]},
    yaxis={"title": "Mean"},
)

# Large markers with a dark outline, applied to marker-mode traces only.
marker_style = dict(size=30, line=dict(width=2, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style, selector=dict(mode='markers'))
fig.show()
I want to make the circles clearer, like more spaced out or scattered. Do you have any suggestions?
Here is the plot:
There is a technique called jittering where you add a small amount of noise to make it less likely for the circles to overlap as much as in your sample figure. It's not perfect, but here is an example of what you can accomplish. You can also try regenerating the plot with a different amount of jittering, as well as different random seeds until you are happy with the result.
import plotly.express as px
import numpy as np
import pandas as pd
# data = pd.read_csv("Book1.csv")
# Inline sample used in place of the CSV: (category, change, mean values).
samples = [
    ('More than usual', 1, [2.2, 2.4, 2.22, 2.24, 2.6]),
    ('About the same', 2, [2.4, 2.41, 2.5, 2.1, 2.12]),
    ('Less than usual', 3, [2.81, 2.1, 2.5, 2.45, 2.42]),
]
data = pd.DataFrame(
    [
        (category, mean, change)
        for category, change, means in samples
        for mean in means
    ],
    columns=['Category', 'Mean', 'Change'],
)

# Numeric y position assigned to each category label.
category_to_value_map = {
    'Less than usual': 1,
    'About the same': 2,
    'More than usual': 3
}

## apply jittering
# Each point is shifted vertically by a uniform offset in
# [-max_jittering, max_jittering); the fixed seed makes the offsets repeatable.
max_jittering = 0.15
np.random.seed(4)
jitter = np.random.uniform(
    low=-max_jittering,
    high=max_jittering,
    size=len(data),
)
data['y'] = data['Category'].map(category_to_value_map) + jitter
fig = px.scatter(data, y="y", x="Mean", color="Change")

# Outlined markers, applied to marker-mode traces only.
fig.update_traces(
    marker={"size": 20, "line": {"width": 2, "color": 'DarkSlateGrey'}},
    selector={"mode": 'markers'},
)

# The y-axis carries the jittered numeric positions, so the ticks at 1, 2, 3
# are relabelled with the category names.
fig.update_layout(
    title="Title",
    xaxis=dict(title="Title", range=[2, 3]),
    yaxis=dict(
        title="Mean",
        tickmode='array',
        tickvals=[1, 2, 3],
        ticktext=['Less than usual', 'About the same', 'More than usual'],
    ),
)
fig.show()

Plotly timeline with objects

In the example below, I would like to group the elements of the y-axis by continent and to display the name of the continent at the top of each group. I can't figure out where in the layout this can be set. The example comes from this plotly page
import pandas as pd
import plotly.graph_objects as go
from plotly import data
df = data.gapminder()
df = df.loc[df.year.isin([1987, 2007])]

# Countries ordered by ascending 2007 population, minus the first 5 and the
# last 10 entries of that ordering.
countries = (
    df.loc[df.year.isin([2007])]
    .sort_values(by=["pop"], ascending=True)["country"]
    .unique()
)[5:-10]

# NOTE: from here on `data` names this dict, no longer the plotly module.
data = {"x": [], "y": [], "colors": [], "years": []}
for country in countries:
    country_rows = df.loc[df.country == country]
    pop_1987 = country_rows.loc[country_rows.year == 1987, "pop"].values[0]
    pop_2007 = country_rows.loc[country_rows.year == 2007, "pop"].values[0]
    # Each country contributes two points plus a None entry
    # (presumably a gap so the line does not run on to the next country).
    data["x"] += [pop_1987, pop_2007, None]
    data["y"] += [country, country, None]
    data["colors"] += ["cyan", "darkblue", "white"]
    data["years"] += ["1987", "2007", None]

# Grey connecting lines between the two years of each country.
connector_trace = go.Scatter(
    x=data["x"],
    y=data["y"],
    mode="lines",
    marker=dict(color="grey"),
)
# Coloured markers for the individual years, with a custom hover text.
marker_trace = go.Scatter(
    x=data["x"],
    y=data["y"],
    text=data["years"],
    mode="markers",
    marker=dict(
        color=data["colors"],
        symbol=["square", "circle", "circle"] * 10,
        size=16,
    ),
    hovertemplate="""Country: %{y} <br> Population: %{x} <br> Year: %{text} <br><extra></extra>""",
)
fig = go.Figure(data=[connector_trace, marker_trace])
To show the grouping by continent, the code you showed has to be restructured: instead of collecting the values in a dictionary, loop over the data frame directly. The y-axis can then be grouped by continent by specifying a multi-index (continent, country) for the y-axis.
I have limited myself to the top 5 countries by continent because the large number of categorical variables on the y-axis creates a situation that is difficult to see for visualization. You can rewrite/not set here according to your needs. Furthermore, in terms of visualization, I have set the x-axis type to log format because the large discrepancies in the numbers make the visualization weaker. This is also something I added on my own and you can edit it yourself.
import pandas as pd
import plotly.graph_objects as go
from plotly import data
df = data.gapminder()
df = df.loc[df.year.isin([1987, 2007])]

# Five countries per continent.
# NOTE(review): groupby().head() keeps the first five rows of each group in
# their existing order; it does not rank the countries by population.
countries = (
    df.loc[df.year.isin([2007])]
    .groupby('continent', as_index=False, sort=True)[['country', 'pop']]
    .head()['country']
)
df = df[df['country'].isin(countries.tolist())]

fig = go.Figure()
for c in df['continent'].unique():
    # "@name" makes DataFrame.query read the local Python variable.
    dff = df.query('continent == @c')
    for cc in dff['country'].unique():
        dfc = dff.query('country == @cc')
        # Two-level y value (continent, country) for the grouped y-axis.
        y_levels = [dfc['continent'], dfc['country']]
        # Grey line connecting the selected years of this country.
        fig.add_trace(
            go.Scatter(
                x=dfc['pop'].tolist(),
                y=y_levels,
                mode='lines+markers',
                marker=dict(color='grey'),
            )
        )
        # Coloured markers drawn over the line, one per year.
        fig.add_trace(
            go.Scatter(
                x=dfc['pop'].tolist(),
                y=y_levels,
                text=dfc["year"],
                mode="markers",
                marker=dict(
                    color=["cyan", "darkblue", "white"],
                    size=16,
                ),
            )
        )

fig.update_layout(autosize=False, height=800, width=800, showlegend=False)
# Log scale on the x-axis (populations).
fig.update_xaxes(type='log')
fig.show()

How to combine plotly legends?

How to edit plotly legends so that "color" and "symbol" will be combined to one?
import pandas as pd
import plotly.express as px
import numpy as np
from sklearn.decomposition import PCA
# X and df are defined elsewhere (not shown in the question).
pca = PCA()
X_pca = pca.fit_transform(X)

# First two principal components, coloured and symbol-coded by subtype.
fig = px.scatter(
    X_pca,
    x=0,
    y=1,
    color=df["subtype"],
    symbol=df["subtype"],
    width=600,
    height=600,
    color_discrete_sequence=["red", "orange", "brown", "green"],
    symbol_sequence=['circle', 'circle', 'circle', 'square'],
    title="Kernel PCA - tumor subtype vs normal",
)
marker_style = dict(size=5, line=dict(width=0.5, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style, selector=dict(mode='markers'))
fig.update_xaxes(automargin=True)
fig.update_yaxes(automargin=True)
fig.update_layout(
    plot_bgcolor='rgb(240,240,240)',
    paper_bgcolor='rgb(240,240,240)',
)
fig.show()
fig.write_image("meth_subtype_pca.png")
Current output :
The legend has "color, symbol"..."kirp, kirp" etc. I want to replace this with "subtype"..."KIRP".
This behaviour is new to me; it is caused by specifying symbols in the PCA scatter plot. I confirmed the same behaviour by adding symbols to the example in the reference. There doesn't seem to be a built-in way to control this, so I customize the figure after it has been created: the duplicated legend names (e.g. "kirp, kirp") are collapsed into a single value, and the legend title is specified manually.
import pandas as pd
import plotly.express as px
import numpy as np
from sklearn.decomposition import PCA
# Iris sample data stands in for the asker's data set.
df = px.data.iris()
X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
pca = PCA()
X_pca = pca.fit_transform(X)

# The same column is passed to both `color` and `symbol`.
fig = px.scatter(
    X_pca,
    x=0,
    y=1,
    color=df["species"],
    width=600,
    height=600,
    color_discrete_sequence=["red", "orange", "brown", "green"],
    title="Kernel PCA - tumor subtype vs normal",
    symbol=df["species"],
    symbol_sequence=['circle', 'circle', 'square'],
)
fig.update_traces(
    marker=dict(size=5, line=dict(width=0.5, color='DarkSlateGrey')),
    selector=dict(mode='markers'),
)
fig.update_xaxes(automargin=True)
fig.update_yaxes(automargin=True)
fig.update_layout({'plot_bgcolor': 'rgb(240,240,240)', 'paper_bgcolor': 'rgb(240,240,240)'})

# update
# Trace names have the form "<value>, <value>"; keep the first part only.
# Taking the first part is deterministic, unlike indexing into a set.
for trace in fig.data:
    trace.name = trace.name.split(', ')[0]
# The legend title is set by hand.
fig.update_layout(legend_title_text='color')
fig.show()
#fig.write_image("meth_subtype_pca.png")

Combined xaxis and header of table with plotly Python

I would like to do something quite similar to the picture with plotly on python. I tried to find a way with subplots and shared_axis but no way to find a correct way. Is it possible to share the x axis of a bar chart with the column titles of a table?
graph bar with shared xaxis
this can be simulated with two traces
first trace is a standard bar chart, with yaxis domain constrained to 80% of the figure
second trace is a bar showing values as text and a fixed height against a second yaxis. yaxis2 is constrained to 10% of the domain
import plotly.express as px
import pandas as pd
import numpy as np
# Sample data: one random percentage in [-0.08, 0.08) per year 2011..2021.
df = pd.DataFrame({"year": range(2011, 2022)})
df["pct"] = np.random.uniform(-0.08, 0.08, len(df))

# First trace: the ordinary bar chart of pct per year.
bars = px.bar(df, x="year", y="pct")

# Second trace: bars of constant height 1 on the secondary y-axis that show
# pct as text, imitating a table row under the chart.
cells = px.bar(df, x="year", y=np.full(len(df), 1), text="pct")
cells.update_traces(
    yaxis="y2",
    marker={"line": {"color": "black", "width": 1.5}, "color": "#E5ECF6"},
    texttemplate="%{text:,.2%}",
)

fig = bars.add_traces(cells.data)
fig.update_layout(
    yaxis={"domain": [0.2, 1], "tickformat": ",.2%"},  # main chart: upper 80%
    yaxis2={"domain": [0, 0.1], "visible": False},  # text row: bottom 10%
    xaxis={"title": "", "dtick": 1},
)
fig

Categories

Resources