Two sided grouped barplots with Python seaborn - python

I'm trying to draw a two-sided graph similar to the population pyramid explained here and here. The problem is that I have categorical variables (male/female) that I want to group together:
import pandas as pd
import seaborn as sns
# data
data = {'species': ['X', 'X', 'Y', 'Y', 'Z', 'Z', 'X', 'X', 'Y', 'Y', 'Z', 'Z'],
'sex': ['male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female'],
'mass (g)': [4000, 3500, 3800, 3200, 5500, 4900, 2500, 2100, 2400, 2000, 4200, 3800],
'age': ['adult', 'adult', 'adult', 'adult', 'adult', 'adult', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile']}
df = pd.DataFrame(data)
# convert juvenile mass to negative
df.loc[df.age.eq('juvenile'), 'mass (g)'] = df['mass (g)'].mul(-1)
# plot
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(10,5))
sns.barplot(data=df, x='mass (g)', y='species', hue='sex', ci=False, orient='horizontal', dodge=True)
ax.yaxis.tick_right()
ax.yaxis.set_label_position("right")
plt.show()
The figure below is what I'm aiming for. Different color bars are for male/female sex. Different species X, Y, Z are in separate categorical groups. The bars on the right side of the figure show mass of adults for each species.
I sketched in red the bars on the left side to show mass of juveniles for each species. How do I plot this? I can't find anything useful in the seaborn docs or on SO.

If you mix positive and negative values, by default seaborn's barplot will average them out.
You could draw two barplots back to back and reverse the left one:
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
data = {'species': ['X', 'X', 'Y', 'Y', 'Z', 'Z', 'X', 'X', 'Y', 'Y', 'Z', 'Z'],
'sex': ['male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female'],
'mass (g)': [4000, 3500, 3800, 3200, 5500, 4900, 2500, 2100, 2400, 2000, 4200, 3800],
'age': ['adult', 'adult', 'adult', 'adult', 'adult', 'adult', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile']}
df = pd.DataFrame(data)
df['sex'] = pd.Categorical(df['sex']) # make hue column categorical, forcing a fixed order
sns.set_theme(style='whitegrid')
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 5), sharey=True, gridspec_kw={'wspace': 0})
# draw adult subplot at the right
sns.barplot(data=df[df['age'] == 'adult'], x='mass (g)', y='species', hue='sex',
ci=False, orient='horizontal', dodge=True, ax=ax2)
ax2.yaxis.set_label_position('right')
ax2.tick_params(axis='y', labelright=True, right=True)
ax2.set_title(' '+'adult', loc='left')
ax2.legend_.remove() # remove the legend; the legend will be in ax1
# draw juvenile subplot at the left
sns.barplot(data=df[df['age'] == 'juvenile'], x='mass (g)', y='species', hue='sex',
ci=False, orient='horizontal', dodge=True, ax=ax1)
# optionally use the same scale left and right
xmax = max(ax1.get_xlim()[1], ax2.get_xlim()[1])
ax1.set_xlim(xmax=xmax)
ax2.set_xlim(xmax=xmax)
ax1.invert_xaxis() # reverse the direction
ax1.tick_params(labelleft=False, left=False)
ax1.set_ylabel('')
ax1.set_title('juvenile'+' ', loc='right')
plt.tight_layout()
plt.show()
An interesting feature of seaborn's barplots is that it will also do the work of averaging out the values given a dataframe with a row for each individual (and calculate a confidence interval).

Try something like this:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# data
data = {'species': ['X', 'X', 'Y', 'Y', 'Z', 'Z', 'X', 'X', 'Y', 'Y', 'Z', 'Z'],
'sex': ['male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female'],
'mass (g)': [4000, 3500, 3800, 3200, 5500, 4900, 2500, 2100, 2400, 2000, 4200, 3800],
'age': ['adult', 'adult', 'adult', 'adult', 'adult', 'adult', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile']}
df = pd.DataFrame(data)
# convert juvenile mass to negative
df.loc[df.age.eq('juvenile'), 'mass (g)'] = df['mass (g)'].mul(-1)
# plot
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(10,5))
df_reshape = df.set_index(['species','sex','age']).unstack(['age','sex'])['mass (g)']
df_reshape.loc[:, 'adult'].plot.barh(ax=ax)
df_reshape.loc[:, 'juvenile'].plot.barh(legend=False, ax=ax)
plt.show()
Output:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# data
data = {'species': ['X', 'X', 'Y', 'Y', 'Z', 'Z', 'X', 'X', 'Y', 'Y', 'Z', 'Z'],
'sex': ['male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female'],
'mass (g)': [4000, 3500, 3800, 3200, 5500, 4900, 2500, 2100, 2400, 2000, 4200, 3800],
'age': ['adult', 'adult', 'adult', 'adult', 'adult', 'adult', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile']}
df = pd.DataFrame(data)
# convert juvenile mass to negative
df.loc[df.age.eq('juvenile'), 'mass (g)'] = df['mass (g)'].mul(-1)
# plot
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(10,5))
df_reshape = df.set_index(['species','sex','age']).unstack(['age','sex'])['mass (g)']
df_reshape.loc[:, ['adult']].plot.barh(ax=ax, edgecolor='k')
df_reshape.loc[:, ['juvenile']].plot.barh(ax=ax, label='Juvenile', color=['navy','red'], alpha=.6, edgecolor='k', hatch='/')
plt.show()
Output:

I just used `pivot` to shape the data correctly:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# data
data = {'species': ['X', 'X', 'Y', 'Y', 'Z', 'Z', 'X', 'X', 'Y', 'Y', 'Z', 'Z'],
        'sex': ['male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female'],
        'mass (g)': [4000, 3500, 3800, 3200, 5500, 4900, 2500, 2100, 2400, 2000, 4200, 3800],
        'age': ['adult', 'adult', 'adult', 'adult', 'adult', 'adult', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile']}
df = pd.DataFrame(data)
# convert juvenile mass to negative so those bars extend to the left of zero
df.loc[df.age.eq('juvenile'), 'mass (g)'] = df['mass (g)'].mul(-1)
# pivot data: one row per (species, sex) with an 'adult' and a 'juvenile' column;
# a scalar `values` keeps the columns flat, so no extra set_index/reset_index is needed
df = df.pivot(index=['species', 'sex'], columns='age', values='mass (g)').reset_index()
# plot
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(10,5))
# draw both age classes on the same axes: adults to the right, juveniles to the left
sns.barplot(data=df, x='adult', y='species', hue='sex', ci=False, orient='horizontal', dodge=True, ax=ax)
sns.barplot(data=df, x='juvenile', y='species', hue='sex', ci=False, orient='horizontal', dodge=True, ax=ax)
plt.show()

Related

pandas : drop duplicates in the same time when grouping by

I'm doing a simple groupby on my data, as shown in the code below. Is there a way to do it directly, in the same line of code, without the drop_duplicates call?
Thank you
df_brut['Revenue'] = df_brut.groupby(['cod', 'date', 'zone'])['Revenue'].transform('sum')
df_brut = df_brut.drop_duplicates()
df_brut.columns = ['cod','date', 'zone','SUM_']
My data
data1 = {'date': ['2021-06', '2021-06', '2021-07', '2021-07', '2021-07', '2021-07'], 'cod': ['12', '12', '14', '15', '15', '18'], 'zone': ['LA', 'LA', 'LA', 'PARIS', 'PARIS', 'PARIS'], 'Revenue': [10, 20, 30, 50, 40, 10]}
df_brut= pd.DataFrame(data1)
the grouped data expected is
data2 = {'date': ['2021-06', '2021-07', '2021-07', '2021-07'], 'cod': ['12', '14', '15','18'], 'zone': ['LA', 'LA', 'PARIS', 'PARIS'], 'SUM_': [30, 30, 90, 10]}
df_grouped= pd.DataFrame(data2)
You could do:
# Sum Revenue per (cod, date, zone) group. as_index=False keeps the group keys
# as regular columns, so no drop_duplicates() call is needed afterwards.
(df_brut.groupby(['cod', 'date', 'zone'], as_index=False)['Revenue']
    .sum()
    .rename(columns={'Revenue': 'SUM_'})  # column name used in the expected output
)

add alpha (shade) to plotly express stacked bar

I have the following pandas dataframe
import pandas as pd
df_dict = {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8], 'columns': ['from', 'moving', 'N', 'total', 'perc', 'helper', 'label'], 'data': [['0', 'no', 29, 39, 74.35897435897436, 'all', '74.4 %'], ['0', 'yes', 10, 39, 25.641025641025642, 'all', '25.6 %'], ['1', 'no', 77, 84, 91.66666666666667, 'all', '91.7 %'], ['1', 'yes', 7, 84, 8.333333333333334, 'all', '8.3 %'], ['2', 'no', 6, 6, 100.0, 'all', '100.0 %'], ['3', 'no', 19, 25, 76.0, 'all', '76.0 %'], ['3', 'yes', 6, 25, 24.0, 'all', '24.0 %'], ['4', 'no', 30, 45, 66.66666666666667, 'all', '66.7 %'], ['4', 'yes', 15, 45, 33.333333333333336, 'all', '33.3 %']]}
df = pd.DataFrame(index=df_dict['index'], columns=df_dict['columns'], data=df_dict['data'])
I am using the following code:
import plotly.express as px
def pl(dt, color_col, title, facet_col=None,
color_discrete_map=dict(zip(['4', '0', '2', '3', '1'],['#003898', '#164461','#61B3C1', '#8ED3F6 ','#8DD1C8']))):
px.bar(dt, x='helper', y='perc', color=color_col, facet_col=facet_col, category_orders={col: sorted(dt_temp[col].unique())},
color_discrete_map=color_discrete_map, title=title, text='label').show()
pl(dt=df, facet_col='from',
color_col='from', title='title')
In order to produce this plot:
I would like to add a shading of the specified color in the color_discrete_map with respect to the moving column of the df, so that the no's are a bit more faded.
How could I do that with plotly express ?
I don't believe that you can access the text font through any of the px.bar parameters. However, you can save your px.bar in an object called fig, then directly modify each bar object through fig.data[0], fig.data[1], ... fig.data[n-1] for n bars.
The text color for each of these bars can be modified by assigning a dictionary to the textfont attribute of each bar object fig.data[0] ... fig.data[n-1]. For example, you could make the text of the first bar red with the line: fig.data[0].textfont = {'color': 'red'}. This lends itself to looping through each fig.data bar object and setting its textfont attribute to the desired color.
The last part is to make your color a shade of the bar color. I am not that familiar with hex color codes, so it makes the most sense to convert each hex color code to an rgb tuple of three values, and find the intermediate color between this value and white. plotly.colors conveniently has the methods hex_to_rgb and find_intermediate_color so we can use these to convert each of your hex colors to rgb, then find the rgb tuple between that color and white which is rgb(255,255,255).
To be consistent with the way you've structured your program, I put the code setting the textfont attributes in your pl function.
import pandas as pd
import plotly.express as px
from plotly.colors import hex_to_rgb, find_intermediate_color
df_dict = {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8], 'columns': ['from', 'moving', 'N', 'total', 'perc', 'helper', 'label'], 'data': [['0', 'no', 29, 39, 74.35897435897436, 'all', '74.4 %'], ['0', 'yes', 10, 39, 25.641025641025642, 'all', '25.6 %'], ['1', 'no', 77, 84, 91.66666666666667, 'all', '91.7 %'], ['1', 'yes', 7, 84, 8.333333333333334, 'all', '8.3 %'], ['2', 'no', 6, 6, 100.0, 'all', '100.0 %'], ['3', 'no', 19, 25, 76.0, 'all', '76.0 %'], ['3', 'yes', 6, 25, 24.0, 'all', '24.0 %'], ['4', 'no', 30, 45, 66.66666666666667, 'all', '66.7 %'], ['4', 'yes', 15, 45, 33.333333333333336, 'all', '33.3 %']]}
df = pd.DataFrame(index=df_dict['index'], columns=df_dict['columns'], data=df_dict['data'])
bar_color_map = dict(zip(['4', '0', '2', '3', '1'],['#003898', '#164461','#61B3C1', '#8ED3F6','#8DD1C8']))
def pl(dt, color_col, title, facet_col=None, color_discrete_map=bar_color_map):
    """Show a bar chart whose bar labels are drawn in a faded shade of the bar colour.

    Parameters:
        dt: DataFrame to plot; the 'helper', 'perc' and 'label' columns are
            used for x, y and the bar text respectively.
        color_col: name of the column that determines the bar colour.
        title: figure title.
        facet_col: optional column name used to split the figure into facets.
        color_discrete_map: mapping from values of ``color_col`` to hex colours.
    """
    fig = px.bar(dt, x='helper', y='perc', color=color_col, facet_col=facet_col,
                 category_orders={color_col: sorted(dt[color_col].unique())},
                 color_discrete_map=color_discrete_map, title=title, text='label')
    # Look the colour up by trace name in the map that was actually passed in,
    # rather than by trace position in the module-level default map.
    for trace in fig.data:
        hex_color = color_discrete_map.get(trace.name)
        if hex_color is None:
            # no explicit colour for this category: leave the default text colour
            continue
        bar_color = hex_to_rgb(hex_color)
        # halfway between the bar colour and white
        shaded_text_color = find_intermediate_color(bar_color, (255, 255, 255), 0.5)
        shaded_int_rgb_color = tuple(int(channel) for channel in shaded_text_color)
        trace.textfont = {'color': 'rgb' + str(shaded_int_rgb_color)}
    fig.show()
pl(dt=df, facet_col='from', color_col='from', title='title')

Add multiple `plotly.express.bar` figures into one window

I have 3 bar charts with separated data-frames and x,y labels. I can not use make_subplot tool since it is compatible with graph_object instance, not with express.bar. In this case, I've read documentations about facet_row(facet_col) properties which plots bars in one figure but it does not fit to my case. I literally want 3 different figures in one page/window.
I create bar charts as below:
import plotly.express as px
x = ['one', 'two', 'three']
y = [1, 2, 3]
dataframe1 = {
"x_axis_1": x,
"y_axis_1": y
}
fig1 = px.bar(dataframe1, x="x_axis_1", y="y_axis_1")
fig1.update_xaxes(type='category')
Thank you for any idea in advance!
As you mentioned the add_traces expects us to input instances of trace classes from the plotly.graph_objects package (e.g plotly.graph_objects.Scatter, plotly.graph_objects.Bar) (see here)
However since you do not want to use graph_objects we have to find a workaround
Each fig that you create using plotly express has 2 parts : data and layout
For example for your bar chart, if I do:
print (fig1)
>>>
Figure({
'data': [{'alignmentgroup': 'True',
'hovertemplate': 'x_axis_1=%{x}<br>y_axis_1=%{y}<extra></extra>',
'legendgroup': '',
'marker': {'color': '#636efa'},
'name': '',
'offsetgroup': '',
'orientation': 'v',
'showlegend': False,
'textposition': 'auto',
'type': 'bar',
'x': array(['one', 'two', 'three'], dtype=object),
'xaxis': 'x',
'y': array([1, 2, 3], dtype=int64),
'yaxis': 'y'}],
'layout': {'barmode': 'relative',
'legend': {'tracegroupgap': 0},
'margin': {'t': 60},
'template': '...',
'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'title': {'text': 'x_axis_1'}},
'yaxis': {'anchor': 'x', 'domain': [0.0, 1.0], 'title': {'text': 'y_axis_1'}}}
})
If we check the data parameter, it's a list of plotly graph_object classes:
print (fig1['data'][0])
>>>
Bar({
'alignmentgroup': 'True',
'hovertemplate': 'x_axis_1=%{x}<br>y_axis_1=%{y}<extra></extra>',
'legendgroup': '',
'marker': {'color': '#636efa'},
'name': '',
'offsetgroup': '',
'orientation': 'v',
'showlegend': False,
'textposition': 'auto',
'x': array(['one', 'two', 'three'], dtype=object),
'xaxis': 'x',
'y': array([1, 2, 3], dtype=int64),
'yaxis': 'y'
})
print(type(fig1['data'][0]))
>>>
<class 'plotly.graph_objs._bar.Bar'>
So basically we have to extract this trace from the plotly express figure and pass it to add_trace for it to work
The complete solution
Generate some data:
# Three independent data sets, one per bar chart.
x1 = ['one', 'two', 'three']
y1 = [1, 2, 3]
x2 = ['five', 'six', 'seven']
y2 = [5, 6, 7]
x3 = ['eight', 'nine', 'ten']
y3 = [8, 9, 10]
# dataframe1 must use x1/y1: plain `x`/`y` are not defined in this snippet
dataframe1 = {
    "x_axis_1": x1,
    "y_axis_1": y1
}
dataframe2 = {
    "x_axis_2": x2,
    "y_axis_2": y2
}
dataframe3 = {
    "x_axis_3": x3,
    "y_axis_3": y3
}
Generate the plot:
import plotly.express as px
from plotly.subplots import make_subplots
# one subplot row per bar chart, stacked vertically in a single figure
fig = make_subplots(rows=3, cols=1)
# build each chart with plotly express as usual
fig1 = px.bar(dataframe1, x="x_axis_1", y="y_axis_1")
fig2 = px.bar(dataframe2, x="x_axis_2", y="y_axis_2")
fig3 = px.bar(dataframe3, x="x_axis_3", y="y_axis_3")
# each express figure holds a single Bar trace; move it into its own row
fig.add_trace(fig1['data'][0], row=1, col=1)
fig.add_trace(fig2['data'][0], row=2, col=1)
fig.add_trace(fig3['data'][0], row=3, col=1)
fig.show()
Output:
As you can see, by querying the data key of the plotly express figure, we get the trace objects in the format required by the add_trace method.

Animated plot with `plotly`

I'd like to plot a convergence process of the MLE algorithm with the plotly library.
Requirements:
the points have to be colored in the colors of the clusters, and change accordingly each iteration
the centroids of the clusters should be plotted on each iteration.
A plot of a single iteration may be produced by Code 1, with the desired output shown in Figure 1:
Code 1
import plotly.graph_objects as go
import numpy as np
A = np.random.randn(30).reshape((15, 2))
centroids = np.random.randint(10, size=10).reshape((5, 2))
clusters = [1, 2, 3, 4, 5]
colors = ['red', 'green', 'blue', 'yellow', 'magenta']
fig = go.Figure()
for i in range(5):
fig.add_trace(
go.Scatter(
x=A[i:i+3][:, 0],
y=A[i:i+3][:, 1],
mode='markers',
name=f'cluster {i+1}',
marker_color=colors[i]
)
)
for c in clusters:
fig.add_trace(
go.Scatter(
x=[centroids[c-1][0]],
y=[centroids[c-1][1]],
name=f'centroid of cluster {c}',
mode='markers',
marker_color=colors[c-1],
marker_symbol='x'
)
)
fig.show()
Figure 1
I've seen this tutorial, but it seems that you can plot only a single trace in a graph_objects.Frame(), and Code 2 represents a simple example for producing an animated scatter plot of all the points, where each frame plots points from different cluster and the centroids:
Code 2
import plotly.graph_objects as go
import numpy as np
A = np.random.randn(30).reshape((15, 2))
centroids = np.random.randint(10, size=10).reshape((5, 2))
clusters = [1, 2, 3, 4, 5]
colors = ['red', 'green', 'blue', 'yellow', 'magenta']
fig = go.Figure(
data=[go.Scatter(x=A[:3][:,0], y=A[:3][:,1], mode='markers', name='cluster 1', marker_color=colors[0])],
layout=go.Layout(
xaxis=dict(range=[-10, 10], autorange=False),
yaxis=dict(range=[-10, 10], autorange=False),
title="Start Title",
updatemenus=[dict(
type="buttons",
buttons=[dict(label="Play",
method="animate",
args=[None])])]
),
frames=[go.Frame(data=[go.Scatter(x=A[:3][:,0], y=A[:3][:,1], mode='markers', name='cluster 2', marker_color=colors[1])]),
go.Frame(data=[go.Scatter(x=A[3:5][:,0], y=A[3:5][:,1], mode='markers', name='cluster 3', marker_color=colors[2])]),
go.Frame(data=[go.Scatter(x=A[5:8][:,0], y=A[5:8][:,1], mode='markers', name='cluster 4', marker_color=colors[3])]),
go.Frame(data=[go.Scatter(x=A[8:][:,0], y=A[8:][:,1], mode='markers', name='cluster 5', marker_color=colors[4])]),
go.Frame(data=[go.Scatter(x=[centroids[0][0]], y=[centroids[0][1]], mode='markers', name='centroid of cluster 1', marker_color=colors[0], marker_symbol='x')]),
go.Frame(data=[go.Scatter(x=[centroids[1][0]], y=[centroids[1][1]], mode='markers', name='centroid of cluster 2', marker_color=colors[1], marker_symbol='x')]),
go.Frame(data=[go.Scatter(x=[centroids[2][0]], y=[centroids[2][1]], mode='markers', name='centroid of cluster 3', marker_color=colors[2], marker_symbol='x')]),
go.Frame(data=[go.Scatter(x=[centroids[3][0]], y=[centroids[3][1]], mode='markers', name='centroid of cluster 4', marker_color=colors[3], marker_symbol='x')]),
go.Frame(data=[go.Scatter(x=[centroids[4][0]], y=[centroids[4][1]], mode='markers', name='centroid of cluster 5', marker_color=colors[4], marker_symbol='x')])]
)
fig.show()
Why Code 2 does not fit my needs:
I need to plot all the frames produced by Code 2 in a single frame each iteration of the algorithm (i.e. each frame of the desired solution will look like Figure 1)
What I have tried:
I have tried producing a graph_objects.Figure(), and adding it to a graph_objects.Frame() as shown in Code 3, but have gotten Error 1.
Code 3:
import plotly.graph_objects as go
import numpy as np
A = np.random.randn(30).reshape((15, 2))
centroids = np.random.randint(10, size=10).reshape((5, 2))
clusters = [1, 2, 3, 4, 5]
colors = ['red', 'green', 'blue', 'yellow', 'magenta']
fig = go.Figure()
for i in range(5):
fig.add_trace(
go.Scatter(
x=A[i:i+3][:, 0],
y=A[i:i+3][:, 1],
mode='markers',
name=f'cluster {i+1}',
marker_color=colors[i]
)
)
for c in clusters:
fig.add_trace(
go.Scatter(
x=[centroids[c-1][0]],
y=[centroids[c-1][1]],
name=f'centroid of cluster {c}',
mode='markers',
marker_color=colors[c-1],
marker_symbol='x'
)
)
animated_fig = go.Figure(
data=[go.Scatter(x=A[:3][:, 0], y=A[:3][:, 1], mode='markers', name=f'cluster 0', marker_color=colors[0])],
layout=go.Layout(
xaxis=dict(range=[-10, 10], autorange=False),
yaxis=dict(range=[-10, 10], autorange=False),
title="Start Title",
updatemenus=[dict(
type="buttons",
buttons=[dict(label="Play",
method="animate",
args=[None])])]
),
frames=[go.Frame(data=[fig])]
)
animated_fig.show()
Error 1:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-681-11264f38e6f7> in <module>
43 args=[None])])]
44 ),
---> 45 frames=[go.Frame(data=[fig])]
46 )
47
~\Anaconda3\lib\site-packages\plotly\graph_objs\_frame.py in __init__(self, arg, baseframe, data, group, layout, name, traces, **kwargs)
241 _v = data if data is not None else _v
242 if _v is not None:
--> 243 self["data"] = _v
244 _v = arg.pop("group", None)
245 _v = group if group is not None else _v
~\Anaconda3\lib\site-packages\plotly\basedatatypes.py in __setitem__(self, prop, value)
3973 # ### Handle compound array property ###
3974 elif isinstance(validator, (CompoundArrayValidator, BaseDataValidator)):
-> 3975 self._set_array_prop(prop, value)
3976
3977 # ### Handle simple property ###
~\Anaconda3\lib\site-packages\plotly\basedatatypes.py in _set_array_prop(self, prop, val)
4428 # ------------
4429 validator = self._get_validator(prop)
-> 4430 val = validator.validate_coerce(val, skip_invalid=self._skip_invalid)
4431
4432 # Save deep copies of current and new states
~\Anaconda3\lib\site-packages\_plotly_utils\basevalidators.py in validate_coerce(self, v, skip_invalid, _validate)
2671
2672 if invalid_els:
-> 2673 self.raise_invalid_elements(invalid_els)
2674
2675 v = to_scalar_or_list(res)
~\Anaconda3\lib\site-packages\_plotly_utils\basevalidators.py in raise_invalid_elements(self, invalid_els)
298 pname=self.parent_name,
299 invalid=invalid_els[:10],
--> 300 valid_clr_desc=self.description(),
301 )
302 )
ValueError:
Invalid element(s) received for the 'data' property of frame
Invalid elements include: [Figure({
'data': [{'marker': {'color': 'red'},
'mode': 'markers',
'name': 'cluster 1',
'type': 'scatter',
'x': array([-1.30634452, -1.73005459, 0.58746435]),
'y': array([ 0.15388112, 0.47452796, -1.86354483])},
{'marker': {'color': 'green'},
'mode': 'markers',
'name': 'cluster 2',
'type': 'scatter',
'x': array([-1.73005459, 0.58746435, -0.27492892]),
'y': array([ 0.47452796, -1.86354483, -0.20329897])},
{'marker': {'color': 'blue'},
'mode': 'markers',
'name': 'cluster 3',
'type': 'scatter',
'x': array([ 0.58746435, -0.27492892, 0.21002816]),
'y': array([-1.86354483, -0.20329897, 1.99487636])},
{'marker': {'color': 'yellow'},
'mode': 'markers',
'name': 'cluster 4',
'type': 'scatter',
'x': array([-0.27492892, 0.21002816, -0.0148647 ]),
'y': array([-0.20329897, 1.99487636, 0.73484184])},
{'marker': {'color': 'magenta'},
'mode': 'markers',
'name': 'cluster 5',
'type': 'scatter',
'x': array([ 0.21002816, -0.0148647 , 1.13589386]),
'y': array([1.99487636, 0.73484184, 2.08810809])},
{'marker': {'color': 'red', 'symbol': 'x'},
'mode': 'markers',
'name': 'centroid of cluster 1',
'type': 'scatter',
'x': [9],
'y': [6]},
{'marker': {'color': 'green', 'symbol': 'x'},
'mode': 'markers',
'name': 'centroid of cluster 2',
'type': 'scatter',
'x': [0],
'y': [5]},
{'marker': {'color': 'blue', 'symbol': 'x'},
'mode': 'markers',
'name': 'centroid of cluster 3',
'type': 'scatter',
'x': [8],
'y': [6]},
{'marker': {'color': 'yellow', 'symbol': 'x'},
'mode': 'markers',
'name': 'centroid of cluster 4',
'type': 'scatter',
'x': [7],
'y': [1]},
{'marker': {'color': 'magenta', 'symbol': 'x'},
'mode': 'markers',
'name': 'centroid of cluster 5',
'type': 'scatter',
'x': [6],
'y': [2]}],
'layout': {'template': '...'}
})]
The 'data' property is a tuple of trace instances
that may be specified as:
- A list or tuple of trace instances
(e.g. [Scatter(...), Bar(...)])
- A single trace instance
(e.g. Scatter(...), Bar(...), etc.)
- A list or tuple of dicts of string/value properties where:
- The 'type' property specifies the trace type
One of: ['area', 'bar', 'barpolar', 'box',
'candlestick', 'carpet', 'choropleth',
'choroplethmapbox', 'cone', 'contour',
'contourcarpet', 'densitymapbox', 'funnel',
'funnelarea', 'heatmap', 'heatmapgl',
'histogram', 'histogram2d',
'histogram2dcontour', 'image', 'indicator',
'isosurface', 'mesh3d', 'ohlc', 'parcats',
'parcoords', 'pie', 'pointcloud', 'sankey',
'scatter', 'scatter3d', 'scattercarpet',
'scattergeo', 'scattergl', 'scattermapbox',
'scatterpolar', 'scatterpolargl',
'scatterternary', 'splom', 'streamtube',
'sunburst', 'surface', 'table', 'treemap',
'violin', 'volume', 'waterfall']
- All remaining properties are passed to the constructor of
the specified trace type
(e.g. [{'type': 'scatter', ...}, {'type': 'bar, ...}])
I've succeeded in getting all the points present in each frame with the use of the plotly.express module, as shown in Code 4, but the only thing that is missing there is for the centroids to be marked as xs.
Code 4:
import plotly.express as px
import numpy as np
import pandas as pd
A = np.random.randn(200).reshape((100, 2))
# 5 iterations with 20 points each
iteration = np.array([1, 2, 3, 4, 5]).repeat(20)
centroids = np.random.randint(10, size=10).reshape((5, 2))
clusters = np.random.randint(1, 6, size=100)
colors = ['red', 'green', 'blue', 'yellow', 'magenta']
df = pd.DataFrame(dict(x1=A[:, 0], x2=A[:, 1], type='point', cluster=pd.Series(clusters, dtype='str'), iteration=iteration))
centroid_df = pd.DataFrame(dict(x1=centroids[:, 0], x2=centroids[:, 1], type='centroid', cluster=[1, 2, 3, 4, 5], iteration=[1, 2, 3, 4, 5]))
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent
df = pd.concat([df, centroid_df], ignore_index=True)
px.scatter(df, x="x1", y="x2", animation_frame="iteration", color="cluster", hover_name="cluster", range_x=[-10,10], range_y=[-10,10])
I'd appreciate any help for achieving the desired result. Thanks.
You can add two traces per frame, but apparently you need to define these two traces in the initial data too. I added the first two traces again as a frame so that they remain visible on subsequent plays. Here is the full code:
import plotly.graph_objects as go
import numpy as np
A = np.random.randn(30).reshape((15, 2))
centroids = np.random.randint(10, size=10).reshape((5, 2))
clusters = [1, 2, 3, 4, 5]
colors = ['red', 'green', 'blue', 'yellow', 'magenta']
fig = go.Figure(
data=[go.Scatter(x=A[:3][:,0],
y=A[:3][:,1],
mode='markers',
name='cluster 1',
marker_color=colors[0]),
go.Scatter(x=[centroids[0][0]],
y=[centroids[0][1]],
mode='markers',
name='centroid of cluster 1',
marker_color=colors[0],
marker_symbol='x')
],
layout=go.Layout(
xaxis=dict(range=[-10, 10], autorange=False),
yaxis=dict(range=[-10, 10], autorange=False),
title="Start Title",
updatemenus=[dict(
type="buttons",
buttons=[dict(label="Play",
method="animate",
args=[None]),
dict(label="Pause",
method="animate",
args=[None,
{"frame": {"duration": 0, "redraw": False},
"mode": "immediate",
"transition": {"duration": 0}}],
)])]
),
frames=[
go.Frame(
data=[go.Scatter(x=A[:3][:,0],
y=A[:3][:,1],
mode='markers',
name='cluster 1',
marker_color=colors[0]),
go.Scatter(x=[centroids[0][0]],
y=[centroids[0][1]],
mode='markers',
name='centroid of cluster 1',
marker_color=colors[0],
marker_symbol='x')
]),
go.Frame(
data=[
go.Scatter(x=A[:3][:,0],
y=A[:3][:,1],
mode='markers',
name='cluster 2',
marker_color=colors[1]),
go.Scatter(x=[centroids[1][0]],
y=[centroids[1][1]],
mode='markers',
name='centroid of cluster 2',
marker_color=colors[1],
marker_symbol='x')
]),
go.Frame(
data=[
go.Scatter(x=A[3:5][:,0],
y=A[3:5][:,1],
mode='markers',
name='cluster 3',
marker_color=colors[2]),
go.Scatter(x=[centroids[2][0]],
y=[centroids[2][1]],
mode='markers',
name='centroid of cluster 3',
marker_color=colors[2],
marker_symbol='x')
]),
go.Frame(
data=[
go.Scatter(x=A[5:8][:,0],
y=A[5:8][:,1],
mode='markers',
name='cluster 4',
marker_color=colors[3]),
go.Scatter(x=[centroids[3][0]],
y=[centroids[3][1]],
mode='markers',
name='centroid of cluster 4',
marker_color=colors[3],
marker_symbol='x')]),
go.Frame(
data=[
go.Scatter(x=A[8:][:,0],
y=A[8:][:,1],
mode='markers',
name='cluster 5',
marker_color=colors[4]),
go.Scatter(x=[centroids[4][0]],
y=[centroids[4][1]],
mode='markers',
name='centroid of cluster 5',
marker_color=colors[4],
marker_symbol='x')
]),
])
fig.show()

Plot a list of dictionaries using matplotlib

List =
[{'Month': '1', 'Store': 'A', 'Sales': '100'},
{'Month': '2', 'Store': 'A', 'Sales': '50'},
{'Month': '3', 'Store': 'A', 'Sales': '200'},
{'Month': '1', 'Store': 'B', 'Sales': '300'},
{'Month': '2', 'Store': 'B', 'Sales': '200'},
{'Month': '3', 'Store': 'B', 'Sales': '250'}]
I do know how to plot the basic line.
But how can I have a combined result with both data set?
Like this Expected result
This will do it. Putting the data in pandas simplifies this. Also, plot each line on the same axes and they will all be shown on the same chart.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(your_data)
df[['Month', 'Sales']] = df[['Month', 'Sales']].apply(pd.to_numeric, errors='coerce')
a = df[df.Store == 'A']
b = df[df.Store == 'B']
fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot(111)
a.plot('Month', 'Sales', ax=ax)
b.plot('Month', 'Sales', ax=ax)
ax.grid(True)
fig.set_facecolor('white')

Categories

Resources