How to speed up Dash App with Pandas Groupby - python

My Dash app runs, but I have a dataframe with approximately 10k rows, and to reload the plots and the datatable I need to rerun the groupby statement, which takes very long. The user can select filters on the left, which updates the dashboard. In the dashboard, I need the grouped number of customers per city. Since I have a dcc.Graph and a dash_table, I basically refer to the same underlying dataframe twice and therefore update it twice, which I think is extremely inefficient.
Is there a way to have the dataframe updated only once and then send the result to both the dcc.Graph and the dash_table? Also, are there other ways to speed up the app?
import pandas as pd
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_table
df = pd.DataFrame.from_dict({'Customer': [111, 222, 555, 666],
'zip_city': ['Aguadilla', 'Aguadilla', 'Arecibo', 'Wrangell'],
'zip_latitude':[18.498987, 18.498987, 18.449732,56.409507],
'zip_longitude':[-67.13699,-67.13699,-66.69879,-132.33822],
'Gender':['m','f','m','f']})
df["CustomerCount"] = df.groupby(["zip_city"], as_index=False)["Customer"].transform("count")
gender_options = []
for gender in df['Gender'].unique():
gender_options.append({'label':str(gender),
'value':gender})
app = dash.Dash()
app.css.append_css({'external_url': 'https://codepen.io/chriddyp/pen/bWLwgP.css'})
app.layout = html.Div([html.H1('A Dashboard', style={'textAlign':'center'}),
html.Div(children=[
html.H1('Input', style={'textAlign':'center'}),
html.H6('Gender'),
html.P(
dcc.Checklist(id='gender-picker',
options=gender_options,
values=['m','f']
)
)
],
style = {'float':'left'},
className = "two columns"
),
html.Div([dcc.Tabs(children=[dcc.Tab(label='Map',
children=html.Div([
dcc.Graph(id='CustomerMap')
])
),
dcc.Tab(label='Data',
children=[html.Div([dash_table.DataTable(
id='table',
columns = [{"name": i, "id": i} for i in df.columns],
data = df.to_dict("rows")
)])
]
)
]
)
])
]
)
@app.callback(
    dash.dependencies.Output('CustomerMap', 'figure'),
    [dash.dependencies.Input('gender-picker', 'values')])
def update_figure(selected_gender):
    """Build the customer map for the genders ticked in the checklist.

    Args:
        selected_gender: list of 'Gender' values to keep.

    Returns:
        A figure dict with a single 'scattergeo' trace. Marker size and hover
        text both use the number of customers per 'zip_city' in the filtered
        data.
    """
    # .copy() gives an independent frame, so adding a column below does not
    # write into a slice of the module-level df.
    filtered_df = df[df['Gender'].isin(selected_gender)].copy()
    filtered_df["CustomerCount"] = filtered_df.groupby(["zip_city"], as_index=False)["Customer"].transform("count")

    # One "<city>:<count>" label per row.
    hovertext = [
        str(city) + ':' + str(count)
        for city, count in zip(filtered_df['zip_city'].tolist(),
                               filtered_df['CustomerCount'].tolist())
    ]

    return {'data': [dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = filtered_df['zip_longitude'],
        lat = filtered_df['zip_latitude'],
        text = hovertext,
        hoverinfo = 'text',
        marker = dict(
            size = filtered_df['CustomerCount'],
            line = dict(width=0.5, color='rgb(40,40,40)'),
            sizemode = 'area'
        ),
        transforms = [dict(
            type = 'aggregate',
            groups = filtered_df['zip_city'],
            aggregations = [dict(target = filtered_df['Customer'], func = 'count', enabled = True)]
        )]
    )]}
@app.callback(
    dash.dependencies.Output('table', 'data'),
    [dash.dependencies.Input('gender-picker', 'values')])
def update_table(selected_gender):
    """Return the table rows for the genders ticked in the checklist.

    Args:
        selected_gender: list of 'Gender' values to keep.

    Returns:
        A list with one dict per remaining row, including a 'CustomerCount'
        column recomputed on the filtered data.
    """
    # .copy() so the column assignment below targets an independent frame.
    filtered_df = df[df['Gender'].isin(selected_gender)].copy()
    filtered_df["CustomerCount"] = filtered_df.groupby(["zip_city"], as_index=False)["Customer"].transform("count")
    # "records" is the full orient name; the abbreviation "rows" is rejected
    # by newer pandas releases.
    return filtered_df.to_dict("records")
if __name__ == '__main__':
app.run_server()

Related

Combining a callback and a for loop to return multiple values

Using Dash, I have a callback whose function should return several values with a for loop like so:
@app.callback(
    Output(component_id={'type':'graph-', 'index':MATCH}, component_property='extendData'),
    Input(component_id={'type':'store-', 'index':MATCH}, component_property="data")
)
def update_graphs_callback(data):
    """Send new points from the matching store to the matching graph."""
    df = pd.read_json(data)
    nb_columns = len(df.columns)
    for i in range(nb_columns):
        # NOTE: this return ends the loop on its first pass, so only
        # df.columns[1] is ever sent -- the behaviour the question asks about.
        return(update_graphs(data, df.columns[i+1], i, 100))
However, since I am using return inside a for loop, the function exits after the first iteration. I am looking at using yield to create a generator. However, it does not fix my issue because I am working inside a callback and I need the values to be returned. I can't use print(next(...)) instead of return.
Is there a way to return successively all the values inside the for loop? If not, what could be a way to approach this problem?
Here is a minimal working example App:
import dash
from dash.dependencies import Output, Input, State, MATCH, ALL
from dash import dcc, html, ctx
import plotly
import plotly.express as px
import random
import plotly.graph_objs as go
import pandas as pd
# Initializing the data with the correct format
init_store = {}
n=3
init_df = pd.DataFrame({'a':pd.Series(dtype='int'), 'b':pd.Series(dtype='int'), 'c':pd.Series(dtype='int'), 'd':pd.Series(dtype='int')}, index=range(50))
init_df['a'] = init_df.index
init_store['0'] = init_df
for i in range(n):
init_df = pd.DataFrame({'a':pd.Series(dtype='int'), 'b':pd.Series(dtype='int')}, index=range(50))
init_df['a'] = init_df.index
init_store[f'{i+1}'] = init_df
# Function to update the dataframes with the new observations
def get_data(json_data):
    """Advance a stored window by one observation and return it as JSON.

    The first row is dropped, the remaining rows move up one position, and a
    new last row is written: column 'a' continues the counter, and every other
    column gets a random integer between 13 and 26 (inclusive).

    Args:
        json_data: JSON string produced by ``DataFrame.to_json()``; it must
            contain a column 'a' and at least one row.

    Returns:
        The updated frame serialised with ``DataFrame.to_json()``.
    """
    # Local import: read_json is given a file-like object because passing a
    # literal JSON string is deprecated in recent pandas.
    from io import StringIO

    df = pd.read_json(StringIO(json_data))
    compteur = df['a'].iloc[-1]  # positional, independent of index labels

    # One random value per non-counter column, whatever their number.
    new_row = {'a': compteur + 1}
    for column in df.columns:
        if column != 'a':
            new_row[column] = random.randint(13, 26)

    df = df.shift(periods=-1)
    df.iloc[-1] = [new_row[column] for column in df.columns]
    return df.to_json()
# Function to update the graphs based on the dataframes
def update_graphs(json_data, column, index, nb_obs_kept):
    """Build an ``extendData`` payload for one trace from the newest row.

    Args:
        json_data: JSON string produced by ``DataFrame.to_json()``; must
            contain column 'a' (x values) and ``column``.
        column: name of the column providing the new y value.
        index: trace index, passed through unchanged.
        nb_obs_kept: maximum number of points to keep, passed through
            unchanged.

    Returns:
        A tuple ``(dict(x=[[x_new]], y=[[y_new]]), index, nb_obs_kept)``.
    """
    # Local import: read_json is given a file-like object because passing a
    # literal JSON string is deprecated in recent pandas.
    from io import StringIO

    df = pd.read_json(StringIO(json_data))
    x_new = df['a'].iloc[-1]
    y_new = df[column].iloc[-1]
    return dict(x=[[x_new]], y=[[y_new]]), index, nb_obs_kept
colors = px.colors.qualitative.G10
def generate_graph_containers(index, json_data):
dataframe = pd.read_json(json_data)
X = dataframe['a']
Y = dataframe.loc[:, dataframe.columns != 'a']
graph_id = {'type': 'graph-', 'index': index}
return(
html.Div(
html.Div(
dcc.Graph(
id=graph_id,
style={"height": "8rem"},
config={
"staticPlot": False,
"editable": False,
"displayModeBar": False,
},
figure=go.Figure(
{
"data": [
{
"x": list(X),
"y": list(Y[Y.columns[i]]),
"mode": "lines",
"name": Y.columns[i],
"line": {"color": colors[i+2]},
}
for i in range(len(Y.columns))
],
"layout": {
"uirevision": True,
"margin": dict(l=0, r=0, t=4, b=4, pad=0),
"xaxis": dict(
showline=False,
showgrid=False,
zeroline=False,
showticklabels=False,
),
"yaxis": dict(
showline=False,
showgrid=False,
zeroline=False,
showticklabels=False,
),
"paper_bgcolor": "rgba(0,0,0,0)",
"plot_bgcolor": "rgba(0,0,0,0)",
}
}
)
)
)
)
)
app = dash.Dash(__name__)
store = [dcc.Store(id={'type':'store-', 'index':i}, data=init_store[str(i)].to_json()) for i in range(n)]
def make_layout():
return(
html.Div(
[
html.Div(
store
),
dcc.Interval(
id = 'interval',
interval = 1000,
n_intervals = 0
),
html.Div(
[
generate_graph_containers(str(i), store[i].data) for i in range(n)
]
)
]
)
)
app.layout = make_layout
@app.callback(
    Output(component_id={'type':'store-', 'index':MATCH}, component_property='data'),
    [
        Input('interval', 'n_intervals'),
        State(component_id={'type':'store-', 'index':MATCH}, component_property='data')
    ]
)
def update_data(time, data):
    """Refresh the matching store on every interval tick.

    Args:
        time: current ``n_intervals`` of the interval component (unused).
        data: JSON currently held by the matching store.

    Returns:
        The JSON returned by ``get_data(data)``.
    """
    return get_data(data)
@app.callback(
    Output(component_id={'type':'graph-', 'index':MATCH}, component_property='extendData'),
    Input(component_id={'type':'store-', 'index':MATCH}, component_property="data")
)
def update_graphs_callback(data):
    """Send new points from the matching store to the matching graph."""
    df = pd.read_json(data)
    nb_columns = len(df.columns)
    for i in range(nb_columns):
        # NOTE: this return ends the loop on its first pass, so only
        # df.columns[1] is ever sent -- the behaviour the question asks about.
        return(update_graphs(data, df.columns[i+1], i, 100))
if __name__ == '__main__':
app.run_server(debug=True, host='0.0.0.0', port=8050)
Any help is appreciated!

Dash dynamic live graph update of multiple traces

I am trying to build a Dash app that takes several different data sources as inputs and plots one graph for each data source. As the data is a continuous stream, I store only a small amount (the last 50 observations) of data inside a dcc.Store, one for each data source.
I therefore have one callback allowing me to update the data contained inside the different dcc.Store using pattern matching callback.
I would also like to have one pattern matching callback allowing me to update the dcc.Graph associated with each of the data source, using the data in the dcc.Store.
However, some of the datasources give me several time series that I would like to plot all on the same graph using one trace for each time series. My update_graphs function seems to be working fine to update the graph with only one trace but not when there are multiple traces. I think I am not returning the proper format to update multiple traces with extendData property of the graph.
I want my app to be dynamic so I'm trying to avoid having one call back for each data source
Here is a small example:
import dash
from dash.dependencies import Output, Input, State, MATCH, ALL
from dash import dcc, html
import plotly
import random
import plotly.graph_objs as go
import pandas as pd
# Initializing the data with the correct format
init_store = {}
n=3
init_df = pd.DataFrame({'a':pd.Series(dtype='int'), 'b':pd.Series(dtype='int'), 'c':pd.Series(dtype='int'), 'd':pd.Series(dtype='int')}, index=range(50))
init_df['a'] = init_df.index
init_store['0'] = init_df
for i in range(n):
init_df = pd.DataFrame({'a':pd.Series(dtype='int'), 'b':pd.Series(dtype='int')}, index=range(50))
init_df['a'] = init_df.index
init_store[f'{i+1}'] = init_df
# Function to update the dataframes with the new observations
def get_data(json_data):
    """Advance a stored window by one observation and return it as JSON.

    The first row is dropped, the remaining rows move up one position, and a
    new last row is written: column 'a' continues the counter, and every other
    column gets a random integer between 13 and 26 (inclusive).

    Args:
        json_data: JSON string produced by ``DataFrame.to_json()``; it must
            contain a column 'a' and at least one row.

    Returns:
        The updated frame serialised with ``DataFrame.to_json()``.
    """
    # Local import: read_json is given a file-like object because passing a
    # literal JSON string is deprecated in recent pandas.
    from io import StringIO

    df = pd.read_json(StringIO(json_data))
    compteur = df['a'].iloc[-1]  # positional, independent of index labels

    # One random value per non-counter column, whatever their number.
    new_row = {'a': compteur + 1}
    for column in df.columns:
        if column != 'a':
            new_row[column] = random.randint(13, 26)

    df = df.shift(periods=-1)
    df.iloc[-1] = [new_row[column] for column in df.columns]
    return df.to_json()
# Function to update the graphs based on the dataframes
def update_graphs(json_data):
    """Build the value returned to a graph's ``extendData`` from the newest row.

    Args:
        json_data: JSON string produced by ``DataFrame.to_json()``; must
            contain column 'a' (x values) and at least column 'b'.

    Returns:
        With a single non-'a' column: ``(dict(x=[[x_new]], y=[[y_new]]), [0])``.
        With several: a list holding one ``[dict(x=..., y=...), [i]]`` entry
        per column.
    """
    # Local import: read_json is given a file-like object because passing a
    # literal JSON string is deprecated in recent pandas.
    from io import StringIO

    df = pd.read_json(StringIO(json_data))
    x_new = df['a'].iloc[-1]
    traces = df.loc[:, df.columns != 'a']

    if len(traces.columns) > 1:
        # NOTE(review): this per-trace list is the shape the question reports
        # as not working for multiple traces; it is kept unchanged here.
        return [
            [dict(x=[[x_new]], y=[[traces[name].iloc[-1]]]), [i]]
            for i, name in enumerate(traces.columns)
        ]

    y_new = df['b'].iloc[-1]
    return dict(x=[[x_new]], y=[[y_new]]), [0]
def generate_graph_containers(index, json_data):
dataframe = pd.read_json(json_data)
X = dataframe['a']
Y = dataframe.loc[:, dataframe.columns != 'a']
graph_id = {'type': 'graph-', 'index': index}
return(
html.Div(
html.Div(
dcc.Graph(
id=graph_id,
style={"height": "8rem"},
config={
"staticPlot": False,
"editable": False,
"displayModeBar": False,
},
figure=go.Figure(
{
"data": [
{
"x": list(X),
"y": list(Y[variable]),
"mode": "lines",
"name": variable,
"line": {"color": "#f4d44d"},
}
for variable in Y.columns
],
"layout": {
"uirevision": True,
"margin": dict(l=0, r=0, t=4, b=4, pad=0),
"xaxis": dict(
showline=False,
showgrid=False,
zeroline=False,
showticklabels=False,
),
"yaxis": dict(
showline=False,
showgrid=False,
zeroline=False,
showticklabels=False,
),
"paper_bgcolor": "rgba(0,0,0,0)",
"plot_bgcolor": "rgba(0,0,0,0)",
}
}
)
)
)
)
)
app = dash.Dash(__name__)
store = [dcc.Store(id={'type':'store-', 'index':i}, data=init_store[str(i)].to_json()) for i in range(n)]
def make_layout():
return(
html.Div(
[
html.Div(
store
),
dcc.Interval(
id = 'interval',
interval = 1000,
n_intervals = 0
),
html.Div(
[
generate_graph_containers(str(i), store[i].data) for i in range(n)
]
)
]
)
)
app.layout = make_layout
@app.callback(
    Output(component_id={'type':'store-', 'index':MATCH}, component_property='data'),
    [
        Input('interval', 'n_intervals'),
        State(component_id={'type':'store-', 'index':MATCH}, component_property='data')
    ]
)
def update_data(time, data):
    """Refresh the matching store on every interval tick.

    Args:
        time: current ``n_intervals`` of the interval component (unused).
        data: JSON currently held by the matching store.

    Returns:
        The JSON returned by ``get_data(data)``.
    """
    return get_data(data)
@app.callback(
    Output(component_id={'type':'graph-', 'index':MATCH}, component_property='extendData'),
    Input(component_id={'type':'store-', 'index':MATCH}, component_property="data")
)
def update_graph(data):
    """Forward the matching store's newest row to the matching graph.

    Args:
        data: JSON currently held by the matching store.

    Returns:
        Whatever ``update_graphs(data)`` returns.
    """
    # Debug output: prints the parsed frame on every update.
    print(pd.read_json(data))
    return update_graphs(data)
if __name__ == '__main__':
app.run_server(debug=True)

Dash Python how to make multiple updating graphs, how to title

New coder here. I'm trying to make 4 graphs that share the same random data (although I plan on splitting them apart later). When it was just the one graph, it took the random data fine and scaled automatically. In go.Layout there's no way to specify which graph you're labeling. Now that I've added multiple graphs, none of them have any titles, axis labels, or data. Please help!
import dash
from dash.dependencies import Output, Input
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import random
import plotly.graph_objs as go
from collections import deque
#setting first points
X = deque(maxlen = 20)
X.append(1)
Y = deque(maxlen = 20)
Y.append(1)
#app settings, html layout
app = dash.Dash(__name__)
app.layout = html.Div(children=[
html.H1([
html.H1(children='Graphs'),
dcc.Graph(id = 'Battery Voltage', animate = True),
dcc.Interval(
id = 'graph-update',
interval = 1000,
n_intervals = 0
),
],
),
html.Div(
[
dcc.Graph(id = 'Signal', animate = True),
dcc.Interval(
id = 'graph-update2',
interval = 1000,
n_intervals = 0
),
],
),
html.Div(
[
dcc.Graph(id = 'Health', animate = True),
dcc.Interval(
id = 'graph-update3',
interval = 1000,
n_intervals = 0
),
],
),
html.Div(
[
dcc.Graph(id = 'Prognastics', animate = True),
dcc.Interval(
id = 'graph-update4',
interval = 1000,
n_intervals = 0
),
],
)])
#this is what keeps the graph updating
@app.callback(
    [Output('Battery Voltage', 'figure'),
     Output('Signal', 'figure'),
     Output('Health', 'figure'),
     Output('Prognastics', 'figure')],
    [Input('graph-update', 'n_intervals'),
     Input('graph-update2', 'n_intervals'),
     Input('graph-update3', 'n_intervals'),
     Input('graph-update4', 'n_intervals')]
)
def update_graph_scatter(n):
    """Append one random point to the shared X/Y deques and return a figure.

    NOTE: the decorator declares four Inputs and four Outputs, while this
    function accepts one argument and returns one figure. That mismatch is
    the problem the question describes, so it is left as posted.
    """
    X.append(X[-1]+1)
    Y.append(Y[-1]+Y[-1] * random.uniform(-0.1,0.1))
    data = go.Scatter(
        x=list(X),
        y=list(Y),
        name='Scatter',
        mode= 'lines+markers'
    )
    return {'data': [data],
            'layout' : go.Layout(title="Battery Voltage",
                                 xaxis_title="Time",
                                 yaxis_title="Voltage",
                                 xaxis=dict(range=[min(X),max(X)]),
                                 yaxis = dict(range = [min(Y),max(Y)])
                                 )
            }
if __name__ == '__main__':
app.run_server()
When ran with multiple
Just the one
If anyone else has this problem: each function definition has to come immediately after its own callback decorator. My formatting was also bad, I guess. It should look like:
@app.callback(Output('Health', 'figure'),
              Input('graph-update3', 'n_intervals'))
def update_graph3(n):
    """Append one random point to the shared X/Y deques and redraw 'Health'.

    Args:
        n: current ``n_intervals`` of 'graph-update3' (unused).

    Returns:
        A figure dict with one scatter trace and a titled layout whose axis
        ranges span the current contents of X and Y.
    """
    X.append(X[-1]+1)
    Y.append(Y[-1]+Y[-1] * random.uniform(-0.1,0.1))
    #those are random points, we'd socket something in
    data = go.Scatter(
        x=list(X),
        y=list(Y),
        name='Scatter',
        mode= 'lines+markers'
    )
    print("data:", data)
    return {'data': [data],
            'layout' : go.Layout(title="Health",
                                 xaxis_title="Time",
                                 yaxis_title="Health",
                                 xaxis=dict(range=[min(X),max(X)]),
                                 yaxis = dict(range = [min(Y),max(Y)])
                                 )
            }

Plotly: How to create 'input text' on one page and output (graphs) on second page or tab, by using plotly-dash?

I am creating an app where the first page should take only 'text input' and results(graph) must show on second page or new tab. I do not want text input and charts on the same page. It means, if I write the input as 'USA' in text input bar, the graph of USA should populate on second tab. Following is the working code that I have written so far in dropdown format. In this code, dropdown and graphs are on the same page which I do not want. Please suggest.
import pandas as pd
import plotly.express as px
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Output, Input
import numpy as np
import plotly.io as pio
pio.renderers.default='browser'
app = dash.Dash(__name__)
app.layout = html.Div([
html.H1("Economy Analysis"),
dcc.Dropdown(id='Country_select',
options=[{'label': x, 'value': x}
for x in df.Country.unique()],
value = 'USA'
),
dcc.Graph(id ='my-graph', figure = {})
])
@app.callback(
    Output(component_id = 'my-graph', component_property = 'figure'),
    Input(component_id = 'Country_select', component_property = 'value'))
def interactive_graphing(value_country):
    """Draw a bar chart of summed 'Net' per category for one country.

    Random sample data is generated on every call, filtered to
    ``value_country`` and pivoted so each category becomes a column.

    Args:
        value_country: the value selected in the 'Country_select' dropdown.

    Returns:
        A plotly express bar figure with one bar series per category.
    """
    print(value_country)
    s = 100
    cat_g = ["developing","develop"]
    # s labels, one per generated row.
    sample_cat = [cat_g[np.random.randint(0,2)] for _ in range(s)]
    df = pd.DataFrame({"Country": np.random.choice(["USA", "JPY", "MEX", "IND", "AUS"], s),
                       "Net": np.random.randint(5, 75, s),
                       })
    df["sample_cat"] = sample_cat
    df = df[df.Country==value_country]
    df2 = df.pivot_table(index='Country',columns='sample_cat',values='Net',aggfunc='sum')
    # Guarantee both category columns exist even if the random sample for this
    # country happened to contain only one of them.
    df2 = df2.reindex(columns=['develop', 'developing'], fill_value=0)
    df2.reset_index(inplace=True)
    fig = px.bar(df2, x="Country",
                 y=['develop','developing'])
    return fig
if __name__=='__main__':
app.run_server()
You can use dcc.Tabs and dcc.Tab containers in your layout, and put the input/graph in separate tabs. Dash bootstrap components tabs would also work for this. The ids will still work as inputs/outputs with your callback.
Sample layout:
app.layout = html.Div([
html.H1("Economy Analysis"),
dcc.Tabs([
dcc.Tab(
label='Dropdown',
children=[
dcc.Dropdown(id='Country_select',
options=[{'label': x, 'value': x}
for x in df.Country.unique()],
value = 'USA')
]
),
dcc.Tab(
label='Graph',
children=[
dcc.Graph(id ='my-graph')
]
)
])
])

In dash, how do I use a callback to update a graph when a radio button is selected?

I'm new to dash and I'm having problems finding examples on using data frames within a callback. I created a weekly radio button and a monthly radio button.
When the monthly radio button is selected I would like the graph to pull data from df_monthly where each bar would be a monthly sum of pay. When the weekly radio button is checked I would like to see the graph populate each bar on a weekly basis which would be each row in the data frame since I get paid once a week.
I'm not certain where I'm going wrong but I keep receiving an error stating TypeError: update_fig() takes 0 positional arguments but 1 was given
The graph populates without data like the picture below. Thanks for any help on this matter.
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.plotly as py
import plotly.graph_objs as go
import sqlite3
import pandas as pd
from functools import reduce
import datetime
conn = sqlite3.connect('paychecks.db')
df_ct = pd.read_sql('SELECT * FROM CheckTotal',conn)
df_earn = pd.read_sql('SELECT * FROM Earnings', conn)
df_whold = pd.read_sql('SELECT * FROM Withholdings', conn)
data_frames = [df_ct, df_earn, df_whold]
df_paystub = reduce(lambda left,right: pd.merge(left,right,on=['Date'], how='outer'), data_frames)
def date_extraction(df):
    """Parse the 'Date' column and add 'Year', 'Month' and 'Day' text columns.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame with a 'Date' column that ``pd.to_datetime`` can parse.

    Returns:
        The same DataFrame object, with 'Date' converted to datetime and three
        string columns added: 'Year' (``%Y``), 'Month' (``%B``, full month
        name) and 'Day' (``%d``, zero-padded).
    """
    df['Date'] = pd.to_datetime(df['Date'])
    dates = df['Date'].dt
    df['Year'] = dates.strftime('%Y')
    df['Month'] = dates.strftime('%B')
    df['Day'] = dates.strftime('%d')
    return df
date_extraction(df_paystub)
df_monthly = df_paystub.groupby(['Month']).sum()
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app.css.append_css({'external_url': 'https://codepen.io/amyoshino/pen/jzXypZ.css'})
app.layout = html.Div(children=[
html.Div([
html.Div([
dcc.RadioItems(
id='data-view',
options=[
{'label': 'Weekly', 'value': 'Weekly'},
{'label': 'Monthly', 'value': 'Monthly'},
],
value='',
labelStyle={'display': 'inline-block'}
),
], className = 'two columns'),
html.Div([
dcc.Dropdown(
id='year-dropdown',
options=[
{'label': i, 'value': i} for i in df_paystub['Year'].unique()
],
placeholder="Select a year",
),
], className='five columns'),
html.Div([
dcc.Dropdown(
id='month-dropdown',
options=[
{'label': i, 'value': i} for i in df_paystub['Month'].unique()
],
placeholder="Select a month(s)",
multi=True,
),
], className='five columns'),
], className = 'row'),
# HTML ROW CREATED IN DASH
html.Div([
# HTML COLUMN CREATED IN DASH
html.Div([
# PLOTLY BAR GRAPH
dcc.Graph(
id='pay',
)
], className = 'six columns'),
# HTML COLUMN CREATED IN DASH
html.Div([
# PLOTLY LINE GRAPH
dcc.Graph(
id='hours',
figure={
'data': [
go.Scatter(
x = df_earn['Date'],
y = df_earn['RegHours'],
mode = 'lines',
name = 'Regular Hours',
),
go.Scatter(
x = df_earn['Date'],
y = df_earn['OtHours'],
mode = 'lines',
name = 'Overtime Hours',
)
]
}
)
], className='six columns')
], className='row')
], className='ten columns offset-by-one')
@app.callback(dash.dependencies.Output('pay', 'figure'),
              [dash.dependencies.Input('data-view', 'value')])
def update_fig():
    """Build the grouped 'Take Home Pay vs. Earnings' bar chart from df_monthly.

    NOTE: the decorator declares one Input while this function accepts no
    arguments; that is the TypeError the question asks about, so the
    signature is left as posted.
    """
    figure={
        'data': [
            go.Bar(
                x = df_monthly['Month'],
                y = df_monthly['CheckTotal'],
                name = 'Take Home Pay',
            ),
            go.Bar(
                x = df_monthly['Month'],
                y = df_monthly['EarnTotal'],
                name = 'Earnings',
            )
        ],
        'layout': go.Layout(
            title = 'Take Home Pay vs. Earnings',
            barmode = 'group',
            yaxis = dict(title = 'Pay (U.S. Dollars)'),
            xaxis = dict(title = 'Date Paid')
        )
    }
    return figure
if __name__ == "__main__":
app.run_server(debug=True)
Hi @prime90 and welcome to Dash.
In glancing at your callback signature it looks like the update_fig() function needs to take the Input you've given it (using dash.dependencies.Input).
The callback passes the current value of each Input you've specified to your function. So it's sending the value of #data-view to update_fig(), which doesn't currently accept any arguments, causing the error message.
Just update your function signature and add a couple of boolean variables to rid yourself of the error and get the potential functionality:
def update_fig(dataview_value):
    """Build the grouped pay bar chart for the selected view.

    Args:
        dataview_value: value of the 'data-view' radio buttons
            ('Weekly' or 'Monthly').

    Returns:
        A figure dict with two bar traces ('Take Home Pay' and 'Earnings')
        and a grouped-bar layout.
    """
    # define your weekly OR monthly dataframe
    # you'll need to supply df_weekly similarly to df_monthly
    # though DO NOT modify these, see note below!
    # The radio values are capitalised ('Weekly'), so compare case-insensitively.
    is_weekly = str(dataview_value).lower() == 'weekly'
    df = df_weekly if is_weekly else df_monthly
    # Test for the same column name that is used for indexing below.
    dfkey = 'Week' if 'Week' in df.columns else 'Month'
    # NOTE(review): df_monthly comes from groupby(['Month']).sum(), which puts
    # 'Month' in the index; it may need reset_index() before df[dfkey] works --
    # confirm against your data.
    figure={
        'data': [
            go.Bar(
                x = df[dfkey],
                y = df['CheckTotal'],
                name = 'Take Home Pay',
            ),
            go.Bar(
                x = df[dfkey],
                y = df['EarnTotal'],
                name = 'Earnings',
            )
        ],
        'layout': go.Layout(
            title = 'Take Home Pay vs. Earnings',
            barmode = 'group',
            yaxis = dict(title = 'Pay (U.S. Dollars)'),
            xaxis = dict(title = 'Date Paid')
        )
    }
    return figure
As was written in the comments above, you'll need to do some type of prior manipulation to create a df_weekly, as you have with your current df_monthly.
In addition, the code snippet I wrote assumes the df column is named "Week" and "Month"--obviously update these as is necessary.
Data manipulation in Dash:
Ensure you read the data sharing docs, as they highlight how data should never be modified out of scope.
I hope this helps :-)

Categories

Resources