Hi I'm fairly new to Python, Plotly and Jupyter Notebook. I would like to use a slider to select the number of days as the range in a query to which a graph is created from. My only issue is that I want the graph to automatically update on interaction with the slider, without having to re-run the query and graph creation. My code is below:
slider = widgets.IntSlider()
display(slider)
sliderVal = slider.value
df = pd.read_sql(f"""
SELECT CASE WHEN SiteID LIKE 3 THEN 'BLAH'
WHEN SiteID LIKE 4 THEN 'BLAHBLAH'
END AS Website,
COUNT(1) AS Count
FROM viewName
WHERE (TimeStamp > DATEADD(DAY, -{sliderVal}, GETDATE()))
GROUP BY SiteId
ORDER BY Count DESC
""", conn)
data = [go.Bar(x=df.Website, y=df.Count)]
layout = go.Layout(
xaxis=dict(
title='Website'),
yaxis=dict(
title='Exception count'),
title=f'Number of exceptions per user in the last {sliderVal} days')
chart = go.Figure(data=data, layout=layout, )
py.iplot(chart, filename='WebExceptions')
Thanks in advance!
If you do not want to rerun the query, then your data frame df must contain the results for all the values that you want the intslider widget to take, the function linked to the widget will then simply filter the data and redraw the graph with the new filtered data.
Here's an example with some dummy data:
import ipywidgets as widgets
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
py.init_notebook_mode(connected = True)
# Dummy data, to be replaced with your query result for the range of sliderVal
df = pd.DataFrame({'Days': [1] * 3 + [2] * 4 + [3] * 5,
'Website': [1,2,3, 4,5,6,7, 8,9,10,11,12],
'Count': [10,5,30, 15,20,25,12, 18,17,30,23,27]})
def update_plot(sliderVal):
filtered_df = df.query('Days== ' + str(sliderVal))
data = [go.Bar(x = filtered_df.Website,
y = filtered_df.Count)]
layout = go.Layout(
xaxis = dict(title = 'Website'),
yaxis = dict(title = 'Exception count'),
title = f'Number of exceptions per user in the last {sliderVal} days')
chart = go.Figure(data = data, layout = layout, )
py.iplot(chart, filename = 'WebExceptions')
# links an IntSlider taking values between 1 and 3 to the update_plot function
widgets.interact(update_plot, sliderVal = (1, 3))
and here is the result with sliderVal = 2:
Related
I want to draw a chart of btc and show one hour after one hour like a slideshow (no animation or effect is needed). However I'm not able to update the figure stemming from px.line() without the browser opening a new tab. How would I do this, like remaining in the same tab and just updating the data of the line and redraw it?
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import time
df = pd.read_csv('btc.csv')
i=0
shown=False
part_df1=df.iloc[0:60]
fig = px.line(part_df1, x = 'date_x', y = 'price_y', title='btc price ..')
fig.show()
while i<600:
time.sleep(0.25)
i=i+60
part_df=df.iloc[i:i+60]
fig = px.line(part_df, x = 'date_x', y = 'price_y', title='btc price ..')
fig.show()
so something like:
fig.update(px.line(part_df, x = 'date_x', y = 'price_y', title='btc price ..'))
instead of
fig = px.line(part_df, x = 'date_x', y = 'price_y', title='btc price ..')
fig.show()
I've put together an example plotly-dash app that performs a live update for data over a predefined period of time. Some of the important features are:
the dcc.Interval object automatically triggers the callback function for updating the figure every interval of time (e.g. update the figure every 1 second). there's also an n_interval counter which will be useful for helping us keep track of the indices in the df we are iterating through (documentation here)
the extendData property of dcc.Graph allows you to return a dictionary from your callback (and saves you the trouble of having to directly modify the data inside the figure object). (documentation here)
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
# df = pd.read_csv('btc.csv')
## create some random walk data
np.random.seed(42)
change = np.random.choice([-1,0,1], 600-1)
btc = [20000]
for y_change in change:
btc_new = btc[-1] + 1000*y_change
btc.append(btc_new)
df = pd.DataFrame({
'date': pd.date_range(start='2022-01-01', periods=600),
'btc': btc
})
part_df=df.iloc[0:60]
fig = px.line(part_df, x = 'date', y = 'btc', title='btc price')
app = dash.Dash(__name__)
app.layout = html.Div(
html.Div([
dcc.Graph(id='live-update-graph', figure=fig),
dcc.Interval(
id='interval-component',
interval=1000, # 0.25*1000, # in milliseconds
n_intervals=1,
max_intervals=len(df) // 60 - 1
)
])
)
#app.callback(Output('live-update-graph', 'extendData'),
[Input('interval-component', 'n_intervals'),
Input('live-update-graph', 'figure')])
def extend_trace(n, fig):
if 60*n > len(df):
return {}
else:
part_df = df.iloc[60*n:60*(n+1)]
return (dict(
x=[part_df['date'].tolist()],
y=[part_df['btc'].tolist()],
))
app.run(debug=True)
Update: if you don't need to the data to extend, and only want to show the new incoming data, then you can use the figure property of dcc.Graph instead of extendData. Then your callback would look like the following:
#app.callback(Output('live-update-graph', 'figure'),
[Input('interval-component', 'n_intervals'),
Input('live-update-graph', 'figure')])
def extend_trace(n, fig):
if 60*n > len(df):
return fig
else:
part_df = df.iloc[60*n:60*(n+1)]
return (dict(
data=[dict(
x=part_df['date'].tolist(),
y=part_df['btc'].tolist(),
)]
))
I've got a program that displays stock market info for a google. I can adjust it with a DateRangeSlider to increase the dates that we're looking at. I also have a hovertool set on the chart to display the various aspects of the data for each respective day.
The issue I'm having is on callback. Instead of the hover data being refreshed with each call, it retains memory of previous calls and just stacks the previous hover info on top of the new.
Here's my code for running in Jupyter:
from pandas_datareader import data
from pandas import Timedelta
from datetime import datetime, date
from bokeh.plotting import figure, show
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, HoverTool, NumeralTickFormatter, DatetimeTickFormatter, TextInput, DateRangeSlider
from bokeh.io import output_notebook, curdoc
output_notebook()
def inc_dec(c, o):
"""Determines if the financial day with and Increase or Decrease"""
if c > o:
return "Increase"
elif c == 0:
return "Equal"
else:
return "Decrease"
def get_sources(start = date(2016, 3, 1), end = date(2016, 3, 10), company = "GOOG"):
"""Gets all my dataframes for each glyph below"""
#Gather data for df's
df = data.DataReader(company, data_source="yahoo", start=start, end=end)
df["Status"] = [inc_dec(c, o) for c, o in zip(df.Close, df.Open)]
df["Mid"] = (df.Open + df.Close) / 2
df["Height"] = abs(df.Open - df.Close)
df["Width"] = 12*60*60*1000
df["Segment, Left"] = df.index - Timedelta(hours=2)
df["Segment, Right"] = df.index + Timedelta(hours=2)
#my df is complete. Split them
inc_df = df[df.Status == "Increase"]
dec_df = df[df.Status == "Decrease"]
#inc_s = ColumnDataSource(inc_df)
#dec_s = ColumnDataSource(dec_df)
#tot_s = ColumnDataSource(df)
#Dictionary of dataframes, Increasing, Decreasing, and Total
dfs = {"I": inc_df, "D": dec_df, "T": df}
return dfs
def fin(doc):
"""Initialize the graphs and make an application"""
#Get df's and convert to CDS
dfs = get_sources()
inc_s = ColumnDataSource(dfs["I"])
dec_s = ColumnDataSource(dfs["D"])
tot_s = ColumnDataSource(dfs["T"])
hover = HoverTool(names=["inc","dec"],
tooltips = [("Date", "#Date{%m/%d/%Y}"),
("High", "#High{$0.00}"),
("Low","#Low{$0.00}"),
("Open","#Open{$0.00}"),
("Close","#Close{$0.00}"),
("Volume", "#Volume{0,0}")],
formatters = {"#Date" : "datetime"})
#Formatting the plot, p
p = figure(x_axis_type="datetime", width=1000, height=300, sizing_mode="scale_width", tools="crosshair,pan,wheel_zoom,box_zoom,reset")
#p.title.text = "{}: Finacial Data".format(company) #Change this with company later
p.title.text = "GOOG: Financial Data"
p.title.align = "center"
p.xaxis.axis_label="Date"
p.yaxis.axis_label="Price"
p.yaxis.formatter = NumeralTickFormatter(format="$0,0.00")
p.grid.grid_line_alpha = 0.3
#Add glyphs
p.segment(x0='Date', y0='High', x1='Date', y1='Low', source=tot_s) #vertical
p.segment(x0='Segment, Left', y0='High', x1='Segment, Right', y1='High', source=tot_s) #high
p.segment(x0='Segment, Left', y0='Low', x1='Segment, Right', y1='Low', source=tot_s) #low
p.rect(x="Date", y="Mid", width="Width", height="Height", name="inc",
fill_color="#00FFFF", line_color="black", source=inc_s)
p.rect(x="Date", y="Mid", width="Width", height="Height", name="dec",
fill_color="#B22222", line_color="black", source=dec_s)
p.add_tools(hover)
#the callbacks
def cb_date(attr, new, old):
"""Callback for the DateRangeSlider"""
if not isinstance(new[0], int):
return None
else:
#Get new dates, pass them to get_sources to get df's, add new CDS's
start = date.fromtimestamp(new[0]/1000)
end = date.fromtimestamp(new[1]/1000)
dfs = get_sources(start=start, end=end)
inc_s.stream(dfs["I"])
dec_s.stream(dfs["D"])
tot_s.stream(dfs["T"])
#def cb_com(attr, new, old):
#Changes the company name, do later
#add widgets
slider = DateRangeSlider(start=date(2016, 1, 1),
end=date(2016, 5, 1),
value=(date(2016, 3, 1), date(2016, 3, 10)),
step=1,
title="Change Dates:")
slider.on_change('value', cb_date)
doc.add_root(column(slider,p))
show(fin)
Let me know what how I can go forward with this. I'm pretty lost.
I cannot run your code without some test data, but it seems to me that the issue is with the calls to the stream function. Streaming data does not remove the old data, even if the X coordinate is the same (data sources don't know and don't care about coordinates). If you want to replace the data for some particular date, you should use patch. If you want to replace the whole data, you should just assign the new value to the data attribute of a data source.
I've taken the below code from another source - it is not my own code.
The code allows you to select a cell in the data table, and the 'downloads' data for that cell will chart based on the row of the cell selected.
How do I expand this code such that if I have multiple variables (eg. 'downloads' and 'uploads') and so more columns in the data table, I can chart data based on that cell (so where row AND column are important)? Alternatively, how can I define as a variable the column number of a selected cell (in the same way selected_row below can be used to define the row number)?
from datetime import date
from random import randint
from bokeh.models import ColumnDataSource, Column
from bokeh.plotting import figure, curdoc
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn, Div
import numpy as np
data = dict(dates = [date(2014, 3, i + 1) for i in range(10)],
downloads = [randint(0, 100) for i in range(10)])
d_source = ColumnDataSource(data)
columns = [TableColumn(field = "dates", title = "Date", formatter = DateFormatter()),
TableColumn(field = "downloads", title = "Downloads")]
data_table = DataTable(source = d_source, columns = columns, width = 400, height = 280)
def table_select_callback(attr, old, new):
selected_row = new[0]
download_count = data['downloads'][selected_row]
chart_data = np.random.uniform(0, 100, size = download_count)
p = figure(title = 'bla')
r = p.line(x = range(len(chart_data)), y = chart_data)
root_layout.children[1] = p
d_source.selected.on_change('indices', table_select_callback)
root_layout = Column(data_table, Div(text = 'Select Date'))
curdoc().add_root(root_layout)
This is the final working code (run from command line with bokeh serve --show app.py):
from datetime import date
from random import randint
from bokeh.models import ColumnDataSource, Column, TableColumn, DateFormatter, DataTable, CustomJS
from bokeh.plotting import figure, curdoc
source = ColumnDataSource(dict(dates = [date(2014, 3, i + 1) for i in range(10)], downloads = [randint(0, 100) for i in range(10)]))
columns = [TableColumn(field = "dates", title = "Date", formatter = DateFormatter()), TableColumn(field = "downloads", title = "Downloads")]
data_table = DataTable(source = source, columns = columns, width = 400, height = 280, editable = True, reorderable = False)
info_source = ColumnDataSource(dict(row = [], column = []))
source_code = """
var grid = document.getElementsByClassName('grid-canvas')[0].children;
var row, column = '';
for (var i = 0,max = grid.length; i < max; i++){
if (grid[i].outerHTML.includes('active')){
row = i;
for (var j = 0, jmax = grid[i].children.length; j < jmax; j++)
if(grid[i].children[j].outerHTML.includes('active')) {
column = j;
source2.data = {row: [row], column: [column]};
}
}
}"""
callback = CustomJS(args = dict(source = source, source2 = info_source), code = source_code)
source.selected.js_on_change('indices', callback)
def py_callback(attr, old, new):
source.selected.update(indices = [])
print(info_source.data)
source.selected.on_change('indices', py_callback)
curdoc().add_root(Column(data_table))
My suggestion is to use my another post that uses a JS callback to access the row and column of the selected cell. This must be a JS callback as it uses HTML elements to walk through. So the steps are as follows:
Define a new ColumnDataSource that will contain the row and column number of a clicked cell
info_source = ColumnDataSource(dict(row = [], column = []))
Use the JS callback from this post to fill in the row and column values in this new table_info_source like this:
callback=CustomJS(args=dict(source=d_source,source2=info_source),code=source_code)
source.selected.js_on_change('indices', callback)
Inside the JS callback store the row and column index like this:
source2.data = {row:[row],column:[column]};
Access the info_source.data information in your Python callback to draw a plot
print (info_source.data)
Both callback are attached to the same source but usually JS callback is executed first so the data should be available in the Python callback on time. Having the index of row and column of the clicked cell you should be able to retrieve the required data and draw your chart.
I am creating a bokeh plot with a slider to refresh plot accordingly. There are 2 issues with the code posted.
1. The plot is not refreshed as per the slider. Please help in providing a fix for this issue.
2. Plot is not displayed with curdoc() when bokeh serve --show fn.ipynb is used
I'm trying to visualise this CSV file.
import pandas as pd
import numpy as np
from bokeh.models import ColumnDataSource, CategoricalColorMapper, HoverTool, Slider
from bokeh.plotting import figure, curdoc
from bokeh.palettes import viridis
from bokeh.layouts import row, widgetbox
#Importing and processing data file
crop = pd.read_csv('crop_production.csv')
#Cleaning Data
crop.fillna(np.NaN)
crop['Season'] = crop.Season.str.strip()
#Removing Whitespace #Filtering the dataset by Season
crop_season = crop[crop.Season == 'Whole Year']
crop_dt = crop_season.groupby(['State_Name', 'District_Name', 'Crop_Year']).mean().round(1)
#Creating Column Data Source
source = ColumnDataSource({
'x' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].Area,
'y' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].Production,
'state' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].index.get_level_values('State_Name'),
'district' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].index.get_level_values('District_Name')
})
#Creating color palette for plot
district_list = crop_dt.loc[(['Tamil Nadu']), :].index.get_level_values('District_Name').unique().tolist()
call_colors = viridis(len(district_list))
color_mapper = CategoricalColorMapper(factors=district_list, palette=call_colors)
# Creating the figure
#xmin, xmax = min(data.Crop_Year), max(data.Crop_Year)
#ymin, ymax = min(data.Production), max(data.Production)
p = figure(
title = 'Crop Area vs Production',
x_axis_label = 'Area',
y_axis_label = 'Production',
plot_height=900,
plot_width=1200,
tools = [HoverTool(tooltips='#district')]
)
p.circle(x='x', y='y', source=source, size=12, alpha=0.7,
color=dict(field='district', transform=color_mapper),
legend='district')
p.legend.location = 'top_right'
def update_plot(attr, old, new):
yr = slider.value
new_data = {
'x' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].Area,
'y' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].Production,
'state' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].index.get_level_values('State_Name'),
'district' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].index.get_level_values('District_Name')
}
source.data = new_data
#Creating Slider for Year
start_yr = min(crop_dt.index.get_level_values('Crop_Year'))
end_yr = max(crop_dt.index.get_level_values('Crop_Year'))
slider = Slider(start=start_yr, end=end_yr, step=1, value=start_yr, title='Year')
slider.on_change('value',update_plot)
layout = row(widgetbox(slider), p)
curdoc().add_root(layout)
show(layout)
Also tried a different option using CustomJS as below, but still no luck.
callback = CustomJS(args=dict(source=source), code="""
var data = source.data;
var yr = slider.value;
var x = data['x']
var y = data['y']
'x' = crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['ABC']), :].Area;
'y' = crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['ABC']), :].Production;
p.circle(x='x', y='y', source=source, size=12, alpha=0.7,
color=dict(field='district', transform=color_mapper),
legend='district');
}
source.change.emit();
""")
#Creating Slider for Year
start_yr = min(crop_dt.index.get_level_values('Crop_Year'))
end_yr = max(crop_dt.index.get_level_values('Crop_Year'))
yr_slider = Slider(start=start_yr, end=end_yr, step=1, value=start_yr, title='Year', callback=callback)
callback.args["slider"] = yr_slider
Had a lot of issues trying to execute your code and I have changed some things, so feel free to correct me if did something wrong.
The error was caused by the creation of the ColumnDataSource, I had to change the level value to Crop_Year instead of Year. The loc 'ABC' also caused an error so I removed that too (And I had to add source = ColumnDataSource({, you probably forgot to copy that)
I also added a dropdown menu so it's possible to only show the data from one district.
Also, I'm not quite sure if it's possible to start a bokeh server by supplying a .ipynb file to --serve. But don't pin me down on that, I never use notebooks. I've tested this with a .py file.
#!/usr/bin/python3
import pandas as pd
import numpy as np
from bokeh.models import ColumnDataSource, CategoricalColorMapper, HoverTool
from bokeh.plotting import figure, curdoc
from bokeh.palettes import viridis
from bokeh.layouts import row, widgetbox
from bokeh.models.widgets import Select, Slider
#Importing and processing data file
crop = pd.read_csv('crop_production.csv')
#Cleaning Data
crop.fillna(np.NaN)
crop['Season'] = crop.Season.str.strip()
#Removing Whitespace #Filtering the dataset by Season
crop_season = crop[crop.Season == 'Whole Year']
crop_dt = crop_season.groupby(['State_Name', 'District_Name', 'Crop_Year']).mean().round(1)
crop_dt_year = crop_dt[crop_dt.index.get_level_values('Crop_Year')==2001]
crop_dt_year_state = crop_dt_year[crop_dt_year.index.get_level_values('State_Name')=='Tamil Nadu']
#Creating Column Data Source
source = ColumnDataSource({
'x': crop_dt_year_state.Area.tolist(),
'y': crop_dt_year_state.Production.tolist(),
'state': crop_dt_year_state.index.get_level_values('State_Name').tolist(),
'district': crop_dt_year_state.index.get_level_values('District_Name').tolist()
})
#Creating color palette for plot
district_list = crop_dt.loc[(['Tamil Nadu']), :].index.get_level_values('District_Name').unique().tolist()
call_colors = viridis(len(district_list))
color_mapper = CategoricalColorMapper(factors=district_list, palette=call_colors)
# Creating the figure
p = figure(
title = 'Crop Area vs Production',
x_axis_label = 'Area',
y_axis_label = 'Production',
plot_height=900,
plot_width=1200,
tools = [HoverTool(tooltips='#district')]
)
glyphs = p.circle(x='x', y='y', source=source, size=12, alpha=0.7,
color=dict(field='district', transform=color_mapper),
legend='district')
p.legend.location = 'top_right'
def update_plot(attr, old, new):
#Update glyph locations
yr = slider.value
state = select.value
crop_dt_year = crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr]
crop_dt_year_state = crop_dt_year[crop_dt_year.index.get_level_values('State_Name')==state]
new_data = {
'x': crop_dt_year_state.Area.tolist(),
'y': crop_dt_year_state.Production.tolist(),
'state': crop_dt_year_state.index.get_level_values('State_Name').tolist(),
'district': crop_dt_year_state.index.get_level_values('District_Name').tolist()
}
source.data = new_data
#Update colors
district_list = crop_dt.loc[([state]), :].index.get_level_values('District_Name').unique().tolist()
call_colors = viridis(len(district_list))
color_mapper = CategoricalColorMapper(factors=district_list, palette=call_colors)
glyphs.glyph.fill_color = dict(field='district', transform=color_mapper)
glyphs.glyph.line_color = dict(field='district', transform=color_mapper)
#Creating Slider for Year
start_yr = min(crop_dt.index.get_level_values('Crop_Year'))
end_yr = max(crop_dt.index.get_level_values('Crop_Year'))
slider = Slider(start=start_yr, end=end_yr, step=1, value=start_yr, title='Year')
slider.on_change('value',update_plot)
#Creating drop down for state
options = list(set(crop_dt.index.get_level_values('State_Name').tolist()))
options.sort()
select = Select(title="State:", value="Tamil Nadu", options=options)
select.on_change('value', update_plot)
layout = row(widgetbox(slider, select), p)
curdoc().add_root(layout)
#Jasper Thanks a lot. This works, however it doesnt work with .loc[(['Tamil Nadu']), :]. Reason for having this is to filter the data by adding a bokeh dropdown or radio button object and refresh the plot based on the filters. The below code works only if .loc[(['Tamil Nadu']), :] is removed. Is there any other way to fix this please?
def update_plot(attr, old, new):
yr = slider.value
new_data = {
'x' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].Area.tolist(),
'y' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].Production.tolist(),
'state' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].index.get_level_values('State_Name').tolist(),
'district' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].index.get_level_values('District_Name').tolist()
}
source.data = new_data
I have been trying to create a Geoscatter Plot with Plotly where the marker size should indicate the number of customers (row items) in one city (zip_city). I based my code on two templates from the Plotly documentation: United States Bubble Map and the aggregation part Mapping with Aggregates.
I managed to put together a code that does what I want, except for one drawback: when I hover over a bubble, I would like to see the name of the city plus number of customers (the result from the aggregation), so something like Aguadilla: 2. Can you help me on how to do this?
Here is my code (as a beginner with plotly, I am also open to code improvements):
import plotly.offline as pyo
import pandas as pd
df = pd.DataFrame.from_dict({'Customer': [111, 222, 555, 666],
'zip_city': ['Aguadilla', 'Aguadilla', 'Arecibo', 'Wrangell'],
'zip_latitude':[18.498987, 18.498987, 18.449732,56.409507],
'zip_longitude':[-67.13699,-67.13699,-66.69879,-132.33822]})
data = [dict(
type = 'scattergeo',
locationmode = 'USA-states',
lon = df['zip_longitude'],
lat = df['zip_latitude'],
text = df['Customer'],
marker = dict(
size = df['Customer'],
line = dict(width=0.5, color='rgb(40,40,40)'),
sizemode = 'area'
),
transforms = [dict(
type = 'aggregate',
groups = df['zip_city'],
aggregations = [dict(target = df['Customer'], func = 'count', enabled = True)]
)]
)]
layout = dict(title = 'Customers per US City')
fig = dict( data=data, layout=layout )
pyo.plot( fig, validate=False)
Update:
Can I access the result of the transforms argument directly in the data argument to show the number of customers per city?
You can create a list, that will contains what you want and then set text=list in data. Also do not forget specify hoverinfo='text'.
I am updated your code, so try this:
import pandas as pd
import plotly.offline as pyo
df = pd.DataFrame.from_dict({'Customer': [111, 222, 555, 666],
'zip_city': ['Aguadilla', 'Aguadilla', 'Arecibo', 'Wrangell'],
'zip_latitude':[18.498987, 18.498987, 18.449732,56.409507],
'zip_longitude':[-67.13699,-67.13699,-66.69879,-132.33822]})
customer = df['Customer'].tolist()
zipcity = df['zip_city'].tolist()
list = []
for i in range(len(customer)):
k = str(zipcity[i]) + ':' + str(customer[i])
list.append(k)
data = [dict(
type = 'scattergeo',
locationmode = 'USA-states',
lon = df['zip_longitude'],
lat = df['zip_latitude'],
text = list,
hoverinfo = 'text',
marker = dict(
size = df['Customer'],
line = dict(width=0.5, color='rgb(40,40,40)'),
sizemode = 'area'
),
transforms = [dict(
type = 'aggregate',
groups = df['zip_city'],
aggregations = [dict(target = df['Customer'], func = 'count', enabled = True)]
)]
)]
layout = dict(title = 'Customers per US City')
fig = dict(data=data, layout=layout)
pyo.plot(fig, validate=False)