I have been trying to create a Geoscatter Plot with Plotly where the marker size should indicate the number of customers (row items) in one city (zip_city). I based my code on two templates from the Plotly documentation: United States Bubble Map and the aggregation part Mapping with Aggregates.
I managed to put together a code that does what I want, except for one drawback: when I hover over a bubble, I would like to see the name of the city plus number of customers (the result from the aggregation), so something like Aguadilla: 2. Can you help me on how to do this?
Here is my code (as a beginner with plotly, I am also open to code improvements):
import plotly.offline as pyo
import pandas as pd
df = pd.DataFrame.from_dict({'Customer': [111, 222, 555, 666],
'zip_city': ['Aguadilla', 'Aguadilla', 'Arecibo', 'Wrangell'],
'zip_latitude':[18.498987, 18.498987, 18.449732,56.409507],
'zip_longitude':[-67.13699,-67.13699,-66.69879,-132.33822]})
data = [dict(
type = 'scattergeo',
locationmode = 'USA-states',
lon = df['zip_longitude'],
lat = df['zip_latitude'],
text = df['Customer'],
marker = dict(
size = df['Customer'],
line = dict(width=0.5, color='rgb(40,40,40)'),
sizemode = 'area'
),
transforms = [dict(
type = 'aggregate',
groups = df['zip_city'],
aggregations = [dict(target = df['Customer'], func = 'count', enabled = True)]
)]
)]
layout = dict(title = 'Customers per US City')
fig = dict( data=data, layout=layout )
pyo.plot( fig, validate=False)
Update:
Can I access the result of the transforms argument directly in the data argument to show the number of customers per city?
You can create a list, that will contains what you want and then set text=list in data. Also do not forget specify hoverinfo='text'.
I am updated your code, so try this:
import pandas as pd
import plotly.offline as pyo
df = pd.DataFrame.from_dict({'Customer': [111, 222, 555, 666],
'zip_city': ['Aguadilla', 'Aguadilla', 'Arecibo', 'Wrangell'],
'zip_latitude':[18.498987, 18.498987, 18.449732,56.409507],
'zip_longitude':[-67.13699,-67.13699,-66.69879,-132.33822]})
customer = df['Customer'].tolist()
zipcity = df['zip_city'].tolist()
list = []
for i in range(len(customer)):
k = str(zipcity[i]) + ':' + str(customer[i])
list.append(k)
data = [dict(
type = 'scattergeo',
locationmode = 'USA-states',
lon = df['zip_longitude'],
lat = df['zip_latitude'],
text = list,
hoverinfo = 'text',
marker = dict(
size = df['Customer'],
line = dict(width=0.5, color='rgb(40,40,40)'),
sizemode = 'area'
),
transforms = [dict(
type = 'aggregate',
groups = df['zip_city'],
aggregations = [dict(target = df['Customer'], func = 'count', enabled = True)]
)]
)]
layout = dict(title = 'Customers per US City')
fig = dict(data=data, layout=layout)
pyo.plot(fig, validate=False)
Related
I'm trying to create a machine shop schedule that is color coded by parts that belong to the same assembly. I'm using plotly express timeline to create the Gantt. It is reading an excel file on my desktop to generate the schedule. I created a sample below. The goal is to have all the Chair parts be the same color, and all the Desk parts be the same color.
Here's the code to read the excel file and create the Gantt:
df = pd.read_excel(r"C:\Users\john.doe\Documents\Machine Durations - Sample.xlsx")
df['Start Shift'] = df['Start Shift'].astype(int)
df['Finish'] = df['Finish'].astype(int)
#display(df)
# create a slice if the df for the rank = 1
dfRank1 = df[df.Rank == 1]
# reindex it
dfRank1 = dfRank1.reset_index()
#display(dfRank1)
#Create the visual
df["Part"] = df["Part"].astype(str)
df["delta"] = df["Finish"]-df["Start Shift"]
fig = px.timeline(df,x_start ="Start Shift", x_end = "Finish", y = "Machine", hover_name ="Part",color = "Part", text = "Part", title = "Machine Shop Cycle", opacity = .75)
fig.update_yaxes(autorange="reversed")
fig.layout.xaxis.type = 'linear'
#fig.data[0].x = df.delta.tolist()
for d in fig.data:
filt = df['Part'] == d.name
d.x = df[filt]['delta'].tolist()
fig.update_traces(textposition='inside')
fig.show()
good practice is paste you data as text into a question
have made two changes
put Assembly into hover_data so that it is in customdata of each trace
loop through traces to update marker_color based on Assembly in customdata
# update colors to that of the assembly
cmap = {"Chair":"red", "Desk":"blue"}
fig.for_each_trace(lambda t: t.update({"marker":{"color":[cmap[a] for a in t["customdata"][:,0]]}}))
full code
import pandas as pd
import plotly.express as px
import io
df = pd.read_csv(
io.StringIO(
"""Part,Machine,Duration,Duration Shifts(6),Start Shift,Finish,Index,Assembly,Rank
Legs,Lathe,100,5,0,5,1,Chair,A
Seat,Mill,400,5,0,5,1,Chair,A
Back,Mill,200,3,5,8,1,Chair,A
Legs,Lathe,200,3,5,8,1,Desk,A
Table Top,Mill,200,3,8,11,1,Desk,A
Wheels,Mill-Turn,200,10,0,10,1,Desk,A"""
)
)
df["Start Shift"] = df["Start Shift"].astype(int)
df["Finish"] = df["Finish"].astype(int)
# display(df)
# create a slice if the df for the rank = 1
dfRank1 = df[df.Rank == 1]
# reindex it
dfRank1 = dfRank1.reset_index()
# display(dfRank1)
# Create the visual
df["Part"] = df["Part"].astype(str)
df["delta"] = df["Finish"] - df["Start Shift"]
fig = px.timeline(
df,
x_start="Start Shift",
x_end="Finish",
y="Machine",
hover_name="Part",
hover_data=["Assembly"], # want this for setting color
color="Part",
text="Part",
title="Machine Shop Cycle",
opacity=0.75,
)
fig.update_yaxes(autorange="reversed")
fig.layout.xaxis.type = "linear"
# fig.data[0].x = df.delta.tolist()
for d in fig.data:
filt = df["Part"] == d.name
d.x = df[filt]["delta"].tolist()
fig.update_traces(textposition="inside")
# update colors to that of the assembly
cmap = {"Chair":"red", "Desk":"blue"}
fig.for_each_trace(lambda t: t.update({"marker":{"color":[cmap[a] for a in t["customdata"][:,0]]}}))
output
I want to show how many mma fighters each country has.This code is works. But USA has not any color.(USA has the greatest value)How can I fix it?
https://www.kaggle.com/firaterdemdogan/mma-practice
data = [ dict(
type = 'choropleth',
locations = index,
locationmode = 'country names',
z = values,
text = index,
#colorscale = [[0,'rgb(255, 255, 255)'],[1,'rgb(56, 142, 60)']],
#colorscale = [[0,'rgb(255, 255, 255)'],[1,'rgb(220, 83, 67)']],
colorscale = [[0,"rgb(5, 10, 172)"],[0.85,"rgb(40, 60, 190)"],[0.9,"rgb(70, 100, 245)"],\
[0.94,"rgb(90, 120, 245)"],[0.97,"rgb(106, 137, 247)"],[1,"rgb(220, 220, 220)"]],
autocolorscale = False,
reversescale = True,
marker = dict(
line = dict (
color = 'rgb(180,180,180)',
width = 0.5
) ),
colorbar = dict(
autotick = False,
tickprefix = '',
title = 'Countries Of Fighters'),
) ]
layout = dict(
title = 'Countries Of Fighters',
geo = dict(
showframe = False,
showcoastlines = True,
projection = dict(
type = 'Mercator'
)
)
)
fig = dict( data=data, layout=layout )
py.iplot( fig, validate=False)
fundamentally country is not normalised to standard values
have used https://restcountries.eu/#api-endpoints-name to get country ISO standard details. It's not quick but it works
now mma dataframe has two additional columns, isocountry - standardised name iso3code - standard 3 character code
there are still some countries that do not normalize
have not focused on formatting figures, just simple ones to demonstrate it works when country data has been standardised
import kaggle.cli
import sys, requests
import pandas as pd
from pathlib import Path
from zipfile import ZipFile
import plotly.express as px
# download data set
# https://www.kaggle.com/binduvr/pro-mma-fighters
sys.argv = [
sys.argv[0]
] + "datasets download binduvr/pro-mma-fighters".split(
" "
)
kaggle.cli.main()
zfile = ZipFile("pro-mma-fighters.zip")
dfs = {f.filename:pd.read_csv(zfile.open(f)) for f in zfile.infolist() }
mma = dfs['pro_mma_fighters.csv']
# country names are not clean...
def iso_dtl(c):
cmap = {}
res = requests.get(f"https://restcountries.eu/rest/v2/name/{c}")
if res.status_code==200:
if len(res.json())==1:
cmap = {"isocountry":res.json()[0]["name"], "iso3code":res.json()[0]["alpha3Code"]}
else:
df = pd.DataFrame(res.json()).sort_values("population", ascending=False).reset_index()
cmap = {"isocountry":df.loc[0,"name"], "iso3code":df.loc[0, "alpha3Code"]}
else:
cmap = {"isocountry":c, "iso3code":""}
return {**{"country":c}, **cmap}
dfcountry = pd.DataFrame([iso_dtl(c) for c in mma["country"].unique()])
mma = mma.merge(dfcountry, on="country")
px.bar(mma["isocountry"].value_counts()[:5]).show()
px.choropleth(mma["isocountry"].value_counts().reset_index(),
locations="index", locationmode="country names", color="isocountry")
Trying to make a choropleth map in plotly using some data I have in a csv file. Have created the following map:
my choromap
This isn't a correct display of the data however. Here is an excerpt of my csv file:
China,2447
...
Trinidad And Tobago,2
Turkey,26
Ukraine,8
United Arab Emirates,97
United States of America,2008
Based on this I'd expected China to appear in a similar colour to that which the US has loaded in, however it looks the same as countries with values of less than 200. Does anyone know what the reason for this is?
Here's my full code for reference:
import pandas as pd
import plotly as py
df = pd.read_csv('app_country_data_minus_uk.csv')
data = [dict(type='choropleth',
locations = df['Country'],
locationmode = 'country names',
z = df['Applications'],
text = df['Country'],
colorbar = {'title':'Apps per country'},
colorscale = 'Jet',
reversescale = False
)]
layout = dict(title='Application Jan-June 2018',
geo = dict(showframe=False,projection={'type':'mercator'}))
choromap = dict(data = data,layout = layout)
red = py.offline.plot(choromap,filename='world.html')
per your comment I would make sure that china is indeed 2447 and not something like 244. I would also follow the plotly documentation although you example code works.
import plotly.plotly as py
import pandas as pd
df = pd.read_csv('app_country_data_minus_uk.csv')
data = [ dict(
type = 'choropleth',
locations = df['Country'],
locationmode = 'country names',
z = df['Applications'],
colorscale = 'Jet',
reversescale = False,
marker = dict(
line = dict (
color = 'rgb(180,180,180)',
width = 0.5
) ),
colorbar = dict(
autotick = False,
tickprefix = '',
title = 'Apps per country'),
) ]
layout = dict(
title = 'app_country_data_minus_uk',
geo = dict(
showframe = True,
showcoastlines = True,
projection = dict(
type = 'Mercator'
)
)
)
fig = dict( data=data, layout=layout )
py.iplot( fig, validate=False, filename='d3-world-map' )
or if you want to plot it offline:
import plotly.plotly as py
import pandas as pd
import plotly
df = pd.read_csv('app_country_data_minus_uk.csv')
data = [ dict(
type = 'choropleth',
locations = df['Country'],
locationmode = 'country names',
z = df['Applications'],
colorscale = 'Jet',
reversescale = False,
marker = dict(
line = dict (
color = 'rgb(180,180,180)',
width = 0.5
) ),
colorbar = dict(
title = 'Apps per country'),
) ]
layout = dict(
title = 'app_country_data_minus_uk',
geo = dict(
showframe = True,
showcoastlines = True,
projection = dict(
type = 'Mercator'
)
)
)
fig = dict( data=data, layout=layout )
plotly.offline.plot(fig,filename='world.html')
If you use iplot you will be able to edit the chart and see the data in plotly to make sure your data looks correct
Hi I'm fairly new to Python, Plotly and Jupyter Notebook. I would like to use a slider to select the number of days as the range in a query to which a graph is created from. My only issue is that I want the graph to automatically update on interaction with the slider, without having to re-run the query and graph creation. My code is below:
slider = widgets.IntSlider()
display(slider)
sliderVal = slider.value
df = pd.read_sql(f"""
SELECT CASE WHEN SiteID LIKE 3 THEN 'BLAH'
WHEN SiteID LIKE 4 THEN 'BLAHBLAH'
END AS Website,
COUNT(1) AS Count
FROM viewName
WHERE (TimeStamp > DATEADD(DAY, -{sliderVal}, GETDATE()))
GROUP BY SiteId
ORDER BY Count DESC
""", conn)
data = [go.Bar(x=df.Website, y=df.Count)]
layout = go.Layout(
xaxis=dict(
title='Website'),
yaxis=dict(
title='Exception count'),
title=f'Number of exceptions per user in the last {sliderVal} days')
chart = go.Figure(data=data, layout=layout, )
py.iplot(chart, filename='WebExceptions')
Thanks in advance!
If you do not want to rerun the query, then your data frame df must contain the results for all the values that you want the intslider widget to take, the function linked to the widget will then simply filter the data and redraw the graph with the new filtered data.
Here's an example with some dummy data:
import ipywidgets as widgets
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
py.init_notebook_mode(connected = True)
# Dummy data, to be replaced with your query result for the range of sliderVal
df = pd.DataFrame({'Days': [1] * 3 + [2] * 4 + [3] * 5,
'Website': [1,2,3, 4,5,6,7, 8,9,10,11,12],
'Count': [10,5,30, 15,20,25,12, 18,17,30,23,27]})
def update_plot(sliderVal):
filtered_df = df.query('Days== ' + str(sliderVal))
data = [go.Bar(x = filtered_df.Website,
y = filtered_df.Count)]
layout = go.Layout(
xaxis = dict(title = 'Website'),
yaxis = dict(title = 'Exception count'),
title = f'Number of exceptions per user in the last {sliderVal} days')
chart = go.Figure(data = data, layout = layout, )
py.iplot(chart, filename = 'WebExceptions')
# links an IntSlider taking values between 1 and 3 to the update_plot function
widgets.interact(update_plot, sliderVal = (1, 3))
and here is the result with sliderVal = 2:
Trying to use plotly to combine this line graph (that's already stacked):
import plotly
import plotly.graph_objs as plgo
#... Some Code
max = plgo.Scatter(x = day_times_str, y = max_val , name = "Max")
min = plgo.Scatter(x = day_times_str, y = min_val, name = "Min")
layout_opts = plgo.Layout(
xaxis = dict(title = 'xaxis'),
yaxis = dict(title = 'yaxis', rangemode = "tozero"),
)
figure1 = plgo.Figure(
data = [max, min],
layout = layout_opts,
)
and a map that shows location above this line graph...
#Assume geo_coord is a dataframe of coordinates, with columns 'lat', 'long' and 'text'
geo_data = [
plgo.Scattermapbox(
lat = geo_coord['lat'],
lon = geo_coord['lon'],
text = geo_coord['text'],
marker = dict(
color = geo_coord['text'],
size = 12,
),
mode = 'markers'
)
]
geo_layout = plgo.Layout(
autosize=True,
hovermode='closest',
mapbox=dict(
accesstoken= GMapsAPIHelper.MAPBOX_TOKEN, #Constant stored in global object
bearing=0,
pitch=0,
center=dict(
lat=49.04,
lon=-122.7
), #Modify by project details
zoom= 13
),
)
figure2 = dict(data = geo_data, layout = geo_layout)
plotly.offline.plot takes only 1 figure or set of data and I cannot pass in a list for graphing. I have tried using append_trace but because I've defined x and y axes in the line graph layout, this causes an error for the map, as follows:
File "C:\Anaconda2\lib\site-packages\plotly\graph_objs\graph_objs.py", line 934, in append_trace
trace['xaxis'] = ref[0]
TypeError: list indices must be integers, not str
Any help in solving this issue is appreciated.