Related
In the example below, I would like to group the y-axis elements by continent and display the name of the continent at the top of each group. I can't figure out where in the layout this can be set. The example comes from this plotly page.
import pandas as pd
import plotly.graph_objects as go
from plotly import data
# Dumbbell plot comparing 1987 and 2007 population per country
# (plotly's bundled gapminder sample data).
df = data.gapminder()
df = df.loc[df.year.isin([1987, 2007])]

# Countries ordered by 2007 population; the slice drops the first 5 and the
# last 10 entries of that ascending order.
countries = (
    df.loc[df.year.isin([2007])]
    .sort_values(by=["pop"], ascending=True)["country"]
    .unique()
)[5:-10]

# country x year lookup table, built once instead of re-filtering df twice
# for every country inside the loop.
pop_by_year = df.pivot(index="country", columns="year", values="pop")

# Named plot_data (not data) so the imported plotly `data` module is not
# shadowed and `data.gapminder()` keeps working when the cell is re-run.
plot_data = {"x": [], "y": [], "colors": [], "years": []}
for country in countries:
    # Each country contributes three entries: its 1987 point, its 2007 point
    # and a None placeholder that separates it from the next country.
    plot_data["x"].extend(
        [pop_by_year.at[country, 1987], pop_by_year.at[country, 2007], None]
    )
    plot_data["y"].extend([country, country, None])
    plot_data["colors"].extend(["cyan", "darkblue", "white"])
    plot_data["years"].extend(["1987", "2007", None])

fig = go.Figure(
    data=[
        # Grey connector between the two years of each country.
        go.Scatter(
            x=plot_data["x"],
            y=plot_data["y"],
            mode="lines",
            marker=dict(
                color="grey",
            ),
        ),
        # The year markers themselves, coloured per year.
        go.Scatter(
            x=plot_data["x"],
            y=plot_data["y"],
            text=plot_data["years"],
            mode="markers",
            marker=dict(
                color=plot_data["colors"],
                # One symbol triple per country so the list lines up with the
                # three entries per country in x/y (was hard-coded to 10).
                symbol=["square", "circle", "circle"] * len(countries),
                size=16,
            ),
            hovertemplate="""Country: %{y} <br> Population: %{x} <br> Year: %{text} <br><extra></extra>""",
        ),
    ]
)
To show grouping by continent, instead of building a dictionary as in the code you showed, loop over the data frame and add the traces country by country. The y-axis is grouped by continent by passing a two-level (multi-index style) value, continent and country, as y.
I have limited myself to the top 5 countries per continent because a large number of categories on the y-axis makes the chart hard to read. You can change or remove this limit according to your needs. Furthermore, I have set the x-axis type to log because the large differences between the numbers make a linear axis hard to read. This is also something I added on my own, and you can edit it yourself.
import pandas as pd
import plotly.graph_objects as go
from plotly import data
# Dumbbell plot of 1987 vs 2007 population, with the y-axis grouped by
# continent through a two-level (continent, country) category.
df = data.gapminder()
df = df.loc[df.year.isin([1987, 2007])]

# top5 by continent: sort by 2007 population first so head(5) really picks
# the five most populous countries of each continent.
countries = (
    df.loc[df.year.isin([2007])]
    .sort_values("pop", ascending=False)
    .groupby("continent", sort=True)
    .head(5)["country"]
)
df = df[df["country"].isin(countries.tolist())]

fig = go.Figure()
for c in df["continent"].unique():
    # `@c` refers to the local Python variable inside the query string.
    dff = df.query("continent == @c")
    for cc in dff["country"].unique():
        dfc = dff.query("country == @cc")
        # Two parallel lists as y: the first level is the continent, the
        # second the country.
        y_levels = [dfc["continent"].tolist(), dfc["country"].tolist()]
        # Grey connector between the two years of this country.
        fig.add_trace(
            go.Scatter(
                x=dfc["pop"].tolist(),
                y=y_levels,
                mode="lines+markers",
                marker=dict(
                    color="grey",
                ),
            )
        )
        # Coloured year markers drawn on top; one colour per row of dfc
        # (the frame holds the 1987 and 2007 rows of this country).
        fig.add_trace(
            go.Scatter(
                x=dfc["pop"].tolist(),
                y=y_levels,
                text=dfc["year"],
                mode="markers",
                marker=dict(
                    color=["cyan", "darkblue"],
                    size=16,
                ),
            )
        )

fig.update_layout(autosize=False, height=800, width=800, showlegend=False)
# Log scale because populations differ by orders of magnitude.
fig.update_xaxes(type="log")
fig.show()
Dataset:
Year , Store_type , MY_SHOPPING ,TRAN_SPEND
2012, LARGE_STORE , HEALTH CONSCIOUS , 49383.70$
2012 , CONVENIENCE_STORE , CONFIDENT COOKS , 13150.00$
2013 , LARGER_STORE , QUICK&EASY , 98765.00$
2013 , LARGER_STORE , TRADITIONAL , 45734.00$
Question: I want to have a pie chart that represents the year in the middle of the chart and have a label = store_type and display percentage of different categories in MY_SHOPPING column plus the name of that and also the TRAN_SPEND amount in dollar
.
My code:
# Pie of TRAN_SPEND split by STORE_TYPE; slices show percentage and label.
# The shopping column name is written with a trailing space, exactly as in
# the original call.
shopping_col = 'MY_SHOPPING '
fig = px.pie(
    data_frame=removed_index_storeformat,
    names='STORE_TYPE',
    values='TRAN_SPEND',
    hover_data=[shopping_col],
    labels={shopping_col: 'shop'},
)
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()
Donut pie charts are made possible by setting the hole size. Also, use annotations to put text strings inside the hole. Add a numerical column separate from the column of strings marked with dollar signs in the data presented. Use the numerical column for the pie chart and the original dollar-marked column for the text.
import pandas as pd
import numpy as np
import io
# Sample rows from the question, inlined as CSV text.
data = '''
Year,Store_type,MY_SHOPPING,TRAN_SPEND
2012,LARGE_STORE,HEALTH CONSCIOUS,49383.70$
2012,CONVENIENCE_STORE,CONFIDENT COOKS,13150.00$
2013,LARGER_STORE,QUICK&EASY,98765.00$
2013,LARGER_STORE,TRADITIONAL,45734.00$
'''
df = pd.read_csv(io.StringIO(data), sep=',')
# Numeric copy of the spend column for plotting; TRAN_SPEND keeps the
# original dollar-marked strings for use as slice text. The '$' suffix is
# stripped explicitly so a value without it does not lose its last digit.
df['TRAN_SPEND2'] = df['TRAN_SPEND'].str.strip().str.rstrip('$').astype(float)
df
Year Store_type MY_SHOPPING TRAN_SPEND TRAN_SPEND2
0 2012 LARGE_STORE HEALTH CONSCIOUS 49383.70$ 49383.7
1 2012 CONVENIENCE_STORE CONFIDENT COOKS 13150.00$ 13150.0
2 2013 LARGER_STORE QUICK&EASY 98765.00$ 98765.0
3 2013 LARGER_STORE TRADITIONAL 45734.00$ 45734.0
import plotly.express as px
# Year to display: drives both the row filter and the label in the hole, so
# the two cannot drift apart.
year = 2012
dff = df.query('Year == @year')

# Donut chart (hole=0.3) of spend per MY_SHOPPING category. The labels key
# must match the column name exactly (no trailing space) for the rename to
# 'shop' to take effect.
fig = px.pie(
    dff,
    values='TRAN_SPEND2',
    names='MY_SHOPPING',
    hover_data=['MY_SHOPPING'],
    labels={'MY_SHOPPING': 'shop'},
    hole=0.3,
)
# Slice text: percentage, category label and the original dollar-marked amount.
fig.update_traces(textposition='inside', text=dff['TRAN_SPEND'], textinfo='percent+label+text')
fig.update_layout(
    title_text="Test Title",
    # Annotation centred on the figure puts the year inside the hole.
    annotations=[dict(text=str(year), x=0.5, y=0.5, font_size=20, showarrow=False)],
)
fig.show()
I am creating a dashboard in dash for a course at university. I created 3 histograms however, there are many unique values which give a long range of x values. In my plots I would like to show only the 10 or 20 values that have the highest count (top 10 values). Can someone help me out?
import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
# Build App
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("forensics "),
    dcc.Graph(id='graph'),
    dcc.Graph(id='graph1'),
    dcc.Graph(id='graph2'),
    html.Label([
        "select market",
        dcc.Dropdown(
            id='market', clearable=False,
            value='whitehousemarket', options=[
                {'label': c, 'value': c}
                for c in posts['marketextract'].unique()
            ])
    ]),
])


# Define callback to update graph
# (the decorator is what registers update_figure with the app).
@app.callback(
    Output('graph', 'figure'),
    Output('graph1', 'figure'),
    Output('graph2', 'figure'),
    [Input("market", "value")]
)
def update_figure(market):
    """Return the three histograms for the selected market.

    Filters `posts` to the rows whose 'marketextract' equals `market` and
    builds one histogram each for the 'datetime', 'username' and 'drugs'
    columns, in the order of the three Output targets.
    """
    # Filter once and reuse the result for all three figures.
    selected = posts.loc[posts['marketextract'] == market]
    fig = px.histogram(x=selected['datetime'])
    fig1 = px.histogram(x=selected['username'])
    fig2 = px.histogram(x=selected['drugs'])
    return [fig, fig1, fig2]


# Run app and display result inline in the notebook
app.run_server(mode='inline')
To my knowledge, px.histogram() does not have a method to exclude certain observations of bins. But judging by the look of your data (please consider sharing a proper sample), what you're doing here is just showing the different counts of some user names. And you can easily do that through a combination of df.groupby() and px.histogram. Or px.bar() or go.Bar() for that matter, but we'll stick with px.histogram since that is what you're seeking help with. Anyway, using random selections of country names from px.gapminder you can use:
# Ten most frequent names. reset_index(name='count') names the size column
# so that px.histogram can reference it as y='count'.
dfg = (
    df.groupby(['name'])
    .size()
    .sort_values(ascending=False)
    .head(10)
    .reset_index(name='count')
)
fig = px.histogram(dfg, x='name', y='count')
And get:
If you drop .head(10) you'll get this instead:
And I hope this is the sort of functionality you were looking for. And don't be intimidated by the long df.groupby(['name']).size().to_frame().sort_values([0], ascending = False).reset_index(). I'm not a pandas expert, so you could quite possibly find a more efficient approach. But it does the job. Here's the complete code with some sample data:
# imports
import pandas as pd
import plotly.express as px
import random
# data sample: a slice of the distinct gapminder country names, then 100
# draws with replacement from that slice
country_pool = list(set(px.data.gapminder()['country']))
gapminder = country_pool[1:20]
names = random.choices(gapminder, k=100)

# data munging: occurrences per name, most frequent first
df = pd.DataFrame({'name': names})
name_sizes = df.groupby(['name']).size().to_frame()
dfg = name_sizes.sort_values([0], ascending=False).reset_index()
dfg.columns = ['name', 'count']

# plotly
fig = px.histogram(dfg, x='name', y='count')
fig.update_yaxes(title_text='count')
fig.show()
I am trying to plot India map using plotly, but unable to find a way to do that. Below is the code which I tried for USA.
import numpy as np
import pandas as pd
import plotly.figure_factory as ff

# County-level unemployment sample published with the plotly datasets.
df_sample = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/laucnty16.csv')

# Build the county FIPS code: state code zero-padded to 2 digits followed by
# the county code zero-padded to 3 digits.
df_sample['State FIPS Code'] = df_sample['State FIPS Code'].astype(str).str.zfill(2)
df_sample['County FIPS Code'] = df_sample['County FIPS Code'].astype(str).str.zfill(3)
df_sample['FIPS'] = df_sample['State FIPS Code'] + df_sample['County FIPS Code']

colorscale = ["#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9", "#9ecae1",
              "#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be", "#2171b5", "#1361a9",
              "#08519c", "#0b4083", "#08306b"]
# One fewer endpoint than colours: the endpoints are the boundaries between
# consecutive colour bins, spread evenly from 1 to 12.
endpts = list(np.linspace(1, 12, len(colorscale) - 1))
fips = df_sample['FIPS'].tolist()
values = df_sample['Unemployment Rate (%)'].tolist()

fig = ff.create_choropleth(
    fips=fips, values=values,
    binning_endpoints=endpts,
    colorscale=colorscale,
    show_state_data=False,
    show_hover=True, centroid_marker={'opacity': 0},
    asp=2.9, title='USA by Unemployment %',
    legend_title='% unemployed'
)
fig.layout.template = None
fig.show()
OUTPUT:
In a similar way I just want to draw India's map with hovering values.
and just want output like below...
the output of INDIAN MAP:
The figure factory create_choropleth method that you're using is deprecated and deals with USA counties exclusively. For other maps, you need the GeoJSON for the features you're mapping. Plotly only comes with GeoJSON data for world countries and US states, so you'll have to provide the data for India's states yourself.
Like your example choropleth, let's plot the current number of active COVID-19 cases per state as of July 17 (this comes from indiacovid19.github.io, which is periodically archiving the data from India's Ministry of Health). As for the GeoJSON, a quick search yields a few GitHub repos but it seems the majority are too outdated for our cases data, as they don't include the merging of Dadra and Nagar Haveli and Daman and Diu. Luckily, datameet provides an up-to-date shapefile for India's states which I simplified a bit to reduce the size and converted to GeoJSON using mapshaper, then flipped the polygon winding using geojson-rewind.
Now, as detailed in the Plotly documentation, we can use plotly express to quickly make a choropleth map with our data:
import pandas as pd
import plotly.express as px
# Active-case counts per state, and the GeoJSON with the state boundaries.
cases_csv_url = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/active_cases_2020-07-17_0800.csv"
states_geojson_url = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson"

df = pd.read_csv(cases_csv_url)

# Rows are matched to GeoJSON features by comparing the 'state' column with
# each feature's properties.ST_NM value.
fig = px.choropleth(
    data_frame=df,
    geojson=states_geojson_url,
    locations='state',
    featureidkey='properties.ST_NM',
    color='active cases',
    color_continuous_scale='Reds',
)
# Fit the view to the plotted locations and hide the base map.
fig.update_geos(visible=False, fitbounds="locations")
fig.show()
For more fine control over the plot, we can use the graph objects directly:
import pandas as pd
import plotly.graph_objects as go
# Active-case counts per state, and the GeoJSON with the state boundaries.
cases_csv_url = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/active_cases_2020-07-17_0800.csv"
states_geojson_url = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson"

df = pd.read_csv(cases_csv_url)

# Colour bar settings: anchored by its bottom-left corner at (0.01, 0.05),
# with a semi-transparent white background and a tick every 20000.
colorbar_style = {
    'title': {'text': "Active Cases"},
    'thickness': 15,
    'len': 0.35,
    'bgcolor': 'rgba(255,255,255,0.6)',
    'tick0': 0,
    'dtick': 20000,
    'xanchor': 'left',
    'x': 0.01,
    'yanchor': 'bottom',
    'y': 0.05,
}

# Choropleth trace; locations are matched against properties.ST_NM of the
# GeoJSON features.
state_layer = go.Choropleth(
    geojson=states_geojson_url,
    featureidkey='properties.ST_NM',
    locationmode='geojson-id',
    locations=df['state'],
    z=df['active cases'],
    autocolorscale=False,
    colorscale='Reds',
    marker_line_color='peachpuff',
    colorbar=colorbar_style,
)
fig = go.Figure(data=state_layer)

# Map view: base map hidden, conic conformal projection, explicit
# longitude/latitude ranges.
map_projection = {
    'type': 'conic conformal',
    'parallels': [12.472944444, 35.172805555556],
    'rotation': {'lat': 24, 'lon': 80},
}
fig.update_geos(
    visible=False,
    projection=map_projection,
    lonaxis={'range': [68, 98]},
    lataxis={'range': [6, 38]},
)

# Title centred above the plot area, no margins except room for the title.
title_style = {
    'text': "Active COVID-19 Cases in India by State as of July 17, 2020",
    'xanchor': 'center',
    'x': 0.5,
    'yref': 'paper',
    'yanchor': 'bottom',
    'y': 1,
    'pad': {'b': 10},
}
fig.update_layout(
    title=title_style,
    margin={'r': 0, 't': 30, 'l': 0, 'b': 0},
    height=550,
    width=550,
)
fig.show()
Note: I could not manage to do it in plotly, but it can be done easily in Bokeh. The OP asked specifically for plotly, but I am still posting this answer to show how it can be done another way.
GeoJSON of India's states is distributed by https://gadm.org/
Load it into Bokeh's GeoJSONDataSource data model
Set up the figure and feed in the data model
Custom colors can be achieved by adding the information per geometry/state inside the data model.
Working Code
from bokeh.models import GeoJSONDataSource
from urllib.request import urlopen
import json
from bokeh.models import GeoJSONDataSource, HoverTool, LinearColorMapper
from bokeh.palettes import Viridis256
from bokeh.plotting import figure
from bokeh.io import output_file, show
import matplotlib.pyplot as plt
from bokeh.io import show, output_notebook
%matplotlib
output_notebook()
# Geojson of India
with urlopen("https://raw.githubusercontent.com/geohacker/india/master/state/india_state.geojson") as response:
geojson = json.load(response)
# Round robin over over 3 colors
# You can set the colors here based on the case count you have per state
for i in range(len(geojson['features'])):
geojson['features'][i]['properties']['Color'] = ['blue', 'red', 'green'][i%3]
# Set the hover to state information and finally plot it
cmap = LinearColorMapper(palette=Viridis256)
TOOLS = "pan,wheel_zoom,box_zoom,reset,hover,save"
geo_source = GeoJSONDataSource(geojson=json.dumps(geojson))
p = figure(title='India', tools=TOOLS, x_axis_location=None, y_axis_location=None, width=800, height=800)
p.grid.grid_line_color = None
p.patches('xs', 'ys', fill_alpha=0.7, line_color='black', fill_color='Color', line_width=0.1, source=geo_source)
hover = p.select_one(HoverTool)
hover.point_policy = 'follow_mouse'
hover.tooltips = [('State:', '#NAME_1')]
show(p)
Output:
As mentioned in the code comments above, you can add the case information to the states in the data model and set it on the hover tool. This way, when you hover over states you will see the case count. In fact, you can add whatever information you want to the states inside the data model and use the data model to render it.
Sorry, but you cannot do that, as the location mode has only 3 values:
“ISO-3”, “USA-states”, “country names”
and the scope of the layout's geo can only take these 7 values: “world” | “usa” | “europe” | “asia” | “africa” | “north america” | “south america”.
So in order to get a plot of India, you need to plot Asia with India marked; there is no option for a separate plot of India and its states.
# Choropleth trace description: a single location ('india'), matched by
# country name, with one z value and one hover text.
data = {
    'type': 'choropleth',
    'locations': ['india'],
    'locationmode': 'country names',
    'colorscale': 'Portland',
    'text': ['t1'],
    'z': [1.0],
    'colorbar': {'title': 'Colorbar Title'},
}
# Restrict the map to the 'asia' scope.
layout = {'geo': {'scope': 'asia'}}
This could give you a map of Asia with India marked.
I'm using Plotly Dash to build a stacked bar chart with 3 trace values.
I'm trying to access the state of the trace values so that I can filter a dataframe and pass the resulting DF back to the plot, as opposed to simply hiding the traces on de-select.
for example, I have a dataframe :
Item Status Value
1 First 2000
1 Second 3490
1 Third 542
2 First 641
2 Second 564
3 First 10
My traces are 3 values (first, Second, Third) pertaining to a linear process where each value is a status marking the advancement of an item.
My intention is to be able to select statuses from further down the progression so only those items that have advanced to a certain step are plotted.
As I select more advanced statuses in the trace legend, my plotted x-values should drop off since fewer advance that far, even though they all share the majority of the statuses
The only solution I can think of is to make checkboxes for each trace value and use those inputs in a callback, but that seems redundant to the select/de-select traces functionality built in.
Are you looking for something like this?
Code:
import dash
from dash.dependencies import Output, Input
import dash_core_components as dcc
import dash_html_components as html
import plotly
import plotly.graph_objs as go
import pandas as pd
app = dash.Dash(__name__)

# Sample data from the question: one row per (item, status) with its value.
df = pd.DataFrame({
    'Item': [1, 1, 1, 2, 2, 3],
    'Status': ["First", "Second", "Third", "First", "Second", "First"],
    'Value': [2000, 3490, 542, 641, 564, 10],
})

colors = {
    'background': '#111111',
    'background2': '#FF0',
    'text': '#7FDBFF'
}

# One sub-frame and one bar trace per status, in stacking order.
statuses = ("First", "Second", "Third")
df1, df2, df3 = (df.loc[df["Status"] == status] for status in statuses)
trace1, trace2, trace3 = (
    go.Bar(x=part["Item"], y=part["Value"], name=status)
    for status, part in zip(statuses, (df1, df2, df3))
)

# Stacked bar figure holding the three status traces.
stacked_figure = go.Figure(
    data=[trace1, trace2, trace3],
    layout=go.Layout(barmode='stack'),
)

app.layout = html.Div(children=[
    html.Div([
        html.H5('Your Plot'),
        dcc.Graph(id='cx1', figure=stacked_figure),
    ]),
])

if __name__ == '__main__':
    app.run_server(debug=True)
Output: