Python Plotly bar chart count items from csv - python

I have a csv file with the following structure:
I wrote this code:
# Read inf.csv into a DataFrame and build a stacked bar chart of
# 'User' against 'Date', coloured by 'Items'.
import pandas as pd
import plotly.express as px
input_file = "inf.csv"
df = pd.read_csv(input_file)
# NOTE(review): y='User' puts the raw 'User' column on the y axis; no
# aggregation by 'Date' happens anywhere in this snippet.
fig = px.bar(df,
x='Date',
y='User',
title='Test',
color='Items',
barmode='stack')
fig.show()
and this is the output:
On the Y axis I would like to show not the users themselves, but the number of users that exist on the same day.
How can I do that?

You can get your desired data structure using df.groupby('Date').count().reset_index().
Plot:
Code:
import pandas as pd
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio

# Uncomment to force a specific renderer (e.g. when running in JupyterLab).
# pio.renderers.default = 'jupyterlab'

# Sample records: one row per (date, user, item).
records = {
    'Date': ['01/08/2019', '01/08/2019', '07/08/2019', '12/08/2019',
             '26/08/2019', '29/08/2019', '29/08/2019'],
    'User': ['U1', 'U2', 'U3', 'U4', 'U5', 'U6', 'U7'],
    'Items': ['Pen', 'Ruler', 'Rubber', 'Rubber', 'Ruler', 'Ruler', 'Pen'],
}
df = pd.DataFrame(records)

# Count the rows per date, then expose the per-date count of the
# 'User' column under the name 'Users'.
dfg = (
    df.groupby('Date')
    .count()
    .reset_index()
    .rename(columns={"User": "Users"})
)

# One bar per date; colouring by 'Items' is intentionally left out
# because the grouped frame no longer holds item labels.
fig = px.bar(dfg, x='Date', y='Users', title='Test', barmode='stack')
fig.show()

Related

Empty map when using px.choropleth mapbox

I am trying to display a map using GeoJSON, but this is my first time and I am probably using it wrong, because when I use px.choropleth alone my map shows correctly.
However, I would like to use a mapbox choropleth (px.choropleth_mapbox) for a more elaborate map.
Here is the code below
Thanks for your help
# Load CO2 data from a spreadsheet and a countries GeoJSON file, then
# plot the data twice: with px.choropleth and with go.Choroplethmapbox.
import pandas as pd
import matplotlib
import folium
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.offline import plot
import json
import plotly.io as pio
Final = pd.read_excel('/Users/Desktop/Data_Extract_From_Indicateurs_du_développement_dans_le_monde (3).xlsx')
# NOTE(review): `DataSet` is not defined in this snippet; the frame
# loaded above is named `Final`.
DataSet.head()
worldmap = json.load(open("/Users/Desktop/countries.geojson",'r'))
worldmap['features'][1]['properties']
world_id_map = {}
# Give every feature an 'id' taken from its ADMIN property and record
# it under the feature's ISO_A3 code.
for feature in worldmap['features']:
feature['id'] = feature['properties']['ADMIN']
world_id_map[feature['properties']['ISO_A3']] = feature['id']
# NOTE(review): `worldmap` was already parsed by json.load above, while
# json.loads expects str/bytes; this line also rebinds the mapping built
# in the loop. Confirm what was intended here.
world_id_map = json.loads(worldmap)
# NOTE(review): range_color is given as [20, 10], i.e. the larger value
# first -- confirm the intended bounds.
figmap = px.choropleth(Final,
locations='Country Code',
color='CO2 emissions',
color_continuous_scale="Algae",
animation_frame='Date',
range_color=[20,10],
title='Worldwilde CO2 Emissions per habitant',
)
plot(figmap)
# The mapbox trace receives `world_id_map` as its geojson argument and
# the 'Country Code' column as locations.
fig = go.Figure(go.Choroplethmapbox(geojson=world_id_map,
locations=Final['Country Code'],
z=Final['CO2 emissions'],
colorscale='algae', zmin=0, zmax=35,
colorbar_title = "CO2 emissions",
marker_opacity=0.5, marker_line_width=0.2))
# NOTE(review): update_geos presumably targets geo subplots rather than
# mapbox ones -- verify it has any effect on this figure.
fig.update_geos(fitbounds="locations", visible=True)
fig.update_layout(mapbox_style="carto-positron")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
plot(fig)
I have simulated your data frame and dynamically sourced the GeoJSON from GitHub.
There is no need to manipulate the GeoJSON; all you need to do is pass the featureidkey parameter to link the features to locations.
I have left the code in place for producing both choropleth and choropleth_mapbox.
import pandas as pd
import plotly.express as px
from plotly.offline import plot
import plotly.io as pio
import requests
import numpy as np

# Original local inputs, kept for reference:
# Final = pd.read_excel('/Users/Desktop/Data_Extract_From_Indicateurs_du_développement_dans_le_monde (3).xlsx')
# worldmap = json.load(open("/Users/Desktop/countries.geojson",'r'))

# Fetch the country polygons from GitHub. A timeout keeps the script
# from hanging forever, and raise_for_status() fails loudly on an HTTP
# error instead of trying to decode an error page as JSON.
response = requests.get(
    "https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/geojson/ne_110m_admin_0_countries.geojson",
    timeout=30,
)
response.raise_for_status()
worldmap = response.json()

# Simulate the data frame instead of reading the local file: one row per
# GeoJSON feature, cycling through seven year-end dates. The dates are
# spelled out explicitly so the snippet does not depend on the "Y"
# frequency alias, which newer pandas versions deprecate.
features = worldmap["features"]
year_ends = [f"{year}-12-31" for year in range(2015, 2022)]
Final = pd.DataFrame(
    {
        "Country Code": [f["properties"]["ADM0_A3"] for f in features],
        "Date": np.tile(year_ends, 100)[0 : len(features)],
        "CO2 emissions": np.random.uniform(10, 20, len(features)),
    }
)

# Plain choropleth, kept for comparison; `figmap` is rebound below.
# range_color is ordered [min, max] to match the simulated 10-20 range.
figmap = px.choropleth(
    Final,
    locations="Country Code",
    color="CO2 emissions",
    color_continuous_scale="Algae",
    animation_frame="Date",
    range_color=[10, 20],
    title="Worldwilde CO2 Emissions per habitant",
)

# Mapbox choropleth: featureidkey tells plotly which GeoJSON property
# matches the values in `locations`, so the GeoJSON needs no editing.
figmap = px.choropleth_mapbox(
    Final,
    locations="Country Code",
    geojson=worldmap,
    featureidkey="properties.ADM0_A3",
    color="CO2 emissions",
    color_continuous_scale="Algae",
    animation_frame="Date",
    range_color=[10, 20],
    title="Worldwilde CO2 Emissions per habitant",
    mapbox_style="carto-positron",
).update_layout(mapbox={"zoom": 2})
plot(figmap)

Plotly: Range slider not being displayed for row count > 500

As is visible from the image, the scaffolding for the rangeslider is generated but the trace inside it is not. It is also fully functional otherwise. With some experiment, I found that only if you set the no. of rows to 500 or less, it displays correctly. Is there a way to display it for rows more than that? Here is the code to reproduce-
size = 501  # change this to change no. of rows
import numpy as np
import pandas as pd
import plotly.express as px

# Two random daily series starting on 2021-01-01.
dates = pd.date_range(start='2021-01-01', periods=size, freq='D')
df = pd.DataFrame({
    'date': dates,
    'new_cases': np.random.random(size=size),
    'new_cases_smoothed': np.random.random(size=size),
})

# Line chart of both series with a range slider on the date axis.
fig = px.line(df, x='date', y=['new_cases', 'new_cases_smoothed'])
fig.update_layout(xaxis={"rangeslider": {"visible": True}, "type": "date"})
fig.show()
For others using plotly.express, I had luck setting the kwarg render_mode='webg1':
# Same reproduction as in the question, with an explicit render_mode
# passed to px.line.
size = 501 #change this to change no. of rows
import numpy as np
import pandas as pd
import plotly.express as px
df = {'date': pd.date_range(start='2021-01-01', periods=size, freq='D'),
'new_cases': np.random.random(size=size),
'new_cases_smoothed': np.random.random(size=size)}
df = pd.DataFrame(df)
# NOTE(review): 'webg1' (digit one) is not one of the documented
# render_mode values ('auto', 'svg', 'webgl'). Presumably any value other
# than 'webgl' ends up on the SVG renderer, which would explain why the
# range slider renders -- confirm, and consider render_mode='svg'.
fig = px.line(df, x='date', y=['new_cases','new_cases_smoothed'], render_mode='webg1')
fig.update_layout(xaxis=dict(rangeslider=dict(visible=True),type="date"))
fig.show()
This works in graph_objects
size = 501  # change this to change no. of rows
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Two random daily series starting on 2021-01-01.
df = pd.DataFrame({
    'date': pd.date_range(start='2021-01-01', periods=size, freq='D'),
    'new_cases': np.random.random(size=size),
    'new_cases_smoothed': np.random.random(size=size),
})

# One go.Scatter trace per series, used here in place of px.line.
series_names = ['new_cases', 'new_cases_smoothed']
traces = []
for column in series_names:
    traces.append(go.Scatter(x=df["date"], y=df[column], name=column))
fig = go.Figure(data=traces)

# Show the range slider and start zoomed in on the last 50 rows.
recent_dates = df.tail(50)["date"]
fig.update_layout(
    xaxis=dict(
        rangeslider=dict(visible=True),
        type="date",
        range=[recent_dates.min(), recent_dates.max()],
    )
)
fig.show()
Interesting, you typed WEBG1 instead of WEBGL and it worked.
If you input WEBGL it doesn't work.
In fact if you type anything that should not be accepted as valid such as just blank (render_mode='') it works as well.
Go figure...

How to interact with plotly.figure_factory hover?

I tried the following code:
# Build a hexbin mapbox figure from the plotly carshare sample data,
# coloured by 'car_hours', and show it on a dark basemap.
import plotly.io as pio
import plotly.express as px
import json
import pandas as pd
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
df = px.data.carshare()
# This empty figure is replaced by the hexbin figure created below.
fig = go.Figure()
# NOTE(review): `dash` is never imported in this snippet.
app = dash.Dash()
# build the figures
fig = ff.create_hexbin_mapbox(df,lat = 'centroid_lat', lon = 'centroid_lon',nx_hexagon = 10,color = 'car_hours',
labels = {'color':'Point Count '},
opacity = 0.5)
fig.update_layout(mapbox_style="carto-darkmatter")
fig.update_layout(margin=dict(b=0, t=0, l=0, r=0))
fig.show()
And it displays:
I want to modify the hover so that it shows the float value with only one decimal place, and I also want to be able to display some text after the value. For example, the hover should read 'Point Count = 1019.9 cars per hour'. Unfortunately, the documentation does not help very much.
It seems to me that your best option for ff.create_hexbin_mapbox would be to configure it directly through:
fig.data[0].hovertemplate = 'Point Count =%{z:,.1f}<extra>Cars per hour</extra>'
Which will turn this:
... into this:
Complete code
import plotly.io as pio
import plotly.express as px
import json
import pandas as pd
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px

df = px.data.carshare()

# Build the hexbin map directly; no placeholder figure is needed because
# create_hexbin_mapbox returns the figure.
fig = ff.create_hexbin_mapbox(
    df,
    lat='centroid_lat',
    lon='centroid_lon',
    nx_hexagon=10,
    color='car_hours',
    labels={'color': 'Point Count '},
    opacity=0.5,
)
fig.update_layout(
    mapbox_style="carto-darkmatter",
    margin=dict(b=0, t=0, l=0, r=0),
)

# Override the hover text on the first trace: one decimal place with a
# thousands separator for the value, and the unit in the <extra> box.
fig.data[0].hovertemplate = 'Point Count =%{z:,.1f}<extra>Cars per hour</extra>'
fig.show()

Plotly doesn't draw barchart from pivot

I am trying to draw a bar chart from the CSV data I transform using pivot_table. The bar chart should have the count on the y-axis and companystatus along the x-axis.
I am getting this instead:
Ultimately, I want to stack the bar by CompanySizeId.
I have been following this video.
# Sum column 'n' per 'CompanyStatusLabel' with a pivot table and plot the
# result as a bar chart written out by plotly.offline.
import plotly.graph_objects as go
import plotly.offline as pyo
import pandas as pd
countcompany = pd.read_csv(
'https://raw.githubusercontent.com/redbeardcr/Plotly/master/Data/countcompany.csv')
df = pd.pivot_table(countcompany, index='CompanyStatusLabel',
values='n', aggfunc=sum)
print(df)
# NOTE(review): df is a DataFrame, so df.values is a 2-D array
# (presumably one column here), not a flat sequence of bar heights.
data = [go.Bar(
x=df.index,
y=df.values,
)]
layout = go.Layout(title='Title')
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)
Code can be found here
Thanks for any help
If you flatten the array with the y values, i.e. if you replace y=df.values with y=df.values.flatten(), your code will work as expected.
import plotly.graph_objects as go
import plotly.offline as pyo
import pandas as pd

countcompany = pd.read_csv('https://raw.githubusercontent.com/redbeardcr/Plotly/master/Data/countcompany.csv')

# Total 'n' per company status. The aggregation is named by string:
# recent pandas versions emit a FutureWarning when the builtin `sum` is
# passed as aggfunc.
df = pd.pivot_table(countcompany, index='CompanyStatusLabel', values='n', aggfunc='sum')

# df.values is 2-D (rows x 1 column); go.Bar needs a flat sequence for y,
# so flatten it to one height per status label.
data = [go.Bar(
    x=df.index,
    y=df.values.flatten(),
)]
layout = go.Layout(title='Title')
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

Plotly xlabel and ylabel names are cropped how to make them appear full?

I was trying to create a heatmap using plotly 3.10 and I encountered a problem: the
column names are not displayed in full in the y-axis labels.
# Build a correlation matrix from a generated test frame with long
# column names and draw it as an annotated heatmap in a notebook.
import pandas as pd
import plotly.figure_factory as ff
from plotly.offline import plot, iplot, init_notebook_mode
# NOTE(review): pd.util.testing is a private/deprecated pandas helper and
# may not exist in newer pandas releases -- confirm the pandas version.
df = pd.util.testing.makeDataFrame()
df.columns = ['this_is_long_column_name','another_column_name','yet_another_column_name','price']
df_corr = df.corr()
z = df_corr.values
# The same column labels are used for both the x and the y axis.
fig = ff.create_annotated_heatmap(z,showscale=True,
x=df_corr.columns.values.tolist(),
y=df_corr.columns.values.tolist()
)
iplot(fig)
I got this image:
Question
How to show the full column name in ylabels?
How to show xlabel on both top and bottom with larger fontsizes?
How to show only 2 significant numbers, like df.round(2) only in plot?
Have you tried manually specifying the margins? E.g.:
import plotly.graph_objs as go

# Explicit figure margins in pixels: left, right, top, bottom.
margins = {"l": 80, "r": 80, "t": 100, "b": 80}
layout = go.Layout(margin=margins)
This might work for you:
import numpy as np
import pandas as pd
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import plot, iplot, init_notebook_mode

init_notebook_mode(connected=False)

# Build a small random numeric frame explicitly. The original used
# pd.util.testing.makeDataFrame(), a private helper that is no longer
# available in pandas 2.x.
column_names = ['this_is_long_column_name', 'another_column_name',
                'yet_another_column_name', 'price']
df = pd.DataFrame(np.random.randn(30, len(column_names)), columns=column_names)

# Round only the values handed to the plot, so the annotations show two
# decimals while df_corr itself keeps full precision.
df_corr = df.corr()
z = df_corr.round(2).values
labels = df_corr.columns.values.tolist()

fig = ff.create_annotated_heatmap(z, showscale=True, x=labels, y=labels)

# A wide left margin leaves room for the long y-axis labels.
layout = go.Layout(margin=dict(l=200, r=50, t=100, b=50))
fig.layout.update(layout)
iplot(fig)
Gives:

Categories

Resources