How can I plot an interactive 3d scatterplot from a pandas dataframe? - python

Pandas provides builtin plotting functionality for DataFrames with several plotting backend engines (matplotlib, etc.). Id like to plot an interactive 3D scatterplot directly from a dataframe via df.plot() but came up with non-interactive plots only. I'm thinking of something I get when e.g. plotly. I'd prefer a solution which is independent of exploratory data analysis IDE setup dependencies (e.g. ipywidget when using JupyterLab). How can I plot interactive 3D scatter plots via df.plot()?

plotly is the way to go...use scatter3d
import plotly as py
import plotly.graph_objs as go
import numpy as np
import pandas as pd
# data
np.random.seed(1)
df = pd.DataFrame(np.random.rand(20, 3), columns=list('ABC'))
trace = go.Scatter3d(
x=df['A'],
y=df['B'],
z=df['C'],
mode='markers',
marker=dict(
size=5,
color=c,
colorscale='Viridis',
),
name= 'test',
# list comprehension to add text on hover
text= [f"A: {a}<br>B: {b}<br>C: {c}" for a,b,c in list(zip(df['A'], df['B'], df['C']))],
# if you do not want to display x,y,z
hoverinfo='text'
)
layout = dict(title = 'TEST',)
data = [trace]
fig = dict(data=data, layout=layout)
py.offline.plot(fig, filename = 'Test.html')

Holding to the df.plot() approach there is an df.iplot() with Cufflinks and Plotly.

Related

Plotly backend with pandas

Hi I would like to do multiple graph (2 indeed) with pandas / backend Plotly.
I don't know how to proceed.
and what are the main option to change the size of my graph (it seems that figsize does not work) ? the color ?
I did something like that:
import pandas as pd
import matplotlib.pyplot as plt
pd.options.plotting.backend = "plotly"
f1 = data.plot(y=['vl','bench'], title='Fonds vs Bench')
f2 = data.plot(y='aum', title='AuM du fonds')
f1.show(figsize=(8,5))
f2.show(figsize=(8,5))
and would like something equivalent of (without Plotly backend):
f, (ax1,ax2) = plt.subplots(2, 1, figsize=(8,5), sharex=True)
data.plot(y=['vl', 'bench'], title='Fonds vs Bench', ax=ax1)
data.plot(y='aum', title='AuM du fonds',ax=ax2);
you can use the subplot method to create multiple subplots in a single figure. Also, you can use the update_layout method to change the size of the figure and the update_traces method to change the color of the traces.
Example:
import pandas as pd
import plotly.express as px
# set Plotly as the backend for pandas
pd.options.plotting.backend = "plotly"
# create a figure with two subplots
fig = px.subplot(rows=2, cols=1)
# add the first plot to the first subplot
fig.add_trace(px.line(data, x=data.index, y='vl', name='vl'))
fig.add_trace(px.line(data, x=data.index, y='bench', name='bench'))
fig.update_layout(title_text='Fonds vs Bench')
# add the second plot to the second subplot
fig.add_trace(px.line(data, x=data.index, y='aum', name='aum'))
fig.update_layout(title_text='AuM du fonds', row=2, col=1)
# change the size of the figure
fig.update_layout(width=800, height=500)
# change the color of one of the traces
fig.update_traces(line=dict(color='red'), selector=dict(name='vl'))
# show the figure
fig.show()
Let me know if that works for you.
Reference: https://plotly.com/python/
When creating a graph from Pandas using plotly, if you want to customize the content of the graph, you can use plotly to modify it. To modify the graph size, specify the width and height in pixels. If the desired size is 8 inches wide by 5 inches high, matplotlib's default dpi is 100 pixels, so we specified 800 pixels and 500 pixels.
import pandas as pd
import numpy as np
pd.options.plotting.backend = "plotly"
np.random.seed(20220212)
data = pd.DataFrame({'vl': np.random.randint(0,25,5),
'bench': np.random.randint(25,50,5),
'aum': np.random.randint(50,75,5)})
f1 = data.plot(y=['vl','bench'], title='Fonds vs Bench')
f2 = data.plot(y=['aum'], title='AuM du fonds')
f1.update_layout(autosize=False, width=800, height=500)
f2.update_layout(autosize=False, width=800, height=500)
f1.show()
f2.show()
Use matplotlib and seaborn:
import matplotlib.pyplot as plt
import seaborn as sns
fig, ax = plt.subplots(figsize=(20, 15))
sns.boxplot(x = 'bedrooms', y = 'price', data = dataset_df)
if you want two and more plot use
fig, ax = plt.subplots(2,2, figsize=(20, 15))
And use ax=ax[0,1]

How to add a secondary Y axis to a Plotly Express bar plot?

I would like to add a second Y axis to my bar plot bellow, that is the number of citizens in integer:
this graph was made using plotly:
import plotly.express as px
fig = px.bar(df, x="country",y="pourcent_visit",color="city",barmode='group')
# fig.add_hline(y=10)
fig.show()
To my knowledge, there's no direct way to do this. But you can easily build a Plotly Express figure, grab the traces (and data structures) from there and combine them in a figure that allows multiple axes using fig = make_subplots(specs=[[{"secondary_y": True}]]). With no provided data sample, I'll use the built-in dataset px.data.tips() that I'm guessing to a large part resembles the structure of your real world dataset judging by the way you've applied the arguments in px.bar(). Details in the comments, but please don't hesitate to let me know if something is unclear.
Plot:
Complete code:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# sample data
df = px.data.tips()
# figure setup with multiple axes
fig = make_subplots(specs=[[{"secondary_y": True}]])
# build plotly express plot
fig2 = px.bar(df, x="day", y="total_bill", color="smoker", barmode="group")
# add traces from plotly express figure to first figure
for t in fig2.select_traces():
fig.add_trace(t, secondary_y = False)
# handle data for secondary axis
df2 = df.groupby('day').agg('sum')#.reset_index()
df2 = df2.reindex(index = df['day'].unique()).reset_index()
#
fig.add_trace(go.Scatter(x = df2['day'], y = df2['size'], mode = 'lines'), secondary_y = True)
# fix layout
fig.update_layout(legend_title_text = 'smoker')
fig.show()

Python Plotly Express Scatter Plot

I want to create an interactive scatter plot; so I am using the plotly.graph_objects module.
My data has two columns of about 100 points.
When I make a line plot, I have no problem.
But when I try to make a scatter plot, Jupyter seems to hang (message at the bottom says - Local Host not responding)
It takes a while for Jupyter to respond and I still have no plot.
The code I am using is:
import plotly.express as px
import plotly.graph_objects as go
fig = go.Figure()
var_list = ['cloxth1 ()','cloxth2 ()']
for item in var_list:
stripped_item = item.replace(' ()','')
fig.add_trace(go.Scatter(
x=np.linspace(0,len(df),len(df)),
y=df[item],
mode='markers',
marker={'size':1},
name = item
))
fig.update_layout(title = 'CLOXTH',
xaxis_title = 'data samples',
yaxis_title = 'mV')
fig.show()
Is there anything wrong with the way I am using go.Scatter?
I tried using px.scatter instead. It seems to work, as in I get a scatter plot. But in the plotly.express case I am unable to have a proper legend for 'cloxth1' and 'cloxth2'; also, both data sets are plotted with the same color.
How can I get around this?
A few rows from the data:
Sample Data
# read in with
df = pd.read_clipboard(sep=',', index_col=[0])
# copy to clipboard
,time(s),Filename,time_stamp,time_vector(ms),time_vector_zerobased(ms),cloxth1(),cloxth2()
0.0,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:03.8,0,0,725.9097285,725.9097285
1.001,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:04.8,1001,1001,725.9097285,725.9097285
2.001,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:05.8,2001,2001,725.9097285,725.9097285
3.002,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:06.8,3002,3002,725.9097285,725.9097285
4.0,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:07.8,4000,4000,725.9097285,725.9097285
5.002,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:08.8,5002,5002,725.9097285,725.9097285
6.002,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:09.8,6002,6002,725.9097285,725.9097285
7.001,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:10.8,7001,7001,725.9097285,725.9097285
8.003,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:11.8,8003,8003,725.9097285,725.9097285
9.002,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:12.8,9002,9002,725.9097285,725.9097285
10.0,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:13.8,10000,10000,725.9097285,725.9097285
11.005,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:14.8,11005,11005,725.9097285,725.9097285
12.0,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:15.8,12000,12000,725.9097285,725.9097285
13.001,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:16.8,13001,13001,725.9097285,725.9097285
14.003,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:17.8,14003,14003,725.9097285,725.9097285
15.0,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:18.8,15000,15000,725.9097285,725.9097285
16.002,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:19.8,16002,16002,725.9097285,725.9097285
17.0,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:20.8,17000,17000,725.9097285,725.9097285
18.0,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:21.8,18000,18000,725.9097285,725.9097285
19.003,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:22.8,19003,19003,725.9097285,725.9097285
20.001,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:23.8,20001,20001,725.9097285,725.9097285
21.0,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:24.8,21000,21000,725.9097285,725.9097285
22.005,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:25.8,22005,22005,725.9097285,725.9097285
23.0,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:26.8,23000,23000,725.9097285,725.9097285
24.002,4DRBUP1N8HB706662_Trip-Detail_2020-07-20,00-04-03.csv.zip,04:27.8,24002,24002,725.9097285,725.9097285

Plotly yaxis2 manual scaling

I have a plotly-dash dashboard and I can't seem to rescale my secondary y-axis. Is there a way of doing this?
I've tried messing with the domain parameter and the range parameter in the go.Layout.
I need the volume bar chart to be scaled down and occupy maybe 10% of the height of the plot so it doesn't overlap with my candlesticks.
Thank you very much.
Any help is appreciated.
import pandas as pd
import pandas_datareader.data as web
import plotly.offline as pyo
import plotly.graph_objs as go
stock_ticker='AAPL'
start_date='2019-04-01'
end_date='2019-05-22'
data=[]
hist_stock_df = web.DataReader(stock_ticker,'iex',start_date, end_date)
data.append(go.Candlestick(x=hist_stock_df.index,
open=hist_stock_df['open'],
high=hist_stock_df['high'],
low=hist_stock_df['low'],
close=hist_stock_df['close'],
name='AAPL'))
data.append(go.Bar(x=hist_stock_df.index,
y=hist_stock_df['volume'].values,
yaxis='y2'))
#y0=1000000
layout=go.Layout(title= 'Candestick Chart of AAPL',
xaxis=dict(title='Date',rangeslider=dict(visible=False)),
yaxis=dict(title='Price'),
plot_bgcolor='#9b9b9b',
paper_bgcolor='#9b9b9b',
font=dict(color='#c4c4c4'),
yaxis2=dict(title='Volume',
overlaying='y',
side='right'))
#scaleanchor='y'))
#scaleratio=0.00000001,
#rangemode='tozero',
#constraintoward='bottom',
#domain=[0,0.1]))
fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig)
I have tried messing with the commented parameters
UPDATE
With this combination of layout parameters I managed to rescale the bars, but now there are two x-axis, been trying to figure out how to bring the middle x-axis down.
layout=go.Layout(title= 'Candestick Chart of AAPL',
xaxis=dict(title='Date',rangeslider=dict(visible=False)),
yaxis=dict(title='Price'),
plot_bgcolor='#9b9b9b',
paper_bgcolor='#9b9b9b',
font=dict(color='#c4c4c4'),
yaxis2=dict(title='Volume',
overlaying='y',
side='right',
scaleanchor='y',
scaleratio=0.0000001))
Use secondary_y=True or secondary_y=False within fig.update_yaxes() to specify which axis to adjust.
Plot 1: Without manual adjustments
Plot 2: With manual adjustments
Code:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import datetime
# data
np.random.seed(1234)
numdays=20
dates = pd.date_range('1/1/2020', periods=numdays)
A = (np.random.randint(low=-10, high=10, size=numdays).cumsum()+100).tolist()
B = (np.random.randint(low=0, high=100, size=numdays).tolist())
df = pd.DataFrame({'A': A,'B':B}, index=dates)
# plotly figure setup
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(name='A', x=df.index, y=df['A'].values))
fig.add_trace(go.Bar(name='B', x=df.index, y=df['B'].values), secondary_y=True)
# plotly manual axis adjustments
fig.update_yaxes(range=[50,160], secondary_y=False)
fig.update_yaxes(range=[-10,200], secondary_y=True)
fig.show()

Pandas, matplotlib and plotly - how to fix series legend?

I'm trying to create an interactive plotly graph from pandas dataframes.
However, I can't get the legends displayed correctly.
Here is a working example:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.plotly as py
# sign into the plotly api
py.sign_in("***********", "***********")
# create some random dataframes
dates = pd.date_range('1/1/2000', periods=8)
df1 = pd.DataFrame(np.random.randn(8, 1), index=dates, columns=['A'])
df2 = pd.DataFrame(np.random.randn(8, 1), index=dates, columns=['B'])
df1.index.name = 'date'
df2.index.name = 'date'
Now I attempt to plot the dataframes using plotly.
fig, ax = plt.subplots(1,1)
df1.plot(y='A', ax=ax)
df2.plot(y='B', ax=ax)
py.iplot_mpl(fig, filename='random')
Notice there is no legend
Edit:
Based on suggestions below I have added an update dict. Although this does display the legend, it messes up the plot itself:
fig, ax = plt.subplots(1,1)
df1.plot(y='A', ax=ax)
df2.plot(y='B', ax=ax)
update = dict(
layout=dict(
annotations=[dict(text=' ')], # rm erroneous 'A', 'B', ... annotations
showlegend=True # show legend
)
)
py.iplot_mpl(fig, update=update, filename='random')
Edit 2:
Removing the annotations entry from the layout dict results in the plot being displayed correctly, but the legend is not the y column name, but rather the x column name, the index name of the dataframe
fig, ax = plt.subplots(1,1)
df1.plot(y='A', ax=ax)
df2.plot(y='B', ax=ax)
update = dict(
layout=dict(
showlegend=True # show legend
)
)
py.iplot_mpl(fig, update=update, filename='random')
This results in the following plot:
Edit 3:
I have found a way to override the legend text but it seems a bit klunky. Given that I've specified the dataframe column I want to plot:
df1.plot(y='A', ax=ax)
I would have expected that y='A' would result in 'A' being used as the legend label.
It seems this is not the case, and while it is possible to override using the index label, as seen below, it just feels wrong.
Is there a better way to achieve this result?
update = dict(
layout=dict(
showlegend=True,
),
data=[
dict(name='A'),
dict(name='B'),
]
)
py.iplot_mpl(fig, update=update, filename='random')
Legends don't convert well from matplotlib to plotly.
Fortunately, adding a plotly legend to a matplotlib plot is straight forward:
update = dict(
layout=dict(
showlegend=True # show legend
)
)
py.iplot_mpl(fig, update=update)
See the full working ipython notebook here.
For more information, refer to the plotly user guide.

Categories

Resources