I get different results when plotting the same data with matplotlib and plotly. Plotly doesn't show me the whole data range.
import plotly.plotly as py
import plotly.graph_objs as go
# filter the data
df3 = df[df.line_item_returned==0][['created_at', 'line_item_price']].copy()
# remove the time part from datetime
df3.created_at = df3.created_at.dt.floor('d')
# set the datatime column as index
df3 = df3.set_index('created_at')
# Create traces
trace0 = go.Scatter(
x = df3.index,
y = df3.line_item_price.resample('d').sum().rolling(90, center=True).mean(),
mode = 'markers',
name = 'markers'
)
data = [trace0]
py.iplot(data, filename='scatter-mode')
The chart shows only the range Oct-Dec 2018.
Plotting the same data with matplotlib shows the whole data range 2016-2018:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(df3.line_item_price.resample('d').sum().rolling(90, center=True).mean())
The index contains all data 2016-2018:
df3.line_item_price.resample('d').sum().rolling(31, center=True).mean().index
DatetimeIndex(['2015-11-18', '2015-11-19', '2015-11-20', '2015-11-21',
'2015-11-22', '2015-11-23', '2015-11-24', '2015-11-25',
'2015-11-26', '2015-11-27',
...
'2018-12-10', '2018-12-11', '2018-12-12', '2018-12-13',
'2018-12-14', '2018-12-15', '2018-12-16', '2018-12-17',
'2018-12-18', '2018-12-19'],
dtype='datetime64[ns]', name='created_at', length=1128, freq='D')
Why is this happening?
I guess it's a problem with indices.
%matplotlib inline
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np
# Build a reproducible-in-shape sample: N random values on a date index
# of N periods starting at 2015-01-01.
N = 2000
df = pd.DataFrame({"value":np.random.randn(N)},
index=pd.date_range(start='2015-01-01', periods=N))
# You don't really need to use `plt` directly: the resampled, rolling-mean
# result can be drawn with its own .plot() method.
df.resample('d').sum().rolling(90, center=True).mean().plot();
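If you want to use plotly, however, take the x values from the index of the resampled Series. In the question, x is the original df3.index while y is the resampled rolling mean, so the two do not refer to the same set of dates.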
df_rsmpl = df.resample('d').sum().rolling(90, center=True).mean()
trace0 = go.Scatter(x = df_rsmpl.index,
y = df_rsmpl["value"])
data = [trace0]
py.iplot(data)
Related
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
corona_data = pd.read_csv("서울시 코로나19 확진자 현황 csv.csv", encoding="cp949")
confirmed_dates = corona_data["확진일"]
confirmed_date = [datetime.strptime(date, "%Y-%m-%d") for date in confirmed_dates]
corona_data["확진일"]= confirmed_date
plt.rc('font', family='Malgun Gothic')
corona_data["확진일"].plot(title="확진일 별 확진자 추이")
plt.show()
This plot shows plain numbers on the x-axis and the dates on the y-axis, but I want the dates on the x-axis and the numbers on the y-axis. How can I solve this?
If your data is in a dataframe, I recommend using Seaborn to visualize it. It has a great API that allows you to plot elements of your dataframe by referencing column names. Here is a toy example:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
# Load data (replace the `...` placeholder with the path to your CSV file)
df = pd.read_csv(...)
# Plot a scatter plot by naming the dataframe columns to use for each axis.
# Note: the seaborn function is `scatterplot`; `sns.scatter` does not exist.
sns.scatterplot(x='col_1', y='col_2', data=df)
plt.show()
Check out the Seaborn documentation for more
The problem seems to be that your dataframe contains only one column of data, namely the dates. You could add a column that contains the row numbers and then select what you want on the x and y axes by passing the column names to the plot function:
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
# Read the CSV (cp949-encoded) and parse the "확진일" column into datetimes.
corona_data = pd.read_csv("서울시 코로나19 확진자 현황 csv.csv", encoding="cp949")
confirmed_dates = corona_data["확진일"]
confirmed_date = [datetime.strptime(date, "%Y-%m-%d") for date in confirmed_dates]
corona_data["확진일"] = confirmed_date
# Now add the row numbers to the dataset. `len()` returns an int, which is
# not iterable, so it has to be wrapped in `range()`.
corona_data["numbers"] = list(range(len(confirmed_dates)))
# Use a font that can render the Korean title.
plt.rc('font', family='Malgun Gothic')
# Tell the plot function to use "확진일" as the x axis and "numbers" as the y axis.
corona_data.plot("확진일", "numbers", title="확진일 별 확진자 추이")
plt.show()
My goal is to create an animation with my 3D data in plotly.
I have 3 variables x,y,z for simplicity and I plot the 4th value depending on these x,y,z.
I create a 3D scatter plot where the 4th dimension, so to speak, is the color, like this:
from numpy import genfromtxt
import numpy as np
import plotly.io as pio
import plotly.express as px
pio.renderers.default = 'notebook'
import plotly.graph_objects as go
import math
import pandas as pd
data = pd.read_csv("paramtp_1e-05_big.txt")
data.head()
data = data.iloc[::10, :]
color_data = data['gopt'].astype(float).round(decimals=2)
color_data[color_data>= 10] = 10
color_data_nopt = data['nopt'].astype(float).round(decimals=3)
color_data_mc = data['mc'].astype(float).round(decimals=3)
color_data_P= data['P']
color_data_P[color_data_P >= 1] = 1
data= data.replace(np.nan, '', regex=True)
data.tail()
fig = px.scatter_3d(data, x='NpN0', y='s', z='mu',log_x=True, log_z=True,
opacity = 0.5,
color=color_data,color_continuous_scale=px.colors.sequential.Viridis)
fig.add_trace(
go.Scatter(
mode='markers',
marker=dict(
size=1,
opacity=0.5,
),
)
)
fig.show()
Similarly to this wonderful animation: https://plotly.com/python/visualizing-mri-volume-slices/
I would like to slice up my data to isosurfaces with respect to any x,y,z coordinates.
Since the example uses images, I could not work out how to create the same thing with my raw data.
Thank you in advance.
I am trying to draw a bar chart from the CSV data I transform using pivot_table. The bar chart should have the count on the y-axis and companystatus along the x-axis.
I am getting this instead:
Ultimately, I want to stack the bar by CompanySizeId.
I have been following this video.
import plotly.graph_objects as go
import plotly.offline as pyo
import pandas as pd
countcompany = pd.read_csv(
'https://raw.githubusercontent.com/redbeardcr/Plotly/master/Data/countcompany.csv')
df = pd.pivot_table(countcompany, index='CompanyStatusLabel',
values='n', aggfunc=sum)
print(df)
data = [go.Bar(
x=df.index,
y=df.values,
)]
layout = go.Layout(title='Title')
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)
Code can be found here
Thanks for any help
If you flatten the array with the y values, i.e. if you replace y=df.values with y=df.values.flatten(), your code will work as expected.
import plotly.graph_objects as go
import plotly.offline as pyo
import pandas as pd
countcompany = pd.read_csv('https://raw.githubusercontent.com/redbeardcr/Plotly/master/Data/countcompany.csv')
df = pd.pivot_table(countcompany, index='CompanyStatusLabel', values='n', aggfunc=sum)
data = [go.Bar(
x=df.index,
y=df.values.flatten(),
)]
layout = go.Layout(title='Title')
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)
I was trying to create a heatmap using plotly 3.10 and ran into a problem: the
column names are not displayed in full in the y-axis labels.
import pandas as pd
import plotly.figure_factory as ff
from plotly.offline import plot, iplot, init_notebook_mode
df = pd.util.testing.makeDataFrame()
df.columns = ['this_is_long_column_name','another_column_name','yet_another_column_name','price']
df_corr = df.corr()
z = df_corr.values
fig = ff.create_annotated_heatmap(z,showscale=True,
x=df_corr.columns.values.tolist(),
y=df_corr.columns.values.tolist()
)
iplot(fig)
I got this image:
Question
How to show the full column name in ylabels?
How to show xlabel on both top and bottom with larger fontsizes?
How to show only 2 significant numbers, like df.round(2) only in plot?
Have you tried manually specifying the margins? E.g.:
import plotly.graph_objs as go
layout = go.Layout(
margin=dict(l=80, r=80, t=100, b=80)
)
This might work for you:
import numpy as np
import pandas as pd
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
df = pd.util.testing.makeDataFrame()
df.columns = ['this_is_long_column_name','another_column_name','yet_another_column_name','price']
df_corr = df.corr()
z = df_corr.round(2).values
fig = ff.create_annotated_heatmap(z,showscale=True,
x=df_corr.columns.values.tolist(),
y=df_corr.columns.values.tolist()
)
layout = go.Layout(margin=dict(l=200, r=50, t=100, b=50))
fig.layout.update(layout)
iplot(fig)
Gives:
I need to create a line chart from multiple columns of a dataframe. In pandas, you can draw a multiple line chart using a code as follows:
df.plot(x='date', y=['sessions', 'cost'], figsize=(20,10), grid=True)
How can this be done using plotly_express?
With version 4.8 of Plotly.py, the code in the original question is now supported almost unmodified:
pd.options.plotting.backend = "plotly"
df.plot(x='date', y=['sessions', 'cost'])
Previous answer, as of July 2019
For this example, you could prepare the data slightly differently.
df_melt = df.melt(id_vars='date', value_vars=['sessions', 'cost'])
If you transpose/melt your columns (sessions, cost) into additional rows, then you can specify the new column 'variable' to partition by in the color parameter.
px.line(df_melt, x='date' , y='value' , color='variable')
Example plotly_express output
With newer versions of plotly, all you need is:
df.plot()
As long as you remember to set pandas plotting backend to plotly:
pd.options.plotting.backend = "plotly"
From here you can easily adjust your plot to your liking, for example setting the theme:
df.plot(template='plotly_dark')
Plot with dark theme:
One particularly awesome feature with newer versions of plotly is that you no longer have to worry whether your pandas dataframe is of a wide or long format. Either way, all you need is df.plot(). Check out the details in the snippet below.
Complete code:
# imports
import plotly.express as px
import pandas as pd
import numpy as np
# settings
pd.options.plotting.backend = "plotly"
# sample dataframe of a wide format
np.random.seed(4); cols = list('abc')
X = np.random.randn(50,len(cols))
df=pd.DataFrame(X, columns=cols)
df.iloc[0]=0; df=df.cumsum()
# plotly figure
df.plot(template = 'plotly_dark')
Answer for older versions:
I would highly suggest using iplot() instead if you'd like to use plotly in a Jupyter Notebook for example:
Plot:
Code:
import plotly
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pandas as pd
import numpy as np
# setup
init_notebook_mode(connected=True)
np.random.seed(123)
cf.set_config_file(theme='pearl')
# Random data using cufflinks
df1 = cf.datagen.lines()
df2 = cf.datagen.lines()
df3 = cf.datagen.lines()
df = pd.merge(df1, df2, how='left',left_index = True, right_index = True)
df = pd.merge(df, df3, how='left',left_index = True, right_index = True)
fig = df1.iplot(asFigure=True, kind='scatter',xTitle='Dates',yTitle='Returns',title='Returns')
iplot(fig)
It's also worth pointing out that you can combine plotly express with graph_objs. This is a good route when the lines have different x points.
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px
# data set 1
x = np.linspace(0, 9, 10)
y = x
# data set 2
df = pd.DataFrame(np.column_stack([x*0.5, y]), columns=["x", "y"])
fig = go.Figure(px.scatter(df, x="x", y="y"))
fig.add_trace(go.Scatter(x=x, y=y))
fig.show()