How do I show different yaxis value in a subplot? - python

I want the first row and the second row of subplots to use different y-axis ranges.
For instance, the first row could show values up to 50 and the second row values up to 100.
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
# Fix the RNG seed so the simulated scores are reproducible.
np.random.seed(41)

# Three normally distributed samples; positional arguments are (loc, scale, size).
score_array_A = np.random.normal(15, 5, 100)
score_array_B = np.random.normal(50, 10, 200)
score_array_C = np.random.normal(70, 15, 300)

# One frame per sample, each tagged with its group label in column "D".
score_df_A = pd.DataFrame({"T(s)": score_array_A}).assign(D="2")
score_df_B = pd.DataFrame({"T(s)": score_array_B}).assign(D="3")
score_df_C = pd.DataFrame({"T(s)": score_array_C}).assign(D="4")

# Stack the groups; each frame keeps its own 0..n-1 row labels.
score_data = pd.concat((score_df_A, score_df_B, score_df_C))

# "Req" is "1" where the score modulo 5 exceeds 1, otherwise "5".
score_data["Req"] = np.where(score_data["T(s)"] % 5 > 1, "1", "5")
# Box plots of "T(s)", faceted into columns by "D" and into rows by "Req".
px.box(
    score_data,
    y='T(s)',
    facet_col='D',
    facet_row='Req',
    facet_col_wrap=0,
    template='simple_white',
    width=600,
    height=300,
)

After creating the figure with Plotly Express, update each y-axis so that it is no longer configured to match the primary y-axis (`matches=None`). The code below also sets `showticklabels=True` so that every subplot displays its own tick labels.
import plotly.express as px
import numpy as np
import pandas as pd
# Seed the global RNG so every run produces the same scores.
np.random.seed(41)

# (size, mean, std) per group; the samples are drawn in A, B, C order.
score_array_A, score_array_B, score_array_C = (
    np.random.normal(loc=mean, scale=std, size=n)
    for n, mean, std in ((100, 15, 5), (200, 50, 10), (300, 70, 15))
)

# Pair each sample with its group label ("2", "3", "4") in column "D".
score_df_A, score_df_B, score_df_C = (
    pd.DataFrame({"T(s)": scores, "D": label})
    for scores, label in zip((score_array_A, score_array_B, score_array_C), "234")
)

# Stack the three groups into one long frame.
score_data = pd.concat([score_df_A, score_df_B, score_df_C])

# "Req" is "1" where the score modulo 5 exceeds 1, otherwise "5".
score_data = score_data.assign(
    Req=np.where(score_data["T(s)"] % 5 > 1, "1", "5"),
)
# Box plots of "T(s)", faceted into columns by "D" and into rows by "Req".
fig = px.box(
    data_frame=score_data,
    y="T(s)",
    facet_col="D",
    facet_row="Req",
    facet_col_wrap=0,
    template="simple_white",
    width=600,
    height=300,
)
# The faceted y-axes are configured to match the primary y-axis. Clearing
# `matches` on every y-axis lets each subplot pick its own range, and
# `showticklabels=True` makes each subplot draw its own tick labels.
# `update_yaxes` applies the change to all y-axes at once, so there is no
# need to serialise the figure with `to_dict()` just to find the axis keys.
fig.update_yaxes(matches=None, showticklabels=True)

Related

Plotly scatter not drawing line of markers above certain number of data points

I am using Plotly's scatter. I want to have lines surrounding the markers, like in this plot (the black contour):
I want this to happen by default, so I am setting a template like in the below MWE:
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
# Marker symbols cycled through by successive scatter traces.
MARKERS = [
    'circle', 'cross', 'x', 'triangle-up', 'star', 'hexagram', 'square',
    'diamond', 'hourglass', 'bowtie', 'pentagon', 'triangle-down',
    'triangle-left', 'triangle-right', 'star-triangle-up',
    'star-triangle-down', 'star-square', 'star-diamond', 'diamond-tall',
    'diamond-wide', 'triangle-ne', 'triangle-se', 'triangle-sw',
    'triangle-nw', 'hexagon', 'hexagon2', 'octagon',
]

# Work on a copy of the built-in template. Editing `pio.templates['plotly']`
# directly would also change the registered 'plotly' template, so figures that
# explicitly ask for 'plotly' would silently pick up these overrides too.
my_template = go.layout.Template(pio.templates['plotly'])

# One default scatter trace per symbol: thin marker outline and slim error bars.
my_template.data.scatter = [
    go.Scatter(
        marker=dict(
            symbol=s,
            line=dict(width=.5),
        ),
        error_y=dict(
            width=1,
            thickness=.8,
        ),
    )
    for s in MARKERS
]

# Register the customised copy under its own name and make it the default.
pio.templates['my_template'] = my_template
pio.templates.default = 'my_template'
import numpy
import pandas
# Number of random points to plot. Set to 9999 and it fails.
N_SAMPLES = 99

# Random sample: standard-normal x against exponential y.
samples = pandas.DataFrame(
    {
        'x': numpy.random.randn(N_SAMPLES),
        'y': numpy.random.exponential(size=N_SAMPLES),
    }
)

fig = px.scatter(samples, x="x", y="y")
fig.show()
This works perfectly but if the number of points goes beyond certain value, it stops drawing the lines, like this:
This is what happens to me when I change N_SAMPLES to e.g. 9999. How can I get it to work independently of the number of points?
I have Python 3.8.10 and Plotly 5.11.0.
To cope with the large amount of data, WebGL rendering is available, so I used it (via `go.Scattergl`) to draw a scatter plot with blue markers and a marker line width of 1. The number of points, N, is 100,000.
Update:
To set the marker line width by default, create a dedicated template that sets the line width for `scattergl` traces.
import plotly.graph_objects as go
import numpy as np
# Template whose only content is a default 0.5-wide marker outline for
# WebGL scatter traces.
my_template = go.layout.Template()
my_template.data.scattergl = [go.Scattergl(marker={"line": {"width": 0.5}})]

N = 100000

# Marker-only WebGL trace; the outline comes from the template above.
# Optional per-trace override: marker=dict(color='blue', line_width=1)
points = go.Scattergl(
    x=np.random.randn(N),
    y=np.random.exponential(size=N),
    mode='markers',
)

fig = go.Figure()
fig.update_layout(template=my_template)
fig.add_trace(points)
fig.show()

Adding traces in the same subplot messes with bar size [duplicate]

This question already has answers here:
How to plot a colored gantt chart with plotly keeping the correct bar height
(2 answers)
Closed 4 months ago.
I've made a function to generate a Gantt chart in plotly.
def create_plot(self, date_start, date_end):
    """Build one Gantt subplot per customer and store the figure in ``self.plot``.

    ``date_start`` and ``date_end`` become the x-axis range of every subplot.
    """
    df = self.filtered_df
    # Pixels per horizontal bar
    pixel = 35

    # One timeline figure per customer, plus the height reserved for it.
    customer_plots = {}
    heights = []
    for customer in df['Customer'].unique():
        customer_plot = df.loc[df['Customer'] == customer]
        fig = timeline(
            customer_plot,
            x_start='StartDate',
            y='ProdName',
            x_end='DueDate',
            text='DocNum',
            color='Estat',
        )
        fig.update_traces(textposition="inside")
        customer_plots[customer] = fig
        heights.append(5 + len(customer_plot)*pixel)

    # Stack the per-customer timelines as rows of a single figure.
    fig_sub = make_subplots(
        rows=len(customer_plots),
        row_heights=heights,
        subplot_titles=list(customer_plots),
    )
    fig_sub.update_xaxes(type='date')
    for row, plot in enumerate(customer_plots.values(), start=1):
        for trace in plot['data']:
            fig_sub.add_trace(trace, row=row, col=1)

    fig_sub.update_layout(autosize=False, height=sum(heights), width=1500)

    # Give every x-axis the requested range and a 7-day tick step (in ms).
    for ax in fig_sub['layout']:
        if ax.startswith('xaxis'):
            fig_sub['layout'][ax]['range'] = [date_start, date_end]
            fig_sub['layout'][ax]['dtick'] = 86400000.0*7
    self.plot = fig_sub
The function px.timeline returns a tuple of traces, one for every different color. If there is a single color in the subplot, the bars look fine (with the height I need in order to display the text inside them).
But when two colors are present, I have to add another trace to the same subplot for the bars of the other color.
# Copy every trace of each customer's timeline into that customer's subplot row.
for row, plot in enumerate(customer_plots.values(), start=1):
    for trace in plot['data']:
        fig_sub.add_trace(trace, row=row, col=1)
When I do so, it messes with the heights of the bars and I can't actually see the text inside:
I'd like them to look the same as the first photo. Not with extra spacing between them. How could this be accomplished?
As I see it, this reduces to the same question, and the same answer, as: color coding gantt on column.
You have not provided sample data, so I have simulated some. The question you are asking can also be reduced to a much simpler setup:
import pandas as pd
import plotly.express as px
import numpy as np
# Seeded generator so the random "Estat" labels are reproducible.
r = np.random.RandomState(22)

# 100 simulated tasks: customers A-E with 20 consecutive rows each, weekly
# start/due dates, five product names and document numbers 8-12 repeating.
df = pd.DataFrame(
    dict(
        Customer=np.repeat(list("ABCDE"), 20),
        StartDate=pd.date_range(start="1-jan-2020", periods=100, freq="W-MON"),
        DueDate=pd.date_range(start="10-jan-2020", periods=100, freq="W-FRI"),
        ProdName=np.tile(["quick", "brown", "fox", "jumped", "over"], 20),
        DocNum=np.tile(range(8, 13), 20),
        Estat=r.choice(list("QRSTUV"), 100),
    )
)
# One timeline per customer. `color="Estat"` is deliberately not passed;
# "Estat" is carried through `hover_data` (i.e. customdata) instead.
fig = px.timeline(
    df,
    x_start="StartDate",
    y="ProdName",
    x_end="DueDate",
    text="DocNum",
    hover_data=["Estat"],
    facet_row="Customer",
)

# Map each distinct "Estat" value to a color from the qualitative palette.
estat_values = df["Estat"].unique()
palette = np.tile(px.colors.qualitative.Plotly, 20)
cmap = dict(zip(estat_values, palette))


def _color_bars_by_estat(t):
    """Color each bar of trace ``t`` from the first customdata column."""
    return t.update({"marker": {"color": [cmap[a] for a in t["customdata"][:, 0]]}})


fig.for_each_trace(_color_bars_by_estat)
Don't use the Plotly Express `color` parameter, as this generates one trace per value. Instead, save the required information into customdata using the `hover_data` parameter.
Then update all traces, setting `marker_color` to an array that maps the values to colors.
Integrated
Your sample code looks like a method in a class, so I have synthesized a class and integrated the solution.
import pandas as pd
import numpy as np
from plotly.express import timeline
import plotly.express as px
from plotly.subplots import make_subplots
class dummy:
    """Synthetic stand-in for the asker's class, holding simulated task data.

    ``filtered_df`` has one row per task with the columns ``Customer``,
    ``StartDate``, ``DueDate``, ``ProdName``, ``DocNum`` and ``Estat``.
    """

    def __init__(self):
        # Seeded generator so the random "Estat" labels are reproducible.
        r = np.random.RandomState(22)
        self.filtered_df = pd.DataFrame(
            {
                "Customer": np.repeat(list("ABCDE"), 20),
                "StartDate": pd.date_range("1-jan-2020", freq="W-MON", periods=100),
                "DueDate": pd.date_range("10-jan-2020", freq="W-FRI", periods=100),
                "ProdName": np.tile(["quick", "brown", "fox", "jumped", "over"], 20),
                "DocNum": np.tile(range(8, 13), 20),
                "Estat": r.choice(list("QRSTUV"), 100),
            }
        )

    def create_plot(self, date_start, date_end):
        """Build one Gantt subplot per customer and store it in ``self.plot``.

        Each customer gets a single timeline trace (no ``color`` argument), so
        bar heights are not split between traces; bars are colored afterwards
        from the "Estat" value carried in customdata.

        ``date_start`` and ``date_end`` become the x-axis range of every subplot.
        """
        df = self.filtered_df
        # Pixels per horizontal bar
        pixel = 35

        # One timeline figure per customer, plus the height reserved for it.
        customer_plots = {}
        heights = []
        for customer in df["Customer"].unique():
            customer_plot = df.loc[df["Customer"] == customer]
            fig = timeline(
                customer_plot,
                x_start="StartDate",
                y="ProdName",
                x_end="DueDate",
                text="DocNum",
                hover_data=["Estat"],
            )
            fig.update_traces(textposition="inside")
            customer_plots[customer] = fig
            heights.append(5 + len(customer_plot) * pixel)

        # Stack the per-customer timelines as rows of a single figure.
        fig_sub = make_subplots(
            rows=len(customer_plots),
            row_heights=heights,
            subplot_titles=list(customer_plots),
        )
        for row, plot in enumerate(customer_plots.values(), start=1):
            for trace in plot["data"]:
                fig_sub.add_trace(trace, row=row, col=1)
        fig_sub.update_layout(autosize=False, height=sum(heights), width=1500)

        # Apply the date axis type, the requested range and a 7-day tick step
        # (in milliseconds) to every x-axis in one call.
        fig_sub.update_xaxes(
            type="date",
            range=[date_start, date_end],
            dtick=86400000.0 * 7,
        )

        # Map each distinct "Estat" value to a color, then color every bar from
        # the first customdata column (filled by hover_data=["Estat"]).
        cmap = dict(
            zip(df["Estat"].unique(), np.tile(px.colors.qualitative.Plotly, 20))
        )
        fig_sub.for_each_trace(
            lambda t: t.update(
                {"marker": {"color": [cmap[a] for a in t["customdata"][:, 0]]}}
            )
        )
        self.plot = fig_sub
# Plot the whole simulated period: earliest start date to latest due date.
d = dummy()
first_start = d.filtered_df["StartDate"].min()
last_due = d.filtered_df["DueDate"].max()
d.create_plot(first_start, last_due)
d.plot

Python Plotly Express: How to conditionally fill an area plot?

I want to plot a time-series area plot, where positive (>= 0) values are filled in one colour and negative (< 0) values are filled in another.
Taking this example:
import pandas as pd
import numpy as np
import plotly.express as px
# Load the Apple price data and derive PnL as the close price minus 100.
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv')
df['PnL'] = df['AAPL.Close'] - 100

# Single-color area chart of PnL over time.
px.area(df, x='Date', y='PnL', width=500, height=300)
I want the parts where PnL goes below 0 to be filled red.
So this is what I tried:
import pandas as pd
import numpy as np
import plotly.express as px
# Load the Apple price data, derive PnL (close minus 100) and label each row
# by the sign of its PnL.
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv').assign(
    PnL=lambda x: x['AAPL.Close'] - 100,
    sign=lambda x: np.where(x['PnL'] >= 0, 'positive', 'negative'),
)

# Area chart colored by sign: blue for positive rows, red for negative rows.
sign_colors = {'positive': 'steelblue', 'negative': 'crimson'}
px.area(
    df,
    x='Date',
    y='PnL',
    color='sign',
    color_discrete_map=sign_colors,
    width=500,
    height=300,
)
But this gives me:
Which is not exactly what I'm looking for. What's the best way to do this?
This is the best I could do in the time I had patience for it:
import plotly.graph_objects as go
# Split PnL into two columns: each keeps its own side of zero and is 0 elsewhere.
mask = df['PnL'] >= 0
df['PnL_above'] = np.where(mask, df['PnL'], 0)
df['PnL_below'] = np.where(~mask, df['PnL'], 0)

# One filled-to-zero trace per side, so each side gets its own fill color.
fig = go.Figure()
for column in ('PnL_above', 'PnL_below'):
    fig.add_trace(go.Scatter(x=df['Date'], y=df[column], fill='tozeroy'))
Result:
Obviously not ideal, but gets you most of the way there. There are some slight artifacts where the two traces meet, and obviously the line color is still visible when the value is zero.
By adding mode='none' to the two traces, e.g. go.Scatter(x=df['Date'], y=df['PnL_above'], fill='tozeroy', mode='none'), you can remove the line and only render the filled area.

Using multiple sliders to create dynamic chart in bqplt/jupyter

I am trying to plot a dynamic portfolio performance that changes as the weights of the portfolio change
Assume a portfolio has 2 components with a 50% weighting each. I want to show a chart of the portfolio with sliders representing the weights of the components. I then want to be able to slide the values of the weights around and have the portfolio chart dynamically update.
I have done this for a portfolio that consists of one weight but cant figure out how to amend for more than 1 weight - maybe I need a different approach.
Example below substitutes a random df with 1 column in place of my portfolio df - process should be the same.
In terms of this example if the df had 2 columns - how can I get it working with 2 sliders controlling each weight ?
from bqplot import DateScale, LinearScale, Axis, Figure, Lines
from ipywidgets import FloatSlider, VBox
import pandas as pd
import numpy as np
# Weight slider in [0, 1], starting at full weight.
slider = FloatSlider(value=1, min=0, max=1, step=.01, description='Weight A')

# Random single-column stand-in for the portfolio data.
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 1)), columns=['A'])

x_sc = LinearScale()
y_sc = LinearScale()
ax_x = Axis(label='Date', scale=x_sc, grid_lines='solid')
ax_y = Axis(label='Price', scale=y_sc, orientation='vertical', grid_lines='solid')

# `line` (red) is rescaled by the slider; `line2` is never updated.
line = Lines(x=df.index, y=df['A'], scales={'x': x_sc, 'y': y_sc}, colors=['#FF0000'])
line2 = Lines(x=df.index, y=df['A'], scales={'x': x_sc, 'y': y_sc})
fig = Figure(axes=[ax_x, ax_y], marks=[line, line2], title='Price Chart')


def new_chart(value):
    """Slider callback: redraw the red line as column A times the slider weight."""
    line.y = df[['A']]*slider.value


slider.observe(new_chart, 'value')
VBox([fig, slider])
Not sure if I have understood you. Do you mean this?
from bqplot import DateScale, LinearScale, Axis, Figure, Lines
from ipywidgets import FloatSlider, VBox
import pandas as pd
import numpy as np
# One weight slider per component, both in [0, 1] and starting at full weight.
slider = FloatSlider(value=1, min=0, max=1, step=.01, description='Weight A')
sliderB = FloatSlider(value=1, min=0, max=1, step=.01, description='Weight B')

# Random two-column stand-in for the portfolio components.
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 1)), columns=['A'])
df['B'] = np.random.randint(0, 100, size=(100, 1))

x_sc = LinearScale()
y_sc = LinearScale()
ax_x = Axis(label='Date', scale=x_sc, grid_lines='solid')
ax_y = Axis(label='Price', scale=y_sc, orientation='vertical', grid_lines='solid')

# Both lines start as the unweighted sum A + B; only `line` (red) is
# updated by the sliders, `line2` is never changed.
line = Lines(x=df.index, y=df['A']+df['B'], scales={'x': x_sc, 'y': y_sc}, colors=['#FF0000'])
line2 = Lines(x=df.index, y=df['A']+df['B'], scales={'x': x_sc, 'y': y_sc})
fig = Figure(axes=[ax_x, ax_y], marks=[line, line2], title='Price Chart')


def new_chart(change):
    """Shared slider callback: redraw the red line as the weighted sum of A and B."""
    weighted_a = df['A']*slider.value
    weighted_b = df['B']*sliderB.value
    line.y = weighted_a + weighted_b


# Either slider triggers the same recomputation.
for weight_slider in (slider, sliderB):
    weight_slider.observe(new_chart, 'value')
VBox([fig, slider, sliderB])

Plotly: How to create a barchart using group by?

I have a dataset as below:
import pandas as pd
# One row per (Pclass, Survived) combination with its passenger count.
data = {
    'Pclass': [1, 1, 2, 2, 3, 3],
    'Survived': [0, 1, 0, 1, 0, 1],
    'CategorySize': [80, 136, 97, 87, 372, 119],
}
I need to create a barchart using plotly in python, which is grouped by Pclass. in each group, i have 2 columns for Survived=0 and Survived=1 and in Y axis i should have the CategorySize. Therefore, i must have 6 bars which are in 3 groups.
Here is what i have tried:
import plotly.offline as pyo
import plotly.graph_objects as go
# A single bar trace: CategorySize per Pclass, with no split by Survived.
bars = go.Bar(x=PclassSurvived.Pclass, y=PclassSurvived.CategorySize)
data = [bars]
layout = go.Layout(
    title='Pclass-Survived',
    xaxis=dict(title='Pclass'),
    yaxis=dict(title='CategorySize'),
    barmode='group',
)
fig = go.Figure(data=data, layout=layout)
# Write the chart to a standalone HTML file.
pyo.plot(fig, filename='./Output/Pclass-Survived.html')
But, it is not what i need.
This could be easily done with Pandas's groupby and Plotly Express.
You should group your data by Pclass and Survived columns, and apply the sum aggregate function to the CategorySize column.
This way you'll get 6 groups with their aggregate values, and you can easily plot a pair of side-by-side bars for each Pclass thanks to the barmode attribute (using the 'group' value); you can read more about it in the documentation.
The code:
import pandas as pd
import plotly.express as px
# One row per (Pclass, Survived) combination with its passenger count.
data = pd.DataFrame(
    {
        "Pclass": [1, 1, 2, 2, 3, 3],
        "Survived": [0, 1, 0, 1, 0, 1],
        "CategorySize": [80, 136, 97, 87, 372, 119],
    }
)
Now you group the data:
# Total CategorySize per (Pclass, Survived) pair, with the keys kept as columns.
grouped_df = data.groupby(["Pclass", "Survived"], as_index=False)["CategorySize"].sum()
And convert the Survived column values to strings (so plotly treat it as a discrete variable, rather than numeric variable):
# Replace the 0/1 codes with text labels.
survival_labels = {0: "Died", 1: "Survived"}
grouped_df["Survived"] = grouped_df["Survived"].map(survival_labels)
Now, you should have:
   Pclass  Survived  CategorySize
0       1      Died            80
1       1  Survived           136
2       2      Died            97
3       2  Survived            87
4       3      Died           372
5       3  Survived           119
Finally, you visualize your data:
# Grouped bars: one group per Pclass, one colored bar per Survived label.
bar_options = dict(x="Pclass", y="CategorySize", color="Survived", barmode="group")
fig = px.bar(grouped_df, **bar_options)
fig.show()
I'm having trouble with your sample dataset. PclassSurvived.Pclass and PclassSurvived.CategorySize are not defined, and it's not 100% clear to me what you would like to accomplish here. But judging by your explanations and the structure of your dataset, it seems that this could get you somewhere:
Plot 1:
Code 1:
# imports
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.graph_objs as go
import pandas as pd
import numpy as np
# Sample data: one row per (Pclass, Survived) combination with its count.
data = dict(Pclass=[1, 1, 2, 2, 3, 3],
            Survived=[0, 1, 0, 1, 0, 1],
            CategorySize=[80, 136, 97, 87, 372, 119])
df = pd.DataFrame(data)

# Split the rows by outcome so each outcome becomes its own bar trace.
s0 = df.query('Survived==0')
s1 = df.query('Survived==1')

# The original snippet also rebound `data` to data['Pclass'] here; that value
# was never used and only clobbered the source dict, so it has been dropped.
fig = go.Figure()
fig.add_trace(go.Bar(x=s0['Pclass'], y=s0['CategorySize'], name='dead'))
fig.add_trace(go.Bar(x=s1['Pclass'], y=s1['CategorySize'], name='alive'))

# Draw the two traces side by side within each Pclass.
fig.update_layout(barmode='group')
fig.show()
Edit: You can produce the same plot using the plotly.offline module like this:
Code 2:
# Import the necessary libraries
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd
# Set notebook mode to work in offline
pyo.init_notebook_mode()

# Sample data: one row per (Pclass, Survived) combination with its count.
data = dict(Pclass=[1, 1, 2, 2, 3, 3],
            Survived=[0, 1, 0, 1, 0, 1],
            CategorySize=[80, 136, 97, 87, 372, 119])
df = pd.DataFrame(data)

# Split the rows by outcome so each outcome becomes its own bar trace.
s0 = df.query('Survived==0')
s1 = df.query('Survived==1')

# The original snippet also rebound `data` to data['Pclass'] here; that value
# was never used and only clobbered the source dict, so it has been dropped.
fig = go.Figure()
fig.add_trace(go.Bar(x=s0['Pclass'], y=s0['CategorySize'], name='dead'))
fig.add_trace(go.Bar(x=s1['Pclass'], y=s1['CategorySize'], name='alive'))

# Render the figure inline through the offline module.
pyo.iplot(fig, filename='your-library')
Alternative approach with stacked bars:
Plot 2:
Code 3:
# imports
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.graph_objs as go
import pandas as pd
import numpy as np
# Sample data: one row per (Pclass, Survived) combination with its count.
data = dict(Pclass=[1, 1, 2, 2, 3, 3],
            Survived=[0, 1, 0, 1, 0, 1],
            CategorySize=[80, 136, 97, 87, 372, 119])
df = pd.DataFrame(data)

# Split the rows by outcome so each outcome becomes its own bar trace.
s0 = df.query('Survived==0')
s1 = df.query('Survived==1')

# The original snippet also rebound `data` to data['Pclass'] here; that value
# was never used and only clobbered the source dict, so it has been dropped.
fig = go.Figure()
fig.add_trace(go.Bar(x=s0['Pclass'], y=s0['CategorySize'], name='dead'))
fig.add_trace(go.Bar(x=s1['Pclass'], y=s1['CategorySize'], name='alive'))

# Per-class totals, shown as a text label anchored at the top of each stack.
df_tot = df.groupby('Pclass').sum()
annot1 = [
    dict(
        x=xi,
        y=yi,
        text=str(yi),
        xanchor='auto',
        yanchor='bottom',
        showarrow=False,
    )
    for xi, yi in zip(df_tot.index, df_tot['CategorySize'])
]

# Stack the two outcomes on top of each other and attach the total labels.
fig.update_layout(barmode='stack', annotations=annot1)
fig.show()

Categories

Resources