How to bin all outliers into one bin using Histogram in Plotly? - python

So the question is:
Can I plot a histogram in Plotly, where all values that are bigger than some threshold will be grouped into one bin?
The desired output:
But using standard plotly Histogram class I was able only to get this output:
import pandas as pd
from plotly import graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode()
test_df = pd.DataFrame({'values': [1]*10 + [2]*9 +
[3.1]*4 + [3.6]*4 +
[4]*7 + [5]*6 + [6]*5 + [7]*4 + [8]*3 +
[9]*2 + [10]*1 +
[111.2]*2 + [222.3]*2 + [333.4]*1}) # <- I want to group them into one bin "> 10"
data = [go.Histogram(x=test_df['values'],
xbins=dict(
start=0,
end=11,
size=1
),
autobinx = False)]
layout = go.Layout(
title='values'
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='basic histogram')

So after spending some time I found a solution myself using numpy.histogram and a Plotly Bar chart.
Leaving it here in case anyone else faces the same problem.
def plot_bar_with_outliers(series, name, end):
    """Plot a bar-chart histogram whose last bar collects everything at or above ``end``.

    The data is binned with ``numpy.histogram`` into unit-width bins running
    from the floor of the smallest value up to ``end``, plus one final
    overflow bin. The counts are then drawn as a Plotly bar chart with one
    text label per bin.

    :param series: numeric data exposing ``.min()`` and ``.max()``
        (the example passes a pandas Series).
    :param name: title of the chart.
    :param end: integer threshold; values from ``end`` upwards share the last
        bar, which is labelled ``'> <end>'``.
    """
    # Local import: the surrounding snippet uses numpy without importing it.
    import numpy as np

    size = 1
    # Floor rather than truncate, so that a negative fractional minimum
    # (e.g. -1.5) still falls inside the first bin instead of being dropped.
    start = int(np.floor(series.min()))

    # Making a histogram: regular edges up to `end`, then a single edge that
    # closes the overflow bin (the data maximum, or `end + size` when no
    # value exceeds `end`, which leaves the overflow bar empty).
    largest_value = series.max()
    edges = list(range(start, end + size, size))
    edges.append(largest_value if largest_value > end else end + size)
    counts, bin_edges = np.histogram(series, bins=edges)

    # Adding labels to the chart: regular bins show their range, the
    # overflow bin shows only its lower edge.
    labels = [
        '{} - {}'.format(low, high) if high <= end else '> {}'.format(low)
        for low, high in zip(bin_edges[:-1], bin_edges[1:])
    ]

    # Plotting the graph
    data = [go.Bar(x=labels, y=counts)]
    layout = go.Layout(title=name)
    fig = go.Figure(data=data, layout=layout)
    iplot(fig, filename='basic histogram')
plot_bar_with_outliers(test_df['values'], 'values', end=11)

An alternative to the above option is the following:
import numpy as np
import plotly.graph_objects as go

# Initialize the values that you want in the histogram.
values = [7, 8, 8, 8, 9, 10, 10, 11, 12, 13, 14]
# Initialize the maximum x-axis value that you want.
maximum_value = 11
# Clip every value above the threshold down to the threshold in one
# vectorized call, so that all outliers land in the same (last) bin.
clipped_values = np.minimum(values, maximum_value)
# Plot the histogram.
fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=clipped_values,
        xbins={"size": 1},
    )
)
fig.show()
Link to Image

Related

How to use a slider with plotly in order to show the figure from the beginning to the current step?

I want to use plotly to show 2 sine waves.
I want to use a slider to show the progress from time=0 to the current slider step.
I tried to write the following code:
import numpy as np
import pandas as pd
if __name__ == "__main__":
    # px is used below but was never imported in this snippet.
    import plotly.express as px

    time = np.arange(0, 10, 0.1)
    val1 = np.sin(time)
    val2 = np.sin(time) * np.sin(time)
    df = pd.DataFrame(val1, columns=['val-1'])
    df['val-2'] = val2
    # One animation frame per row index; each frame holds only that row.
    fig = px.scatter(df, animation_frame=df.index)
    fig.update_layout(xaxis_range=[-100, 100])
    fig.update_layout(yaxis_range=[-1.1, 1.1])
    fig.show()
but I can only see the current value of the 2 sine waves (and not the whole waves from step=0 to the current step).
How can I change my code to see the whole sine waves from step=0 to the current step?
I don't think it is possible to animate a line chart in Express, so I would have to use a graph object. There is an example in the reference, which I will adapt to your assignment. As for the graph structure, create the initial graph data and the respective frames in the animation, add them to the layout by creating steps and sliders.
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
if __name__ == "__main__":
    time = np.arange(0, 10, 0.1)
    val1 = np.sin(time)
    val2 = np.sin(time) * np.sin(time)
    df = pd.DataFrame(val1, columns=['val-1'])
    df['val-2'] = val2

    # Initial traces shown before any frame is applied: both complete curves.
    data = [go.Scatter(mode='lines', x=df.index, y=df['val-1'], name='val-1'),
            go.Scatter(mode='lines', x=df.index, y=df['val-2'], name='val-2')]

    # One slider step per row; each step animates to the frame whose name is
    # that row's index (frame names are passed explicitly as strings).
    steps = [
        dict(method="animate", args=[[str(i)], {"title": f'step:{i}'}], label=f'{i}')
        for i in df.index
    ]
    sliders = [dict(active=0, currentvalue={"prefix": "Step: "}, steps=steps)]

    start_button = dict(label='Start', method='animate', args=[None])
    pause_button = dict(
        label='Pause',
        method='animate',
        # Animating to [None] in immediate mode halts the running animation.
        args=[[None], dict(frame=dict(duration=0, redraw=False),
                           mode="immediate",
                           fromcurrent=True,  # was misspelled "formcurrent"
                           transition=dict(duration=0, easing="linear"))])

    layout = go.Layout(dict(
        xaxis=dict(range=[-100, 100]),
        yaxis=dict(range=[-1.1, 1.1]),
        updatemenus=[dict(type='buttons',
                          buttons=[start_button, pause_button],
                          direction="left",
                          pad=dict(r=10, t=40),
                          showactive=False,
                          x=0.00,
                          xanchor="right",
                          y=0,
                          yanchor="top")],
        sliders=sliders
    ))

    frames = []
    for i in df.index:
        # .loc slicing is label-inclusive (rows 0..i), so take i + 1 index
        # values to keep x and y the same length in every frame.
        x_so_far = df.index[0:i + 1]
        frame = go.Frame(data=[go.Scatter(x=x_so_far, y=df.loc[0:i, 'val-1']),
                               go.Scatter(x=x_so_far, y=df.loc[0:i, 'val-2'])],
                         layout=go.Layout(title_text=f'Step:{i}'),
                         name=str(i))
        frames.append(frame)

    fig = go.Figure(data=data, layout=layout, frames=frames)
    fig.show()

How can I add overflow bin in my histogram using plotly/python [duplicate]

So the question is:
Can I plot a histogram in Plotly, where all values that are bigger than some threshold will be grouped into one bin?
The desired output:
But using standard plotly Histogram class I was able only to get this output:
import pandas as pd
from plotly import graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode()
test_df = pd.DataFrame({'values': [1]*10 + [2]*9 +
[3.1]*4 + [3.6]*4 +
[4]*7 + [5]*6 + [6]*5 + [7]*4 + [8]*3 +
[9]*2 + [10]*1 +
[111.2]*2 + [222.3]*2 + [333.4]*1}) # <- I want to group them into one bin "> 10"
data = [go.Histogram(x=test_df['values'],
xbins=dict(
start=0,
end=11,
size=1
),
autobinx = False)]
layout = go.Layout(
title='values'
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='basic histogram')
So after spending some time I found a solution myself using numpy.histogram and a Plotly Bar chart.
Leaving it here in case anyone else faces the same problem.
def plot_bar_with_outliers(series, name, end):
    """Plot a bar-chart histogram whose last bar collects everything at or above ``end``.

    The data is binned with ``numpy.histogram`` into unit-width bins running
    from the floor of the smallest value up to ``end``, plus one final
    overflow bin. The counts are then drawn as a Plotly bar chart with one
    text label per bin.

    :param series: numeric data exposing ``.min()`` and ``.max()``
        (the example passes a pandas Series).
    :param name: title of the chart.
    :param end: integer threshold; values from ``end`` upwards share the last
        bar, which is labelled ``'> <end>'``.
    """
    # Local import: the surrounding snippet uses numpy without importing it.
    import numpy as np

    size = 1
    # Floor rather than truncate, so that a negative fractional minimum
    # (e.g. -1.5) still falls inside the first bin instead of being dropped.
    start = int(np.floor(series.min()))

    # Making a histogram: regular edges up to `end`, then a single edge that
    # closes the overflow bin (the data maximum, or `end + size` when no
    # value exceeds `end`, which leaves the overflow bar empty).
    largest_value = series.max()
    edges = list(range(start, end + size, size))
    edges.append(largest_value if largest_value > end else end + size)
    counts, bin_edges = np.histogram(series, bins=edges)

    # Adding labels to the chart: regular bins show their range, the
    # overflow bin shows only its lower edge.
    labels = [
        '{} - {}'.format(low, high) if high <= end else '> {}'.format(low)
        for low, high in zip(bin_edges[:-1], bin_edges[1:])
    ]

    # Plotting the graph
    data = [go.Bar(x=labels, y=counts)]
    layout = go.Layout(title=name)
    fig = go.Figure(data=data, layout=layout)
    iplot(fig, filename='basic histogram')
plot_bar_with_outliers(test_df['values'], 'values', end=11)
An alternative to the above option is the following:
import numpy as np
import plotly.graph_objects as go

# Initialize the values that you want in the histogram.
values = [7, 8, 8, 8, 9, 10, 10, 11, 12, 13, 14]
# Initialize the maximum x-axis value that you want.
maximum_value = 11
# Clip every value above the threshold down to the threshold in one
# vectorized call, so that all outliers land in the same (last) bin.
clipped_values = np.minimum(values, maximum_value)
# Plot the histogram.
fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=clipped_values,
        xbins={"size": 1},
    )
)
fig.show()
Link to Image

Plotly Express line with continuous color scale

I have the following piece of code
import plotly.express as px
import pandas as pd
import numpy as np
x = [1,2,3,4,5,6]
df = pd.DataFrame(
{
'x': x*3,
'y': list(np.array(x)) + list(np.array(x)**2) + list(np.array(x)**.5),
'color': list(np.array(x)*0) + list(np.array(x)*0+1) + list(np.array(x)*0+2),
}
)
# Draw the same data once as a scatter plot and once as a line plot.
for plotting_function in [px.scatter, px.line]:
    fig = plotting_function(
        df,
        x = 'x',
        y = 'y',
        color = 'color',
        title = f'Using {plotting_function.__name__}',
    )
    fig.show()
which produces the following two plots:
For some reason px.line is not producing the continuous color scale that I want, and in the documentation for px.scatter I cannot find how to join the points with lines. How can I produce a plot with a continuous color scale and lines joining the points for each trace?
This is the plot I want to produce:
I am not sure this is possible using only plotly.express. If you use px.line, then you can pass the argument markers=True as described in this answer, but from the px.line documentation it doesn't look like continuous color scales are supported.
UPDATED ANSWER: in order to have a legend that groups the lines and markers together, it's probably simplest to use go.Scatter with the argument mode='lines+markers'. You'll need to add the traces one at a time (by plotting each unique color portion of the data one at a time) in order to be able to control each line+marker group from the legend.
When plotting these traces, you will need some functions to retrieve the colors of the lines from the continuous color scale because go.Scatter won't know what color your lines are supposed to be unless you specify them - thankfully that has been answered here.
Also you won't be able to generate a colorbar adding the markers one color at a time, so to add a colorbar, you can plot all of the markers at once using go.Scatter, but use the argument marker=dict(size=0, color="rgba(0,0,0,0)", colorscale='Plasma', colorbar=dict(thickness=20)) to display a colorbar, but ensure that these duplicate markers are not visible.
Putting all of this together:
# import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
x = [1,2,3,4,5,6]
df = pd.DataFrame(
{
'x': x*3,
'y': list(np.array(x)) + list(np.array(x)**2) + list(np.array(x)**.5),
'color': list(np.array(x)*0) + list(np.array(x)*0+1) + list(np.array(x)*0+2),
}
)
# This function allows you to retrieve colors from a continuous color scale
# by providing the name of the color scale, and the normalized location between 0 and 1
# Reference: https://stackoverflow.com/questions/62710057/access-color-from-plotly-color-scale
def get_color(colorscale_name, loc):
    """Look up one color, or a list of colors, on a named Plotly colorscale.

    :param colorscale_name: name of the colorscale, e.g. ``'Plasma'``.
    :param loc: a single normalized position, or an iterable of positions;
        each is handed to ``get_continuous_color``.
    :return: one color for a scalar ``loc``, a list of colors for an iterable.
    """
    from _plotly_utils.basevalidators import ColorscaleValidator

    # First argument: name of the property being validated.
    # Second argument: a parent name; its value does not matter here.
    validator = ColorscaleValidator("colorscale", "")
    # The coerced colorscale is a list of lists: [[loc1, "rgb1"], [loc2, "rgb2"], ...]
    colorscale = validator.validate_coerce(colorscale_name)

    if not hasattr(loc, "__iter__"):
        return get_continuous_color(colorscale, loc)
    return [get_continuous_color(colorscale, position) for position in loc]
# Identical to Adam's answer
import plotly.colors
from PIL import ImageColor
def get_continuous_color(colorscale, intermed):
    """
    Plotly continuous colorscales assign colors to the range [0, 1]. This function computes the intermediate
    color for any value in that range.

    Plotly doesn't make the colorscales directly accessible in a common format.
    Some are ready to use:

        colorscale = plotly.colors.PLOTLY_SCALES["Greens"]

    Others are just swatches that need to be constructed into a colorscale:

        viridis_colors, scale = plotly.colors.convert_colors_to_same_type(plotly.colors.sequential.Viridis)
        colorscale = plotly.colors.make_colorscale(viridis_colors, scale=scale)

    Values that fall before the first cutoff or after the last cutoff of the
    colorscale are clamped to the first / last color respectively.

    :param colorscale: A plotly continuous colorscale defined with RGB string colors
        (hex colors are converted on the fly).
    :param intermed: value in the range [0, 1]
    :return: color in rgb string format
    :rtype: str
    :raises ValueError: if ``colorscale`` is empty
    """
    if len(colorscale) < 1:
        raise ValueError("colorscale must have at least one color")

    def as_rgb(color):
        # Some colorscale names (such as cividis) yield hex colors:
        # [[loc1, "hex1"], [loc2, "hex2"], ...]; everything else passes through.
        if color[0] != "#":
            return color
        return "rgb" + str(ImageColor.getcolor(color, "RGB"))

    if intermed <= 0 or len(colorscale) == 1:
        return as_rgb(colorscale[0][1])
    if intermed >= 1:
        return as_rgb(colorscale[-1][1])

    # Find the pair of neighbouring entries that brackets `intermed`.
    low = high = None
    for cutoff, color in colorscale:
        if intermed > cutoff:
            low = (cutoff, color)
        else:
            high = (cutoff, color)
            break

    # The colorscale may not span the whole [0, 1] range; clamp instead of
    # failing on an unbound bracket.
    if low is None:
        return as_rgb(colorscale[0][1])
    if high is None:
        return as_rgb(colorscale[-1][1])

    low_cutoff, low_color = low
    high_cutoff, high_color = high
    return plotly.colors.find_intermediate_color(
        lowcolor=as_rgb(low_color),
        highcolor=as_rgb(high_color),
        intermed=((intermed - low_cutoff) / (high_cutoff - low_cutoff)),
        colortype="rgb",
    )
fig = go.Figure()

## add the lines+markers
# Compute the color range once instead of on every iteration.
color_min, color_max = min(df.color), max(df.color)
# If every row has the same color value the span is 0; fall back to 1 so the
# normalization below yields 0 instead of dividing by zero.
color_span = (color_max - color_min) or 1
for color_val in df.color.unique():
    color_val_normalized = (color_val - color_min) / color_span
    df_subset = df[df['color'] == color_val]
    fig.add_trace(go.Scatter(
        x=df_subset['x'],
        y=df_subset['y'],
        mode='lines+markers',
        marker=dict(color=get_color('Plasma', color_val_normalized)),
        name=f"line+marker {color_val}",
        legendgroup=f"line+marker {color_val}"
    ))

## add invisible markers to display the colorbar without displaying the markers
fig.add_trace(go.Scatter(
    x=df['x'],
    y=df['y'],
    mode='markers',
    marker=dict(
        size=0,
        color="rgba(0,0,0,0)",
        colorscale='Plasma',
        cmin=color_min,
        cmax=color_max,
        colorbar=dict(thickness=40)
    ),
    showlegend=False
))

fig.update_layout(
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01),
    yaxis_range=[min(df.y)-2,max(df.y)+2]
)
fig.show()
You can achieve this using only 2 more parameters in px.line:
markers=True
color_discrete_sequence=my_plotly_continuous_sequence
The complete code would look something like this (Note the list slicing [::4] so that the colors are well spaced):
import plotly.express as px
import pandas as pd
import numpy as np
x = [1, 2, 3, 4, 5, 6]
df = pd.DataFrame(
{
'x': x * 3,
'y': list(np.array(x)) + list(np.array(x) ** 2) + list(np.array(x) ** .5),
'color': list(np.array(x) * 0) + list(np.array(x) * 0 + 1) + list(np.array(x) * 0 + 2),
}
)
fig = px.line(
df,
x='x',
y='y',
color='color',
color_discrete_sequence=px.colors.sequential.Plasma[::4],
markers=True,
template='plotly'
)
fig.show()
This produces the following output.
In case you have more lines than the colors present in the colormap, you can construct a custom colorscale so that you get one complete sequence instead of a cycling sequence:
rgb = px.colors.convert_colors_to_same_type(px.colors.sequential.RdBu)[0]
colorscale = []
n_steps = 4  # Number of colors generated per pair of neighbouring base colors
for low_color, high_color in zip(rgb[:-1], rgb[1:]):
    # endpoint=False: the end of one segment is the start of the next, so
    # including both endpoints would emit every interior base color twice.
    for step in np.linspace(0, 1, n_steps, endpoint=False):
        colorscale.append(px.colors.find_intermediate_color(low_color, high_color, step, colortype='rgb'))
# Close the sequence with the final base color, which no segment start covers.
colorscale.append(rgb[-1])
fig = px.line(df_e, x='temperature', y='probability', color='year', color_discrete_sequence=colorscale, height=900)
fig.show()

Plotly: How to retrieve regression results using plotly express?

You can easily plot a regression line using plotly express / px.scatter and retrieve regression results like beta using px.get_trendline_results(fig).iloc[0]["px_fit_results"].params[1]. But how can you retrieve other parameters like R-squared or p-values for the coefficients?
Plot:
Code:
# imports
import plotly.express as px
import pandas as pd
import numpy as np
# data
np.random.seed(123)
numdays=20
X = (np.random.randint(low=-20, high=20, size=numdays).cumsum()+100).tolist()
Y = (np.random.randint(low=-20, high=20, size=numdays).cumsum()+100).tolist()
df = pd.DataFrame({'X': X, 'Y':Y})
# figure using px.scatter
fig = px.scatter(df, x="X", y="Y", trendline="ols", template = 'plotly_dark')
fig.show()
The answer:
# One row per trendline; the fitted statsmodels results object sits in the
# "px_fit_results" column.
model = px.get_trendline_results(fig)
results = model.iloc[0]["px_fit_results"]
alpha = results.params[0]      # intercept
beta = results.params[1]       # slope
p_beta = results.pvalues[1]    # p-value of the slope
r_squared = results.rsquared
Details:
All regression results are available through:
px.get_trendline_results(fig)
Which, when run, will return a somewhat cryptic looking pandas dataframe:
px_fit_results
0 <statsmodels.regression.linear_model.Regressio...
The element under px_fit_results is an object of type statsmodels.regression.linear_model.RegressionResultsWrapper which is a wrapper for statsmodels.
So if we simplify matters a bit by setting:
model = px.get_trendline_results(fig)
And:
results = model.iloc[0]["px_fit_results"]
Then we can check what's available in that object using:
dir(results)
And find all the regression details one should need, like:
'predict',
'pvalues',
'remove_data',
'resid',
'resid_pearson',
'rsquared',
'rsquared_adj',
'save',
'scale',
'ssr',
'summary',
'summary2',
't_test',
't_test_pairwise',
But note that all these available results can be structured differently.
Running results.rsquared will return a single float 0.611901357827784, while running results.pvalues will return an array array([9.95834884e-01, 4.59734574e-05]). Which again will be subsettable for the constant and trendline through results.pvalues[0] and results.pvalues[1], respectively.
With this information available, you could for example extract some of them and include them as annotations to further improve your plotly figures:
Plot:
Complete code:
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
import datetime
# data
np.random.seed(123)
numdays=20
X = (np.random.randint(low=-20, high=20, size=numdays).cumsum()+100).tolist()
Y = (np.random.randint(low=-20, high=20, size=numdays).cumsum()+100).tolist()
df = pd.DataFrame({'X': X, 'Y':Y})
# Figure using plotly express
fig = px.scatter(df, x="X", y="Y", trendline="ols", template = 'plotly_dark')
# retrieve model estimates
model = px.get_trendline_results(fig)
results = model.iloc[0]["px_fit_results"]
alpha = results.params[0]
beta = results.params[1]
p_beta = results.pvalues[1]
r_squared = results.rsquared
line1 = 'y = ' + str(round(alpha, 4)) + ' + ' + str(round(beta, 4))+'x'
line2 = 'p-value = ' + '{:.5f}'.format(p_beta)
line3 = 'R^2 = ' + str(round(r_squared, 3))
summary = line1 + '<br>' + line2 + '<br>' + line3
fig.add_annotation(
x=110,
y=140,
xref="x",
yref="y",
text=summary,
showarrow=False,
font=dict(
family="Courier New, monospace",
size=16,
color="#ffffff"
),
align="left",
arrowhead=2,
arrowsize=1,
arrowwidth=2,
arrowcolor="#636363",
ax=20,
ay=-30,
borderwidth=2,
borderpad=4,
bgcolor="rgba(100,100,100, 0.6)",
opacity=0.8
)
fig.show()

How to add more than one shape with loop in plotly

I use plotly package to show dynamic finance chart at python. However I didn't manage to put my all key points lines on one chart with for loop. Here is my code:
fig.update_layout(
for i in range(0,len(data)):
shapes=[
go.layout.Shape(
type="rect",
x0=data['Date'][i],
y0=data['Max_alt'][i],
x1='2019-12-31',
y1=data['Max_ust'][i],
fillcolor="LightSkyBlue",
opacity=0.5,
layer="below",
line_width=0)])
fig.show()
I have data like the sample below. It is a time-series-based EURUSD parity financial dataset. I calculated two constraints for both Local Min and Max. I wanted to draw a rectangle shape for each Min_alt / Min_ust and Max_alt / Max_ust range. I can draw it for just one date, as in the image below; however, I didn't manage to show all ranges in the same plotly graph.
Here is the sample data set.
Here is the solution for added lines:
import datetime
colors = ["LightSkyBlue", "RoyalBlue", "forestgreen", "lightseagreen"]
# Build one rectangle per row, keyed by a unique name, then hand the whole
# list to the layout in a single update so no shape overwrites another.
ply_shapes = {}
for i in range(0, len(data1)):
    ply_shapes['shape_' + str(i)] = go.layout.Shape(
        type="rect",
        x0=data1['Date'][i].strftime('%Y-%m-%d'),
        y0=data1['Max_alt'][i],
        x1='2019-12-31',
        y1=data1['Max_ust'][i],
        fillcolor="LightSkyBlue",
        opacity=0.5,
        layer="below"
    )
lst_shapes = list(ply_shapes.values())
fig1.update_layout(shapes=lst_shapes)
fig1.show()
However, I still have problems adding traces to those lines. I mean the text attribute.
Here is my code:
add_trace = {}
for i in range(0, len(data1)):
add_trace['scatter_' + str(i)] = go.Scatter(
x=['2019-12-31'],
y=[data1['Max_ust'][i]],
text=[str(data['Max_Label'][i])],
mode="text")
lst_trace = list(add_trace.values())
fig2=go.Figure(lst_trace)
fig2.show()
The answer:
For full control of each and every shape you insert, you could follow this logic:
fig = go.Figure()
#[...] data, traces and such
# Collect every shape in a dict (one entry per loop pass), then apply them
# all to the layout at once.
ply_shapes = {}
for i in range(1, len(df)):
    ply_shapes['shape_' + str(i)] = go.layout.Shape()
lst_shapes = list(ply_shapes.values())
fig.update_layout(shapes=lst_shapes)
fig.show()
The details:
I'm not 100% sure what you're aiming to do here, but the following suggestion will answer your question quite literally regarding:
How to add more than one shape with loop in plotly?
Then you'll have to figure out the details regarding:
manage to put my all key points lines on one chart
Plot:
The plot itself is most likely not what you're looking for, but since you for some reason are adding a plot by the length of your data, for i in range(0,len(data)), I've made this:
Code:
This snippet will show how to handle all desired traces and shapes with for loops:
# Imports
import pandas as pd
#import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
#from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# data, random sample to illustrate stocks
np.random.seed(12345)
rows = 20
x = pd.Series(np.random.randn(rows),index=pd.date_range('1/1/2020', periods=rows)).cumsum()
y = pd.Series(x-np.random.randn(rows)*5,index=pd.date_range('1/1/2020', periods=rows))
df = pd.concat([y,x], axis = 1)
df.columns = ['StockA', 'StockB']

# lines
df['keyPoints1']=np.random.randint(-5,5,len(df))
df['keyPoints2']=df['keyPoints1']*-1

# plotly traces: one Scatter per stock column, collected under unique keys
stocks = ['StockA', 'StockB']
traces = {}
for i in range(0, len(stocks)):
    traces['trace_' + str(i)] = go.Scatter(x=df.index,
                                           y=df[stocks[i]].values,
                                           name=stocks[i])
data = list(traces.values())
fig = go.Figure(data)

# shapes update: one line shape per consecutive pair of dates, each with a
# randomly chosen color, all applied to the layout in a single call
colors = ["LightSkyBlue", "RoyalBlue", "forestgreen", "lightseagreen"]
ply_shapes = {}
for i in range(1, len(df)):
    ply_shapes['shape_' + str(i)] = go.layout.Shape(
        type="line",
        x0=df.index[i-1],
        y0=df['keyPoints1'].iloc[i-1],
        x1=df.index[i],
        y1=df['keyPoints2'].iloc[i-1],
        line=dict(
            color=np.random.choice(colors,1)[0],
            width=30),
        opacity=0.5,
        layer="below"
    )
lst_shapes = list(ply_shapes.values())
fig.update_layout(shapes=lst_shapes)
fig.show()
Also you can use fig.add_{shape}:
fig = go.Figure()
fig.add_trace(
    go.Scatter(...)  # placeholder: fill in your own trace arguments
)
# One vertical rectangle per row of `vrect` (columns: start, finish, color).
# Iterating the rows directly covers every row, including the last one.
for row in vrect.itertuples():
    fig.add_vrect(
        x0=row.start,
        x1=row.finish,
        fillcolor=row.color,
        opacity=0.25,
        line_width=0)
fig.show()

Categories

Resources