Using multiple sliders to create a dynamic chart in bqplot/Jupyter - python

I am trying to plot a dynamic portfolio performance that changes as the weights of the portfolio change
Assume a portfolio has 2 components with a 50% weighting each. I want to show a chart of the portfolio with sliders representing the weights of the components. I then want to be able to slide the values of the weights around and have the portfolio chart dynamically update.
I have done this for a portfolio that consists of one weight but can't figure out how to amend it for more than one weight - maybe I need a different approach.
Example below substitutes a random df with 1 column in place of my portfolio df - process should be the same.
In terms of this example if the df had 2 columns - how can I get it working with 2 sliders controlling each weight ?
from bqplot import DateScale, LinearScale, Axis, Figure, Lines
from ipywidgets import FloatSlider, VBox
import pandas as pd
import numpy as np
# Slider controlling the weight applied to column 'A' (0 to 1 in steps of 0.01, starting at 1).
slider = FloatSlider(value=1, min = 0, max = 1, step = .01, description = 'Weight A')
# Stand-in data: a single column 'A' holding 100 random integers in [0, 100).
df = pd.DataFrame(np.random.randint(0,100,size=(100, 1)), columns=list('A'))
x_sc = LinearScale()
y_sc = LinearScale()
ax_x = Axis(label='Date', scale=x_sc, grid_lines='solid')
ax_y = Axis(label='Price', scale=y_sc, orientation='vertical', grid_lines='solid')
# `line` (red) is the mark rewritten by the slider callback; `line2` is never modified afterwards.
line = Lines(y=df['A'],x=df.index , scales={'x': x_sc, 'y': y_sc}, colors = ['#FF0000'])
line2 = Lines(y=df['A'],x=df.index , scales={'x': x_sc, 'y': y_sc})
fig = Figure(axes=[ax_x, ax_y], marks=[line, line2], title='Price Chart')
def new_chart(value):
    """Observer callback: rescale column 'A' by the current slider value and push it to `line`."""
    # The `value` argument is not used; the weight is read from the slider directly.
    # NOTE(review): df[['A']] is a one-column DataFrame, whereas `line` was created
    # from the Series df['A'] -- confirm the mark accepts this shape.
    new_y = df[['A']]*slider.value
    line.y = new_y
# Re-run the callback whenever the slider's 'value' trait changes.
slider.observe(new_chart,'value')
# Last expression of the cell: the figure stacked above the slider.
VBox([fig,slider])

Not sure if I have understood you. Do you mean this?
from bqplot import DateScale, LinearScale, Axis, Figure, Lines
from ipywidgets import FloatSlider, VBox
import pandas as pd
import numpy as np
def _make_weight_slider(label):
    """Return a 0-1 weight slider (step 0.01) that starts at full weight."""
    return FloatSlider(value=1, min=0, max=1, step=.01, description=label)


slider = _make_weight_slider('Weight A')
sliderB = _make_weight_slider('Weight B')

# Stand-in data: two columns of 100 random integers in [0, 100).
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 1)), columns=list('A'))
df['B'] = np.random.randint(0, 100, size=(100, 1))

x_sc = LinearScale()
y_sc = LinearScale()
ax_x = Axis(label='Date', scale=x_sc, grid_lines='solid')
ax_y = Axis(label='Price', scale=y_sc, orientation='vertical', grid_lines='solid')

# `line` (red) is redrawn by the callback; `line2` keeps the unweighted sum.
line = Lines(
    x=df.index,
    y=df['A'] + df['B'],
    scales={'x': x_sc, 'y': y_sc},
    colors=['#FF0000'],
)
line2 = Lines(
    x=df.index,
    y=df['A'] + df['B'],
    scales={'x': x_sc, 'y': y_sc},
)
fig = Figure(axes=[ax_x, ax_y], marks=[line, line2], title='Price Chart')


def new_chart(change):
    """Redraw the red line as the weighted sum of both columns.

    The `change` notification itself is not inspected; both weights are
    read from the sliders so either slider can trigger the update.
    """
    weighted_a = df['A'] * slider.value
    weighted_b = df['B'] * sliderB.value
    line.y = weighted_a + weighted_b


# The same callback serves both sliders.
for weight_control in (slider, sliderB):
    weight_control.observe(new_chart, 'value')

VBox([fig, slider, sliderB])

Related

How to use a slider with plotly in order to show the figure from the beginning to the current step?

I want to use plotly to show 2 sine waves.
I want to use a slider to show the progress from time=0 to the current slider step.
I tried to write the following code:
import numpy as np
import pandas as pd
if __name__ == "__main__":
    # `px` is used below, but this snippet never imported plotly.express,
    # so it would stop with a NameError before drawing anything.
    import plotly.express as px

    # Two sine-based signals sampled every 0.1 on [0, 10).
    time = np.arange(0, 10, 0.1)
    val1 = np.sin(time)
    val2 = np.sin(time) * np.sin(time)
    df = pd.DataFrame(val1, columns=['val-1'])
    df['val-2'] = val2

    # One animation frame per row index.
    fig = px.scatter(df, animation_frame=df.index)
    fig.update_layout(xaxis_range=[-100, 100])
    fig.update_layout(yaxis_range=[-1.1, 1.1])
    fig.show()
but I can only see the current value of the 2 sine waves (and not the whole waves from step=0 to the current step).
How can I change my code to see the whole sine waves from step=0 to the current step?
I don't think it is possible to animate a line chart in Express, so I would have to use a graph object. There is an example in the reference, which I will adapt to your assignment. As for the graph structure, create the initial graph data and the respective frames in the animation, add them to the layout by creating steps and sliders.
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
if __name__ == "__main__":
    # Two sine-based signals sampled every 0.1 on [0, 10).
    time = np.arange(0, 10, 0.1)
    val1 = np.sin(time)
    val2 = np.sin(time) * np.sin(time)
    df = pd.DataFrame(val1, columns=['val-1'])
    df['val-2'] = val2
    trace_columns = ['val-1', 'val-2']

    # Initial traces shown before any frame is selected: the complete curves.
    data = [go.Scatter(mode='lines', x=df.index, y=df[col], name=col)
            for col in trace_columns]

    # One slider step per row. Each step animates to the frame whose name is
    # the row index; names are passed as strings on both sides so they match.
    steps = [dict(method="animate", args=[[f'{i}'], {"title": f'step:{i}'}], label=f'{i}')
             for i in df.index]
    sliders = [dict(active=0, currentvalue={"prefix": "Step: "}, steps=steps)]

    # Animation options for the Pause button. The key is `fromcurrent`
    # (it was misspelled `formcurrent`).
    pause_options = dict(frame=dict(duration=0, redraw=False),
                         mode="immediate",
                         fromcurrent=True,
                         transition=dict(duration=0, easing="linear"))
    buttons = [dict(label='Start', method='animate', args=[None]),
               dict(label='Pause', method='animate', args=[[None], pause_options])]

    layout = go.Layout(dict(xaxis=dict(range=[-100, 100]),
                            yaxis=dict(range=[-1.1, 1.1]),
                            updatemenus=[dict(type='buttons',
                                              buttons=buttons,
                                              direction="left",
                                              pad=dict(r=10, t=40),
                                              showactive=False,
                                              x=0.00,
                                              xanchor="right",
                                              y=0,
                                              yanchor="top")],
                            sliders=sliders))

    # Frame i shows both curves from row 0 up to and including row i.
    # `.loc[:i]` is label-based and inclusive, so x is sliced with `i + 1`
    # to keep x and y the same length (x used to be one point short).
    frames = [go.Frame(data=[go.Scatter(x=df.index[:i + 1], y=df.loc[:i, col])
                             for col in trace_columns],
                       layout=go.Layout(title_text=f'Step:{i}'),
                       name=f'{i}')
              for i in df.index]

    fig = go.Figure(data=data, layout=layout, frames=frames)
    fig.show()

How to add labels to the side color bar in clustermap in Seaborn/Python

I have written a Python script as follows to plot a clustermap.
import sys
import importlib
import matplotlib.pyplot as plt
# import PRCC function
import PRCC as prcc
import QSP_analysis as qa
#%%
import numpy as np
from pyDOE2 import lhs
# --- Load the sample table -------------------------------------------------
num_samples = 20
num_param = 15
num_readout = 9

header, data = qa.read_csv('3.csv')

# Column 0 is skipped; the next `num_param` columns are parameters and the
# remaining columns are readouts.
first_readout_col = num_param + 1
param_names = header[1:first_readout_col]
read_names = header[first_readout_col:]
lhd = data[:, 1:first_readout_col].astype(float)
readout = data[:, first_readout_col:].astype(float)

# --- Partial correlation ---------------------------------------------------
Rho, Pval, Sig, Pval_correct = prcc.partial_corr(
    lhd, readout, 1e-14, Type='Spearman', MTC='Bonferroni'
)

# Star annotations; stricter thresholds are applied later so they overwrite
# the weaker marks.
sig_txt = np.zeros((num_param, num_readout), dtype='U8')
for threshold, stars in ((5e-2, '*'), (1e-6, '**'), (1e-9, '***')):
    sig_txt[Pval_correct < threshold] = stars

# --- Side colour bars: first 10 parameters / first 2 readouts get their own colour
param_group = ["beige"] * 10 + ["khaki"] * (num_param - 10)
readout_group = ["#B4B4FF"] * 2 + ["mediumslateblue"] * (num_readout - 2)

# --- Plot and save -----------------------------------------------------------
importlib.reload(qa)
cm = qa.cluster_map(
    np.transpose(Pval),
    read_names,
    param_names,
    (10, 6),
    cmap="bwr",
    annot=np.transpose(sig_txt),
    row_colors=readout_group,
    col_colors=param_group,
    col_cluster=False,
    row_cluster=False,
    show_dendrogram=[False, False],
)
cm.savefig('heat.png', format='png', dpi=600, bbox_inches='tight')
The functions in the code are as below:
def read_csv(filename, header_line = 1, dtype = str):
    """Read a comma-separated file into a header row and a NumPy array.

    Parameters
    ----------
    filename : path of the file to read.
    header_line : number of leading rows to consume as header; the last one
        consumed is returned. With 0, the header is the empty string.
    dtype : dtype passed to ``np.asarray`` for the remaining rows.

    Returns
    -------
    (header, data) : the header row as a list of strings (or ``''``), and
        the remaining rows as an array of the requested dtype.

    Raises
    ------
    StopIteration if the file has fewer than ``header_line`` rows.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; otherwise newlines embedded in quoted fields
    # are altered before the reader sees them.
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        # header
        header = ''
        for _ in range(header_line):
            header = next(reader)
        # data
        data = np.asarray(list(reader), dtype = dtype)
    return header, data
def cluster_map(data, row_label, col_label, fig_size, annot = None,
                show_dendrogram = (True, True), **kwarg):
    """Draw a seaborn clustermap of `data` with fixed [-1, 1] colour limits.

    Parameters
    ----------
    data : 2-D values, wrapped in a DataFrame with the given labels.
    row_label, col_label : index and column labels for that DataFrame.
    fig_size : (width, height) in inches, applied to the finished figure.
    annot : passed through to ``sns.clustermap`` (rendered with ``fmt=''``).
    show_dendrogram : pair of booleans (row, column) toggling the visibility
        of the dendrogram axes. The default is an immutable tuple so it
        cannot be shared and mutated between calls; lists are still accepted.
    **kwarg : forwarded unchanged to ``sns.clustermap``.

    Returns
    -------
    The ClusterGrid returned by ``sns.clustermap``.
    """
    df = pd.DataFrame(data=data, index = row_label, columns = col_label)
    g = sns.clustermap(df, annot = annot, fmt = '',
                       vmin=-1, vmax=1, cbar_kws={"ticks":[-1, -.5, 0, .5, 1]}, **kwarg)
    # The clustered order, if needed, is available as
    # g.dendrogram_row.reordered_ind / g.dendrogram_col.reordered_ind.
    # Keep row tick labels horizontal and slant the column tick labels.
    g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), rotation=0)
    g.ax_heatmap.set_xticklabels(g.ax_heatmap.get_xticklabels(), rotation=-55, ha = 'left')
    g.ax_row_dendrogram.set_visible(show_dendrogram[0])
    g.ax_col_dendrogram.set_visible(show_dendrogram[1])
    g.fig.set_size_inches(*fig_size)
    return g
The PRCC function is a script that calls MATLAB and calculates the PRCC value. The main script reads a csv file with 20 rows and 24 columns with different headers. The output of the code is a clustermap based on some columns (vertical: read_names and horizontal: param_names).
I have added a color bar to categorize the variables on the horizontal and vertical axes. The output figure is shown below. How can I add labels to these color bars: "ABM" and "QSP" for the horizontal one, and "endpoint" and "pretreatment" for the vertical one?
Cluster-map from the script
The following example code supposes you are calling sns.clustermap with row_cluster=False and col_cluster=False, so the rows and columns stay in their original order (if they get reordered, the original groups will get separated).
groupby from itertools can be used to calculate run lengths of the lists of colors. Their cumulative sums indicate the border between colors. Averaging these positions suits to place the labels.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from itertools import groupby
def _color_run_borders(colors):
    """Return cumulative boundaries (starting at 0) of runs of equal consecutive colours."""
    run_lengths = [sum(1 for _ in run) for _, run in groupby(colors)]
    return np.cumsum([0] + run_lengths)


def cluster_map(data, row_label, col_label, fig_size, annot=None, row_color_labels=None, col_color_labels=None,
                show_dendrogram=(True, True), **kwarg):
    """Draw a seaborn clustermap and optionally label its side colour bars.

    Parameters
    ----------
    data : 2-D values, wrapped in a DataFrame with the given labels.
    row_label, col_label : index and column labels for that DataFrame.
    fig_size : (width, height) in inches, applied to the finished figure.
    annot : passed through to ``sns.clustermap`` (rendered with ``fmt=''``).
    row_color_labels, col_color_labels : one text label per run of identical
        consecutive colours in ``row_colors`` / ``col_colors``. Each label is
        centred on its run. Rows and columns are assumed to stay in their
        original order (``row_cluster=False``, ``col_cluster=False``).
    show_dendrogram : pair of booleans (row, column) toggling the visibility
        of the dendrogram axes.
    **kwarg : forwarded unchanged to ``sns.clustermap``.

    Returns
    -------
    The ClusterGrid returned by ``sns.clustermap``, so callers can e.g. save it.

    Raises
    ------
    KeyError if colour labels are given without the matching
    ``row_colors`` / ``col_colors`` keyword.
    """
    df = pd.DataFrame(data=data, index=row_label, columns=col_label)
    g = sns.clustermap(df, annot=annot, fmt='',
                       vmin=-1, vmax=1, cbar_kws={"ticks": [-1, -.5, 0, .5, 1]}, **kwarg)
    g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), va='center')

    if row_color_labels is not None:
        borders = _color_run_borders(kwarg['row_colors'])
        for b0, b1, label in zip(borders[:-1], borders[1:], row_color_labels):
            # x is in axes coordinates (just left of the bar), y in data coordinates.
            g.ax_row_colors.text(-0.06, (b0 + b1) / 2, label, color='black', ha='right', va='center',
                                 rotation=90, transform=g.ax_row_colors.get_yaxis_transform())

    if col_color_labels is not None:
        borders = _color_run_borders(kwarg['col_colors'])
        for b0, b1, label in zip(borders[:-1], borders[1:], col_color_labels):
            # x is in data coordinates, y in axes coordinates (just above the bar).
            g.ax_col_colors.text((b0 + b1) / 2, 1.06, label, color='black', ha='center', va='bottom',
                                 transform=g.ax_col_colors.get_xaxis_transform())

    # These three steps mirror the original helper this function replaces:
    # honour show_dendrogram and fig_size, and hand the grid back to the caller.
    g.ax_row_dendrogram.set_visible(show_dendrogram[0])
    g.ax_col_dendrogram.set_visible(show_dendrogram[1])
    g.fig.set_size_inches(*fig_size)
    return g
# Demo: a 7 x 12 matrix of uniform random values in [-1, 1) with two colour
# groups on each axis (10 + 2 columns, 2 + 5 rows) and one label per group.
# Clustering is switched off so the colour groups stay contiguous.
cluster_map(np.random.uniform(-1, 1, size=(7, 12)),
            fig_size=(12, 12),
            col_label=[*'ABCDEFGHIJKL'],
            row_label=['Alkaid', 'Mizar', 'Alioth', 'Megrez', 'Phecda', 'Merak', 'Dubhe'],
            col_colors=["beige"] * 10 + ["khaki"] * (12 - 10),
            row_colors=["#B4B4FF"] * 2 + ["mediumslateblue"] * (7 - 2),
            col_color_labels=["ABM", "QSP"],
            row_color_labels=["endpoint", "pretreatment"],
            row_cluster=False,
            col_cluster=False)
You can plot very complex heatmaps from data frame using a python package PyComplexHeatmap: https://github.com/DingWB/PyComplexHeatmap
https://github.com/DingWB/PyComplexHeatmap/blob/main/notebooks/examples.ipynb

How do I show different yaxis value in a subplot?

I want to show different ranges in the first row and different ranges in the second row?
For instance first row can show up to 50 and second can show up to 100?
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
# Fix the seed so the sample data is reproducible.
np.random.seed(41)

# Three normally distributed samples, drawn in order A, B, C.
score_array_A, score_array_B, score_array_C = (
    np.random.normal(loc=loc, scale=scale, size=size)
    for size, loc, scale in ((100, 15, 5), (200, 50, 10), (300, 70, 15))
)

# Wrap each sample in a frame tagged with its 'D' group.
score_df_A, score_df_B, score_df_C = (
    pd.DataFrame({'T(s)': scores, 'D': group})
    for scores, group in ((score_array_A, '2'), (score_array_B, '3'), (score_array_C, '4'))
)

# Stack the three frames and derive 'Req' from the remainder of T(s) / 5.
score_data = pd.concat([score_df_A, score_df_B, score_df_C])
remainder_above_one = score_data['T(s)'] % 5 > 1
score_data = score_data.assign(Req=np.where(remainder_above_one, "1", "5"))
# Box plots of T(s), faceted by 'D' (columns) and 'Req' (rows).
box_options = dict(
    y='T(s)',
    facet_col='D',
    facet_row='Req',
    facet_col_wrap=0,
    template='simple_white',
    width=600,
    height=300,
)
px.box(data_frame=score_data, **box_options)
After creating the figure using Plotly Express update each yaxis so that it is not configured to match the primary yaxis. Have also updated showticklabels
import plotly.express as px
import numpy as np
import pandas as pd
# Reproducible sample data.
np.random.seed(41)

# Three normal samples (n, mean, std): (100, 15, 5), (200, 50, 10), (300, 70, 15).
score_array_A = np.random.normal(15, 5, 100)
score_array_B = np.random.normal(50, 10, 200)
score_array_C = np.random.normal(70, 15, 300)

# One frame per sample, each tagged with its 'D' group label.
score_df_A = pd.DataFrame(dict([("T(s)", score_array_A), ("D", "2")]))
score_df_B = pd.DataFrame(dict([("T(s)", score_array_B), ("D", "3")]))
score_df_C = pd.DataFrame(dict([("T(s)", score_array_C), ("D", "4")]))

# Stack the frames, then label each row by the remainder of T(s) / 5.
frames_in_order = [score_df_A, score_df_B, score_df_C]
score_data = pd.concat(frames_in_order)
req_labels = np.where(score_data["T(s)"] % 5 > 1, "1", "5")
score_data = score_data.assign(Req=req_labels)
# Box plots of T(s), faceted by 'D' (columns) and 'Req' (rows).
fig = px.box(
    data_frame=score_data,
    y="T(s)",
    facet_col="D",
    facet_row="Req",
    facet_col_wrap=0,
    template="simple_white",
    width=600,
    height=300,
)
# Decouple every y axis from the primary one so each facet gets its own range,
# and show tick labels on all of them. update_yaxes applies the settings to
# every y axis of the figure, replacing the manual scan of layout keys.
fig.update_yaxes(matches=None, showticklabels=True)

I'm getting an error trying to use ColumnDataSource in Bokeh

I'm getting this error:
TypeError: Object of type Interval is not JSON serializable
Here is my code.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.models import NumeralTickFormatter
def construct_labels(start, end):
    """Return one '(left, right]' label per pair of bin edges.

    Parameters
    ----------
    start : sequence of left edges.
    end : sequence of right edges, paired positionally with `start`.

    Returns
    -------
    list of str, e.g. ``['(0.0, 0.5]', '(0.5, 1.0]']``.
    """
    # Pair the edges directly instead of indexing `end` by position.
    return ['({}, {}]'.format(left, right) for left, right in zip(start, end)]
# Sample data: 10 values drawn uniformly from [0, 4).
values = {'Length': np.random.uniform(0, 4, 10)}
df = pd.DataFrame(values, columns=['Length'])
bin_step_size = 0.5
# List of bin points.
p_bins = np.arange(0, (df['Length'].max() + bin_step_size), bin_step_size)
# Reduce the tail to create the left side bounds.
p_left_limits = p_bins[:-1].copy()
# Cut the head to create the right side bounds.
p_right_limits = np.delete(p_bins, 0)
# Create the bins.
p_range_bins = pd.IntervalIndex.from_arrays(p_left_limits, p_right_limits)
# Create labels.
p_range_labels = construct_labels(p_left_limits, p_right_limits)
# Assign every length to one of the interval bins.
p_ranges_binned = pd.cut(
    df['Length'],
    p_range_bins,
    labels=p_range_labels,
    precision=0,
    include_lowest=True)
out = p_ranges_binned
# Count per bin, kept in bin order rather than sorted by frequency.
counts = out.value_counts(sort=False)
total_element_count = len(df.index)
foo = pd.DataFrame({'bins': counts.index, 'counts': counts})
foo.reset_index(drop=True, inplace=True)
# Share of all elements per bin, as a fraction and as a percentage.
foo['percent'] = foo['counts'].apply(lambda x: x / total_element_count)
foo['percent_full'] = foo['counts'].apply(lambda x: x / total_element_count * 100)
bin_labels = p_range_labels
# Data Container
# NOTE(review): 'bins' comes from the interval bins built above; the reported
# "Object of type Interval is not JSON serializable" error points at this column.
# 'labels' is passed as a one-column DataFrame rather than the plain list.
source = ColumnDataSource(dict(
    bins=foo['bins'],
    percent=foo['percent'],
    count=foo['counts'],
    labels=pd.DataFrame({'labels': bin_labels})
))
# Categorical x axis with one factor per bin label.
p = figure(x_range=bin_labels, plot_height=600, plot_width=1200, title="Range Counts",
           toolbar_location=None, tools="")
p.vbar(x='labels', top='percent', width=0.9, source=source)
p.yaxis[0].formatter = NumeralTickFormatter(format="0.0%")
# Rotate the x tick labels by pi/2 radians.
p.xaxis.major_label_orientation = math.pi / 2
p.xgrid.grid_line_color = None
p.y_range.start = 0
output_file("bars.html")
show(p)
The error comes from here:
# Data source as written in the question; this is the statement that fails.
source = ColumnDataSource(dict(
    bins=foo['bins'],
    percent=foo['percent'],
    count=foo['counts'],
    labels=pd.DataFrame({'labels': bin_labels})
))
The bins you passed in are of an interval type that cannot be JSON serialized.
After reviewing your code, I see that this bins variable is not used in your plotting, so you can change it to:
# Same data source without the 'bins' column, with 'labels' given as the plain list.
source = ColumnDataSource(dict(
    percent=foo['percent'],
    count=foo['counts'],
    labels=bin_labels
))
Notice that I also changed your labels to bin_labels, which is a list and ColumnDataSource can use list as input. But you may want to further format these labels, as right now they are just like
['(0.0, 0.5]',
'(0.5, 1.0]',
'(1.0, 1.5]',
'(1.5, 2.0]',
'(2.0, 2.5]',
'(2.5, 3.0]',
'(3.0, 3.5]',
'(3.5, 4.0]']
You might want to format them to something prettier.
After this small change you should be able to see your bar graph:

Bokeh patches plot with dates as x-axis shifts the ticks one to the right

I'm trying to adapt the brewer example (http://docs.bokeh.org/en/latest/docs/gallery/stacked_area.html) to my needs. One of the things I'd like is to have dates at the x-axis. I did the following:
# Month-start dates from 1950-01-01 to 1951-07-01, as strings, used as categorical x-axis factors.
timesteps = [str(x.date()) for x in pd.date_range('1950-01-01', '1951-07-01', freq='MS')]
p = figure(x_range=FactorRange(factors=timesteps), y_range=(0, 800))
# Tilt the x tick labels by pi/4 radians.
p.xaxis.major_label_orientation = np.pi/4
as an adaptation of the previous line
p = figure(x_range=(0, 19), y_range=(0, 800))
The dates are displayed, but the first date 1950-01-01 sits at x=1. How can I shift it to x=0? The first real data points I have are for that date and therefore should be displayed together with that date and not one month later.
Well, if you have a list of strings as your x axis, then apparently the count starts at 1, so you have to modify your x data for the plot to start at 1. Actually the brewer example (http://docs.bokeh.org/en/latest/docs/gallery/stacked_area.html) has a range from 0 to 19, so it has 20 data points, not 19 like your timesteps list. I modified the x input for the plot to data['x'] = np.arange(1,N+1) so that it runs from 1 to N. And I added one more month to your list: timesteps = [str(x.date()) for x in pd.date_range('1950-01-01', '1951-08-01', freq='MS')]
Here is the complete code:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_file
from bokeh.palettes import brewer
# Number of x positions, and the names of the ten stacked series.
N = 20
categories = ['y' + str(x) for x in range(10)]

# x runs from 1 to N, followed by one random integer series in [10, 100) per
# category. The series are drawn in category order.
data = {'x': np.arange(1, N + 1)}
data.update({cat: np.random.randint(10, 100, size=N) for cat in categories})

# Index the frame by x so that only the category columns remain.
df = pd.DataFrame(data).set_index(['x'])
def stacked(df, categories):
    """Build closed polygon outlines for a stacked-area chart.

    Parameters
    ----------
    df : DataFrame with one column per category.
    categories : column names, in stacking order (bottom first).

    Returns
    -------
    dict mapping each category to a 1-D array of length ``2 * len(df)``:
    the lower boundary reversed, followed by the upper boundary. An empty
    `categories` yields an empty dict.
    """
    areas = dict()
    # Running lower boundary; it starts at zero for the bottom category.
    # Sized from the frame so that an empty `categories` does not fail.
    lower = np.zeros(len(df))
    for cat in categories:
        # `upper` replaces the old local `next`, which shadowed the builtin.
        upper = lower + np.asarray(df[cat])
        areas[cat] = np.hstack((lower[::-1], upper))
        lower = upper
    return areas
# This script never imports `bokeh` as a module, so `bokeh.models.FactorRange`
# and `bokeh.io.show` would raise NameError. Import the class explicitly and
# use the `show` that is already imported from bokeh.plotting.
from bokeh.models import FactorRange

areas = stacked(df, categories)
colors = brewer["Spectral"][len(areas)]
# x coordinates of each polygon: right-to-left along the lower boundary,
# then left-to-right along the upper boundary.
x2 = np.hstack((data['x'][::-1], data['x']))
# One month-start date string per x position, used as categorical factors.
timesteps = [str(x.date()) for x in pd.date_range('1950-01-01', '1951-08-01', freq='MS')]
p = figure(x_range=FactorRange(factors=timesteps), y_range=(0, 800))
p.grid.minor_grid_line_color = '#eeeeee'
p.patches([x2] * len(areas), [areas[cat] for cat in categories],
          color=colors, alpha=0.8, line_color=None)
p.xaxis.major_label_orientation = np.pi/4
show(p)
And here is the output:
UPDATE
You can leave data['x'] = np.arange(0,N) from 0 to 19, and then use offset=-1 inside FactorRange, i.e. figure(x_range=bokeh.models.FactorRange(factors=timesteps,offset=-1),...
Update version bokeh 0.12.16
In this version I am using datetime for x axis which has the advantage of nicer formatting when zooming in.
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_file
from bokeh.palettes import brewer
# Month-start timestamps for the x axis, one per data row.
timesteps = list(pd.date_range('1950-01-01', '1951-07-01', freq='MS'))
N = len(timesteps)
cats = 10

# N x cats random integers in [10, 100); the columns are named y0 .. y9.
random_values = np.random.randint(10, 100, size=(N, cats))
df = pd.DataFrame(random_values).add_prefix('y')
def stacked(df):
    """Build polygon outlines for a stacked-area chart, one column per series.

    Parameters
    ----------
    df : DataFrame whose columns are the series to stack, left to right.

    Returns
    -------
    DataFrame with ``2 * len(df)`` rows and the same columns: for each
    series, the lower boundary in reversed row order followed by the upper
    boundary, with a fresh 0..n-1 index.
    """
    # Upper boundary of each series: running total across the columns.
    df_top = df.cumsum(axis=1)
    # Lower boundary: the upper boundary of the series to the left. The
    # leftmost series has nothing below it, so its shifted-in gap is filled
    # with zero. The first column is looked up by position, so this works
    # for any column names rather than only a column called 'y0'.
    df_bottom = df_top.shift(axis=1)
    if len(df_bottom.columns) > 0:
        df_bottom = df_bottom.fillna({df_bottom.columns[0]: 0})
    # Reverse the rows so that bottom + top trace a closed outline.
    df_bottom = df_bottom[::-1]
    df_stack = pd.concat([df_bottom, df_top], ignore_index=True)
    return df_stack
# This script never imports `bokeh` as a module, so
# `bokeh.models.formatters.DatetimeTickFormatter` would raise NameError.
# Import the class explicitly instead.
from bokeh.models import DatetimeTickFormatter

areas = stacked(df)
colors = brewer['Spectral'][areas.shape[1]]
# x coordinates of each polygon: right-to-left along the lower boundary,
# then left-to-right along the upper boundary.
x2 = np.hstack((timesteps[::-1], timesteps))
p = figure(x_axis_type='datetime', y_range=(0, 800))
p.grid.minor_grid_line_color = '#eeeeee'
p.patches([x2] * areas.shape[1], [areas[c].values for c in areas],
          color=colors, alpha=0.8, line_color=None)
p.xaxis.formatter = DatetimeTickFormatter(
    months=["%Y-%m-%d"])
# Tilt the labels by pi/4, as in the categorical version of this script.
# The literal 3.4142/4 used previously looks like a mistyped pi/4.
p.xaxis.major_label_orientation = np.pi/4
output_file('brewer.html', title='brewer.py example')
show(p)

Categories

Resources