I am trying to create a simple interactive graph with a checkbox group. I want the checkboxes to result in showing the appropriate line on the graph when ticked. I am doing this within Jupyter Notebook.
I've managed to get it embedded in Jupyter, and I wrote a callback function that does execute code. I am able to create a new ColumnDataSource from the checkbox selection. However, the graph is just not updating.
I've gone through every post on here I could, and look at every tutorial I could find. Most of them simply have an update callback which creates the new source, then sets the graph's source to the new one, which I believe is supposed to update the graph. I have also seen variations where people assign it as oldsource.data = newsource.data. This doesn't work for me either.
I am wondering whether there is any inherent limitations in embedding to Jupyter Notebook that I need Javascript for, or limitations to how sources can be updated. Or maybe I am just missing something very obvious? Code below:
import os
import pandas as pd
import numpy as np
import bokeh.plotting as bk
import bokeh.layouts as ly
import bokeh.models as md
import bokeh.colors as cl
import bokeh.palettes as plet
from bokeh.io import curdoc
from bokeh.io import show as io_show
from bokeh.models.widgets import CheckboxGroup, Select, Button
from bokeh.plotting import output_file, show, figure, output_notebook, reset_output, curdoc
data_list = ["one", "two", "three", "four", "five", "six"]
data_list2 = ["one", "two"]
data_fac = [1, 2, 3, 4, 5, 6]
data_fac_dict = dict(zip(data_list,data_fac))
data_x = np.linspace(0,100)
df = pd.DataFrame(columns = data_list)
def modify_doc(doc):
def make_data(data_list):
#Make new source with appropriate datasets
df = pd.DataFrame(columns = data_list)
for case in data_list:
df[case] = data_x * data_fac_dict[case]
result = md.ColumnDataSource(df)
return result
#Make colors
list_colors = plet.Dark2[len(data_list)]
dict_colors = dict(zip(data_list,list_colors))
#Default source with one datapoint
src = make_data(["one"])
print(src.data.keys())
#Plot graphs
p = bk.figure()
for case in src.data.keys():
if case != "index":
p.line(source = src, x = 'index', y = case, color = dict_colors[case])
print("plotting loop")
def update(attr,old,new):
#Callback
print("update triggered")
selection = list()
for i in wg_chk.active:
selection.append(data_list[i])
src = make_data(selection)
print(selection)
wg_chk = CheckboxGroup(labels = data_list, active = [0]*len(data_list))
wg_chk.on_change('active', update)
layout = ly.row(wg_chk,p)
doc.add_root(layout)
bk.show(modify_doc, notebook_url='localhost:8888')
UPDATE #1
I changed the code in the callback to make the appropriate dataframe, then create a dict using ColumnDataSource.from_df, then set src.data equal to it as below. Still doesn't seem to work. I used a print to make sure data_new has correct keys.
df_new = make_df(selection)
data_new = md.ColumnDataSource.from_df(df_new)
src.data = data_new
For clarity, I am using the newest version of Bokeh and Python as of today (Bokeh 1.0.2, Python 3.7.1)
UPDATE #2
As per the comments, I pre-generated all the required glyphs ahead of time, so they are, in essence, "slots for data" instead of being generated on demand for any amount of datasets. As they are now persistent, this allows me to toggle them on/off with the .visible property easily. I now have six "slots" for data to be plotted with corresponding glyphs, and I added a function within the callback to update their respective data sources (in this case, changing a linear to a quadratic curve). I also updated Bokeh to the newest version (1.3.4). Note that this is specifically embeddable in a Jupyter Notebook.
Here is the code for reference:
import os
import pandas as pd
import numpy as np
import bokeh.plotting as bk
import bokeh.layouts as ly
import bokeh.models as md
import bokeh.colors as cl
import bokeh.palettes as plet
from bokeh.io import curdoc
from bokeh.io import show as io_show
from bokeh.models.widgets import CheckboxGroup, Select, Button, RadioGroup
from bokeh.plotting import output_file, show, figure, output_notebook, reset_output, curdoc
data_list = ["one", "two", "three", "four", "five", "six"]
data_list2 = ["one", "two"]
data_fac = [1, 2, 3, 4, 5, 6]
data_fac_dict = dict(zip(data_list,data_fac))
data_x = np.linspace(0,100)
df = pd.DataFrame(columns = data_list)
for case in data_list:
df[case] = data_x * data_fac_dict[case] + np.power(data_x, 3) * data_fac_dict[case]
def modify_doc(doc):
#Make colors
list_colors = plet.Dark2[len(data_list)]
dict_colors = dict(zip(data_list,list_colors))
p = bk.figure()
def make_line(case):
line = p.line(x = 'index', y = case, source = src_store[case], color = dict_colors[case])
return line
#Make six sources, make one line per source, and set them to invisible
src_store = dict()
list_lines = dict()
for case in data_list:
src_store[case] = md.ColumnDataSource(df[[case]])
list_lines[case] = make_line(case)
list_lines[case].visible = False
#First checkbox defaults to ticked, so let's show it by default.
list_lines["one"].visible = True
def modify_data(order):
#Modify the data and update the six sources' data with it
df = pd.DataFrame(columns = data_list)
src_store_new = dict()
data_new = dict()
for case in data_list:
df[case] = data_x * data_fac_dict[case] + np.power(data_x,order) * data_fac_dict[case]
data_new[case] = md.ColumnDataSource.from_df(df[[case]])
src_store[case].data = data_new[case]
def update(attr,old,new):
#Callback
print("update triggered")
#Get selection of lines to display
selection = list()
for i in wg_chk.active:
selection.append(data_list[i])
#Set visibility according to selection
for case in data_list:
list_lines[case].visible = case in selection
#Get line multiplier from radio buttons and update sources
order = wg_rad.active + 1
modify_data(order)
print(selection)
wg_rad = RadioGroup(labels=["x*0", "x*1"], active = 0)
wg_chk = CheckboxGroup(labels = data_list, active = [0]*len(data_list))
wg_chk.on_change('active', update)
wg_rad.on_change('active', update)
layout = ly.row(ly.column(wg_chk,wg_rad),p)
doc.add_root(layout)
bk.show(modify_doc, notebook_url='localhost:8888')
When you plot a Bokeh glyph, that glyph object has an associated data source. If you want the glyph to update, you need to update that existing datasource, i.e. modify it by setting it's .data property. The code above does not do that. It creates a new data source, that is not attached to or configured on anything, and then immediate throws it away (it's a local variable in a function, since nothing keeps a reference to it, it disappears when the function finishes).
You need to update whatever existing data source that you used initially:
source.data = new_data # plain python dict
And, at least as of Bokeh 1.3.4 new_data must be a plain Python dictionary. It is not supported to "migrate" a .data value from one CDS to another:
source1.data = source2.data # BAD! WILL NOT WORK
Attempting to do so will probably raise an explicit error in the near future. There is a from_df static method on ColumnDataSource you can use to convert DataFrames to the right kind of dict.
Related
I have 2 bokeh rows. The top row contains a DataTable and a TextInput, both of which are able to stretch_width in order to fit the width of the browser. The bottom row contains a gridplot, which is able to stretch_width, but only does so by distorting the scale of the image. Ideally, I would like the gridplot to update the amount of columns displayed based on the size of the browser.
Consider the following example:
import pandas as pd
from bokeh.models.widgets import DataTable, TableColumn
from bokeh.models import ColumnDataSource, TextInput
from bokeh.plotting import figure, output_file, save
from bokeh.layouts import row, column, gridplot
def get_datatable():
"""this can stretch width without issue"""
df = pd.DataFrame({'a': [0, 1, 2], 'b': [2, 3, 4]})
source = ColumnDataSource(df)
Columns = [TableColumn(field=i, title=i) for i in df.columns]
data_table = DataTable(columns=Columns, source=source, sizing_mode='stretch_width', max_width=9999)
return data_table
def get_text_input():
"""this can stretch width without issue"""
return TextInput(value='Example', title='Title', sizing_mode="stretch_width", max_width=9999)
def get_gridplot():
"""
this requires columns to be hard-coded
stretch_width is an option, but only distorts the images if enabled
"""
figs = []
for _ in range(30):
fig = figure(x_range=(0,10), y_range=(0,10))
_ = fig.image_rgba(image=[], x=0, y=0)
figs.append(fig)
return gridplot(children=figs, ncols=2)
top_row = row([get_datatable(), get_text_input()], max_width=9999, sizing_mode='stretch_width')
bottom_row = row(get_gridplot())
col = column(top_row, bottom_row, sizing_mode="stretch_width")
output_file("example.html")
save(col)
My end goal is to have the gridplot automatically update the amount of columns based on the width of the browser. Is there a way to do this natively in bokeh? If not, is it possible to do this via a CustomJs javascript callback?
Solution
Consider using sizing_mode=“scale_width” when calling figure.
fig = figure(x_range=(0,10), y_range=(0,10), sizing_mode=“scale_width”)
Note
It may be preferable to use scale_width instead of stretch_width more generally.
Bokeh Doc Example: https://docs.bokeh.org/en/latest/docs/user_guide/layout.html#multiple-objects
I'm trying to get xy coordinates of points drawn by the user. I want to have them as a dictionary, a list or a pandas DataFrame.
I'm using Bokeh 2.0.2 in Jupyter. There'll be a background image (which is not the focus of this post) and on top, the user will create points that I could use further.
Below is where I've managed to get to (with some dummy data). And I've commented some lines which I believe are the direction in which I'd have to go. But I don't seem to get the grasp of it.
from bokeh.plotting import figure, show, Column, output_notebook
from bokeh.models import PointDrawTool, ColumnDataSource, TableColumn, DataTable
output_notebook()
my_tools = ["pan, wheel_zoom, box_zoom, reset"]
#create the figure object
p = figure(title= "my_title", match_aspect=True,
toolbar_location = 'above', tools = my_tools)
seeds = ColumnDataSource({'x': [2,14,8], 'y': [-1,5,7]}) #dummy data
renderer = p.scatter(x='x', y='y', source = seeds, color='red', size=10)
columns = [TableColumn(field="x", title="x"),
TableColumn(field="y", title="y")]
table = DataTable(source=seeds, columns=columns, editable=True, height=100)
#callback = CustomJS(args=dict(source=seeds), code="""
# var data = source.data;
# var x = data['x']
# var y = data['y']
# source.change.emit();
#""")
#
#seeds.x.js_on_change('change:x', callback)
draw_tool = PointDrawTool(renderers=[renderer])
p.add_tools(draw_tool)
p.toolbar.active_tap = draw_tool
show(Column(p, table))
From the documentation at https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#pointdrawtool:
The tool will automatically modify the columns on the data source corresponding to the x and y values of the glyph. Any additional columns in the data source will be padded with the declared empty_value, when adding a new point. Any newly added points will be inserted on the ColumnDataSource of the first supplied renderer.
So, just check the corresponding data source, seeds in your case.
The only issue here is if you want to know exactly what point has been changed or added. In this case, the simplest solution would be to create a custom subclass of PointDrawTool that does just that. Alternatively, you can create an additional "original" data source and compare seeds to it each time it's updated.
The problem is that the execute it in Python. But show create a static version. Here is a simple example that fix it! I removed the table and such to make it a bit cleaner, but it will also work with it:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import PointDrawTool
output_notebook()
#create the figure object
p = figure(width=400,height=400)
renderer = p.scatter(x=[0,1], y=[1,2],color='red', size=10)
draw_tool = PointDrawTool(renderers=[renderer])
p.add_tools(draw_tool)
p.toolbar.active_tap = draw_tool
# This part is imporant
def app(doc):
global p
doc.add_root(p)
show(app) #<-- show app and not p!
here's my data :https://drive.google.com/drive/folders/1CabmdDQucaKW2XhBxQlXVNOSiNRtkMm-?usp=sharing
i want to use the select to choose the stock i want to show;
and slider to choose the year range i want to show;
and checkboxgroup to choose the index i want to compare with.
the problem is when i adjust the slider, the figure will update, but when i use the select and checkboxgroup, the figure won't update,
what's the reason?
from bokeh.io import curdoc
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, Slider, TextInput , Select , Div, CheckboxGroup
from bokeh.plotting import figure
import pandas as pd
import numpy as np
price=pd.read_excel('price.xlsx',index_col=0)
# input control
stock = Select(title='Stock',value='AAPL',options=[x for x in list(price.columns) if x not in ['S&P','DOW']])
yr_1 = Slider(title='Start year',value=2015,start=2000,end=2020,step=1)
yr_2 = Slider(title='End year',value=2020,start=2000,end=2020,step=1)
index = CheckboxGroup(labels=['S&P','DOW'],active=[0,1])
def get_data():
compare_index = [index.labels[i] for i in index.active]
stocks = stock.value
start_year = str(yr_1.value)
end_year = str(yr_2.value)
select_list = []
select_list.append(stocks)
select_list.extend(compare_index)
selected = price[select_list]
selected = selected [start_year:end_year]
for col in selected.columns:
selected[col]=selected[col]/selected[col].dropna()[0]
return ColumnDataSource(selected)
def make_plot(source):
fig=figure(plot_height=600, plot_width=700, title="",sizing_mode="scale_both", x_axis_type="datetime")
data_columns = list(source.data.keys())
for data in data_columns[1:]:
fig.line(x=data_columns[0],y=data,source=source,line_width=3, line_alpha=0.6, legend_label=data)
return fig
def update(attrname, old, new):
new_source = get_data()
source.data.clear()
source.data.update(new_source.data)
#get the initial value and plot
source = get_data()
plot = make_plot(source)
#control_update
stock.on_change('value', update)
yr_1.on_change('value', update)
yr_2.on_change('value', update)
index.on_change('active', update)
# Set up layouts and add to document
inputs = column(stock, yr_1, yr_2, index)
curdoc().add_root(row(inputs, plot, width=800))
curdoc().title = "Stocks"
You're creating a new ColumnDataSource for new data. That's not a good approach.
Instead, create it once and then just assign its data as appropriate.
In your case, I would do it like this:
Create ColumnDataSource just once, as described above
Do not use .update on CDS, just reassign .data
Create the legend manually
For that one line that's susceptible to the select change choose a static x field and use it everywhere instead
Change the first legend item's label when you change the select's value to instead of that x field it has the correct name
I am using Bokeh on Jupyter Notebooks to help with data visualization. I wanted to be able to plot the data from a panda DataFrame, and then when I hover over the Bokeh plot, all the feature values should be visible in the hover Box. However, with the code below, only the index correctly displays, and all the other fields appear as ???, and I'm not sure why.
Here is my working example
//Importing all the neccessary things
import numpy as np
import pandas as pd
from bokeh.layouts import row, widgetbox, column
from bokeh.models import CustomJS, Slider, Select, HoverTool
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.io import push_notebook, output_notebook, curdoc
from bokeh.client import push_session
#from bokeh.scatter_with_hover import scatter_with_hover
output_notebook()
np.random.seed(0)
samples = np.random.randint(low = 0, high = 1000, size = 1000)
samples = samples.reshape(200,5)
cols = ["A", "B", "C", "D", "E"]
df = pd.DataFrame(samples, columns=cols)
# Here is a dict of some keys that I want to be able to pick from for plotting
labels = list(df.columns.values)
axis_map = {key:key for key in labels}
code2 = ''' var data = source.data;
//axis values with select widgets
var value1 = val1.value;
var value2 = val2.value;
var original_data = original_source.data
// get data corresponding to selection
x = original_data[value1];
y = original_data[value2];
data['x'] = x;
data['y'] = y;
source.trigger('change');
// set axis labels
x_axis.axis_label = value1;
y_axis.axis_label = value2;
'''
datas = "datas"
source = ColumnDataSource(data=dict( x=df['A'], y=df['B'],
label = labels, datas = df))
original_source = ColumnDataSource(data=df.to_dict(orient='list'))
a= source.data[datas].columns.values
#print a.columns.values
print a
TOOLS = [ HoverTool(tooltips= [(c, '#' + c) for c in source.data[datas].columns.values] +
[('index', '$index')] )]
# hover.tooltips.append(('index', '$index'))
#plot the figures
plot = figure(plot_width=800, plot_height=800, tools= TOOLS)
plot.scatter(x= "x",y="y", source=source, line_width=2, line_alpha=0.6,
size = 3)
callback = CustomJS(args=dict(source=source, original_source = original_source,
x_axis=plot.xaxis[0],y_axis=plot.yaxis[0]), code=code2)
#Create two select widgets to pick the features of interest
x_axis = Select(title="X Axis", options=sorted(axis_map.keys()), value="A", callback = callback)
callback.args["val1"] = x_axis
callbackDRange.args["val1"]= x_axis
y_axis = Select(title="Y Axis", options=sorted(axis_map.keys()), value="B", callback = callback)
callback.args["val2"] = y_axis
callbackDRange.args["val2"]= y_axis
plot.xaxis[0].axis_label = 'A'
plot.yaxis[0].axis_label = 'B'
#Display the graph in a jupyter notebook
layout = column(plot, x_axis, y_axis )
show(layout, notebook_handle=True)
I'm even passing in the full dataframe into the source ColumnDataSource so I can access it later, but it won't work. Any guidance would be greatly appreciated!
Running your code in recent version of Bokeh results in the warning:
Which suggests the root of the problem. If we actually look at the data source you create for the glyph:
source = ColumnDataSource(data=dict(x=df['A'],
y=df['B'],
label=labels,
datas = df))
It's apparent what two things are going wrong:
You are violating the fundamental assumption that all CDS columns must always be the same length at all times. The CDS is like a "Cheap DataFrame", i.e. it doesn't have ragged columns. All the columns must have the same length just like a DataFrame.
You are configuring a HoverTool to report values from columns named "A", "B", etc. but your data source has no such columns at all! The CDS you create above for your glyph has columns named:
"x", "y"
"labels" which is the wrong length
"datas" which has a bad value, columns should normally be 1-d arrays
The last column "datas" is irrelevant, BTW. I think you must think the hover tool will somehow look in that column for information to display but that is not how hover tool works. Bottom line:
If you configured a tooltip for a field #A then your CDS must have a column named "A"
And that's exactly what's not the case here.
It's hard to say exactly how you should change your code without more context about what exactly you want to do. I guess I'd suggest taking a closer look at the documention for hover tools in the User's Guide
I have a bokeh figure that has a reset button in the toolbar. Basically, I want to "reset" the figure when I update the data that I'm plotting in the figure. How can I do that?
UPDATE: A PR has been submitted for this feature. After Bokeh 0.12.16 is released, the following will work:
from bokeh.io import show
from bokeh.layouts import column
from bokeh.models import Button, CustomJS
from bokeh.plotting import figure
p = figure(tools="reset,pan,wheel_zoom,lasso_select")
p.circle(list(range(10)), list(range(10)))
b = Button()
b.js_on_click(CustomJS(args=dict(p=p), code="""
p.reset.emit()
"""))
show(column(p, b))
As of Bokeh 0.12.1 there is no built in function to do this. It would possible to make a custom extension that does this. However, that would take a little work and experimentation and dialogue. If you'd like to pursue that option, I'd encourage you to come to the public mailing list which is better suited to iterative collaboration and discussion than SO. Alternatively, please feel free to open a feature request on the project issue tracker
Example with a radiogroup callback, that's the best way I found to reset while changing plots, just get the range of the data and set it to the range:
from bokeh.plotting import Figure
from bokeh.models import ColumnDataSource, CustomJS, RadioGroup
from bokeh.layouts import gridplot
from bokeh.resources import CDN
from bokeh.embed import file_html
x0 = range(10)
x1 = range(100)
y0 = [i for i in x0]
y1 = [i*2 for i in x1][::-1]
fig=Figure()
source1=ColumnDataSource(data={"x":[],"y":[]})
source2=ColumnDataSource(data={"x0":x0,"x1":x1,"y0":y0,"y1":y1})
p = fig.line(x='x',y='y',source=source1)
callback=CustomJS(args=dict(s1=source1,s2=source2,px=fig.x_range,py=fig.y_range), code="""
var d1 = s1.get("data");
var d2 = s2.get("data");
var val = cb_obj.active;
d1["y"] = [];
var y = d2["y"+val];
var x = d2["x"+val];
var min = Math.min( ...y );
var max = Math.max( ...y );
py.set("start",min);
py.set("end",max);
var min = Math.min( ...x );
var max = Math.max( ...x );
px.set("start",min);
px.set("end",max);
for(i=0;i<=y.length;i++){
d1["y"].push(d2["y"+val][i]);
d1["x"].push(d2["x"+val][i]);
}
s1.trigger("change");
""")
radiogroup=RadioGroup(labels=['First plot','Second plot'],active=0,callback=callback)
grid = gridplot([[fig,radiogroup]])
outfile=open('TEST.html','w')
outfile.write(file_html(grid,CDN,'Reset'))
outfile.close()
The Bokeh website is seriously lacking in examples for different ways to set callbacks for the different widgets.
I was struggling to make it work with Bokeh 2.2.1, but this JS p.reset.emit() does not seem to work.
What worked for me was to manually set the Figure renderers attribute to an empty list inside a callback function, called via on_click(). This only works with a Bokeh server running, though:
$ bokeh serve --show example.py
example.py:
from bokeh.layouts import column
from bokeh.models import Button
from bokeh.plotting import curdoc, figure
p = figure(tools="reset,pan,wheel_zoom,lasso_select")
p.circle(list(range(10)), list(range(10)))
def clear_plot(attr):
p.renderers = []
b = Button(label="Clear plot")
b.on_click(clear_plot)
curdoc().add_root(column(p, b))