Remembering selected points in streaming Bokeh plot - python

I'm slamming my head against the wall with this problem. I simply want to be able to make a selection (lasso, box_select) of points in a streaming bokeh scatterplot that will be remembered when the figure updates (e.g., with new data in the time series).
I think this will require me to access the list of indices of the currently selected points, but I can't figure out how to do that. Here's an example where I try (slightly modified from the example at
http://docs.bokeh.org/en/latest/docs/user_guide/server.html#streaming-data-with-the-server).
Note that selected points are deselected when the plot updates to the new streamed (shuffled in this example) data.
import time
from random import shuffle
from bokeh.plotting import figure, output_server, cursession, show
# Publish the figure to the Bokeh server under the "remember_selected" document.
output_server("remember_selected")

p = figure(plot_width=400, plot_height=400,
           tools="lasso_select,box_select,help")
p.scatter([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], name='ex_line')
show(p)

# Look the scatter renderer up by its name to reach the data source behind it.
renderer = p.select({"name": "ex_line"})
ds = renderer[0].data_source

# Simple animation: reshuffle the y column every two seconds.
while True:
    shuffle(ds.data["y"])

    # Try to read the current selection at each granularity
    # (the question reports that this does not reflect the browser's selection).
    print(ds.selected['2d'])
    print(ds.selected['1d'])
    print(ds.selected['0d'])

    # Push the modified source back to the server, then wait.
    cursession().store_objects(ds)
    time.sleep(2.)

To get callbacks, you need to set up a complete Bokeh application. The following demonstrates this:
from bokeh.models import ColumnDataSource, Plot
from bokeh.plotting import figure
from bokeh.properties import Instance
from bokeh.server.app import bokeh_app
from bokeh.models.widgets import HBox
from bokeh.server.utils.plugins import object_page
from random import shuffle
class App(HBox):
    """Bokeh server applet holding one scatter plot and its data source.

    Selecting points (lasso or box select) triggers ``click``, which prints
    the selected indices and reshuffles the y column of the source.
    """

    extra_generated_classes = [["App", "App", "HBox"]]
    jsmodel = "HBox"

    # plots
    plot = Instance(Plot)
    source = Instance(ColumnDataSource)

    @classmethod
    def create(cls):
        """Build the applet: data source, figure, layout and event wiring.

        Returns:
            The configured ``App`` instance.
        """
        # create layout widgets
        obj = cls()

        # outputs
        obj.source = ColumnDataSource(data=dict(x=[1, 2, 3, 4, 5],
                                                y=[6, 7, 2, 4, 5]))
        obj.plot = figure(x_range=(0, 6), y_range=(0, 7),
                          tools="lasso_select,box_select,help", plot_width=400,
                          plot_height=400)
        obj.plot.scatter('x', 'y', source=obj.source, name='ex_line')

        # layout
        obj.children = [obj.plot]
        obj.setup_events()
        return obj

    def setup_events(self):
        """Register ``click`` as the handler for ``selected`` changes on the source."""
        super(App, self).setup_events()
        # Nothing to hook up when no source has been assigned.
        if self.source:
            self.source.on_change('selected', self, 'click')

    def click(self, cds, name, prev, new):
        """Print the newly selected point indices, then shuffle the y data.

        Args:
            cds: First callback argument (unused here).
            name: Second callback argument (unused here).
            prev: Previous value of the changed property (unused here).
            new: New value of the changed property; its ``['1d']['indices']``
                entry is printed.
        """
        print(new['1d']['indices'])
        shuffle(self.source.data['y'])
@bokeh_app.route("/bokeh/select/")
@object_page("select")
def make():
    """Create and return the applet served at ``/bokeh/select/``."""
    app = App.create()
    return app

Related

Open new plot with `bokeh` TapTool

I have a class Collection that holds a bunch of other class objects Thing that all have the same attributes with different values. The Collection.plot(x, y) method makes a scatter plot of the x values vs. the y values of all the collected Thing objects like so:
from bokeh.plotting import figure, show
from bokeh.models import TapTool
class Thing:
    """Three named values that can be drawn as their own scatter plot."""

    def __init__(self, foo, bar, baz):
        self.foo, self.bar, self.baz = foo, bar, baz

    def plot(self):
        """Return a new figure with foo, bar and baz drawn at x = 1, 2, 3."""
        # Plot all data for thing
        fig = figure()
        heights = [self.foo, self.bar, self.baz]
        fig.circle([1, 2, 3], heights)
        return fig
class Collection:
    """A group of Thing objects plotted against each other by attribute name."""

    def __init__(self, things):
        self.things = things

    def plot(self, x, y):
        """Scatter attribute ``x`` against attribute ``y`` across all things.

        Args:
            x: Name of the attribute used for the horizontal axis.
            y: Name of the attribute used for the vertical axis.

        Returns:
            The figure holding the scatter plot.
        """
        # Configure plot
        fig = figure(title=f'{x} v {y}', tools=['pan', 'tap'])
        taptool = fig.select(type=TapTool)
        # Placeholder: the per-point click behaviour is still to be supplied.
        taptool.callback = RUN_THING_PLOT_ON_CLICK()

        # Plot data
        xdata = [getattr(thing, x) for thing in self.things]
        ydata = [getattr(thing, y) for thing in self.things]
        fig.circle(xdata, ydata)
        return fig
Then I would make a scatter plot of all four Thing sources' 'foo' vs. 'baz' values with:
# Four sample things, collected and plotted as 'foo' against 'baz'.
A, B, C, D = (
    Thing(2, 4, 6),
    Thing(3, 6, 9),
    Thing(7, 2, 5),
    Thing(9, 2, 1),
)
X = Collection([A, B, C, D])
X.plot('foo', 'baz')
What I would like to have happen here is have each point on the scatter plot able to be clicked. On click, it would run the plot method for the given Thing, making a separate plot of all its 'foo', 'bar', and 'baz' values.
Any ideas on how this can be accomplished?
I know I can just load ALL the data for all the objects into a ColumnDataSource and make the plot using this toy example, but in my real use case the Thing.plot method does a lot of complicated calculations and may be plotting thousands of points. I really need it to actually run the Thing.plot method and draw the new plot. Is that feasible?
Alternatively, could I pass the Collection.plot method a list of all the Thing.plot pre-drawn figures to then display on click?
Using Python>=3.6 and bokeh>=2.3.0. Thank you very much!
I edited your code; sorry for getting back to you so late.
from bokeh.plotting import figure, show
from bokeh.models import TapTool, ColumnDataSource
from bokeh.events import Tap
from bokeh.io import curdoc
from bokeh.layouts import Row
class Thing:
    """Three values that can be drawn as a small 300x300 scatter plot."""

    def __init__(self, foo, bar, baz):
        self.foo, self.bar, self.baz = foo, bar, baz

    def plot(self):
        """Return a new figure with foo, bar and baz drawn at x = 1, 2, 3."""
        # Plot all data for thing
        t_fig = figure(width=300, height=300)
        heights = [self.foo, self.bar, self.baz]
        t_fig.circle([1, 2, 3], heights)
        return t_fig
def tapfunc(self):
    """Show the detail plot for the first tapped point next to the main plot.

    Registered as the figure's Tap handler. The single positional argument
    (named ``self`` here) is not used. Reads the module-level
    ``Collection.source`` and replaces the children of the module-level
    ``layout`` with ``main`` plus a freshly built ``Thing`` plot.
    """
    source = Collection.source
    # Points are identified by name (foo, bar, ...), though x/y would work too.
    # The tap tool allows multi-selection, so gather every selected name.
    selected_ = [source.data['name'][i] for i in source.selected.indices]
    print(selected_)  # your selected data

    if not selected_:
        return

    # Only the first selected name decides which detail plot is drawn;
    # names without an entry here (including None) leave the layout alone.
    thing_args = {
        "foo": (2, 4, 6),
        "bar": (3, 6, 9),
        "baz": (7, 2, 5),
    }.get(selected_[0])
    if thing_args is not None:
        layout.children = [main, Thing(*thing_args).plot()]
class Collection:
    """Main scatter plot whose points can be tapped to select them."""

    # Columndata source. Also could be added in __init__
    source = ColumnDataSource(data={
        'x': [1, 2, 3, 4, 5],
        'y': [6, 7, 8, 9, 10],
        'name': ['foo', 'bar', 'baz', None, None]
    })

    def __init__(self):
        pass

    def plot(self):
        """Return the main figure with hover tooltips and a tap handler."""
        # Configure plot
        # "$x"/"$y" show the cursor position in data space and "@name" shows
        # the 'name' column of the hovered point.
        TOOLTIPS = [
            ("(x,y)", "($x, $y)"),
            ("name", "@name"),
        ]
        fig = figure(width=300, height=300, tooltips=TOOLTIPS)
        # Plot data
        fig.circle(x='x', y='y', source=self.source, size=10)
        fig.add_tools(TapTool())
        fig.on_event(Tap, tapfunc)
        return fig
# Build the main scatter plot, wrap it in a row layout, and make that row the
# document root. tapfunc reads `main` and `layout` as module-level names.
main = Collection().plot()
layout = Row(children=[main])
curdoc().add_root(layout)
The problem is that every time you select something, the Thing class creates a new figure, which is not recommended. Instead, you could create all the graphs up front and make them visible or invisible as needed, OR you could change the data source of a single graph. You can find lots of examples of changing a graph's source and of toggling visibility. I hope it works for you :)
There are two ways to do that; this is a basic example. First, you could use the Tap event and write a function that gets the information from the glyph. Second, you could connect the source directly to a callback function.
from bokeh.io import curdoc
from bokeh.plotting import figure
from bokeh.events import Tap
from bokeh.models import TapTool, ColumnDataSource
def tapfunc():
    """Tap handler: print the indices currently selected in ``source``."""
    selected_indices = source.selected.indices
    print(selected_indices)
def sourcefunc(attr, old, new):
    """Change handler: print the selection object of ``source``.

    The ``attr``, ``old`` and ``new`` arguments are accepted but not used.
    """
    selection = source.selected
    print(selection)
# Five points; selections made on this source drive both callbacks.
source = ColumnDataSource(data=dict(
    x=[1, 2, 3, 4, 5],
    y=[6, 7, 8, 9, 10],
))

p = figure(width=400, height=400)
circles = p.circle(x='x', y='y', source=source, size=20, color="navy", alpha=0.5)
p.add_tools(TapTool())

# Way 1: react to the Tap event on the figure.
p.on_event(Tap, tapfunc)
# Way 2: react to changes of the selected indices on the source.
source.selected.on_change('indices', sourcefunc)

curdoc().add_root(p)
`selected` returns a list of the indices of the selected values, so you should add an index to your source; you could use pandas for the index. For more information about selection, check here. In the callback function you can then create a new figure and glyph (line, etc.) and update it. Here is a very good example; you can pull it and run it on your own PC.

Python callback in Bokeh issue

I am trying to get a callback function to work in bokeh based on this simple example:
from bokeh.plotting import figure, curdoc
from bokeh.layouts import column
from bokeh.models import ColumnDataSource
TOOLS = "tap,reset"
p = figure(title="Some Figure", tools=TOOLS)

# Two patches, named 'A' and 'B'.
source = ColumnDataSource(dict(
    x=[[1, 3, 2], [3, 4, 6, 6]],
    y=[[2, 1, 4], [4, 7, 8, 5]],
    name=['A', 'B'],
))
pglyph = p.patches('x', 'y', source=source)


def callback(attr, old, new):
    """Print the name of the patch at the first selected index."""
    print("In callback")
    # The index of the selected glyph is : new['1d']['indices'][0]
    first_index = new['1d']['indices'][0]
    patch_name = source.data['name'][first_index]
    print("TapTool callback executed on Patch {}".format(patch_name))


# Listen for changes of the source's 'selected' property.
pglyph.data_source.on_change('selected', callback)
curdoc().add_root(column(p))
When I load the page and click on a polygon, I do not see the callback getting executed.
What is missing?
That's because the selected attribute value is not changed. The contained object is changed instead, and Bokeh doesn't detect deep changes.
Try replacing the callback function and the next line with:
def callback(attr, old, new):
    """Report which patch was tapped.

    Args:
        attr: Name of the changed property.
        old: Previous list of selected indices.
        new: Current list of selected indices; may be empty.
    """
    print("In callback")
    # An empty selection (e.g. after deselecting) has no first index to look up.
    if not new:
        return
    patch_name = source.data['name'][new[0]]
    print("TapTool callback executed on Patch {}".format(patch_name))
# Watch the 'indices' property of the selection object itself, so the callback
# receives the list of selected indices as `new`.
pglyph.data_source.selected.on_change('indices', callback)

Cannot get a Bokeh checkbox group to update graph

I am trying to create a simple interactive graph with a checkbox group. I want the checkboxes to result in showing the appropriate line on the graph when ticked. I am doing this within Jupyter Notebook.
I've managed to get it embedded in Jupyter, and I wrote a callback function that does execute code. I am able to create a new ColumnDataSource from the checkbox selection. However, the graph is just not updating.
I've gone through every post on here I could, and look at every tutorial I could find. Most of them simply have an update callback which creates the new source, then sets the graph's source to the new one, which I believe is supposed to update the graph. I have also seen variations where people assign it as oldsource.data = newsource.data. This doesn't work for me either.
I am wondering whether there is any inherent limitations in embedding to Jupyter Notebook that I need Javascript for, or limitations to how sources can be updated. Or maybe I am just missing something very obvious? Code below:
import os
import pandas as pd
import numpy as np
import bokeh.plotting as bk
import bokeh.layouts as ly
import bokeh.models as md
import bokeh.colors as cl
import bokeh.palettes as plet
from bokeh.io import curdoc
from bokeh.io import show as io_show
from bokeh.models.widgets import CheckboxGroup, Select, Button
from bokeh.plotting import output_file, show, figure, output_notebook, reset_output, curdoc
# Dataset labels and the factor each one scales the x values by.
data_list = ["one", "two", "three", "four", "five", "six"]
data_list2 = ["one", "two"]
data_fac = [1, 2, 3, 4, 5, 6]
data_fac_dict = {label: factor for label, factor in zip(data_list, data_fac)}

# Shared x values (numpy's default sample count) and an empty frame
# with one column per label.
data_x = np.linspace(0, 100)
df = pd.DataFrame(columns=data_list)
def modify_doc(doc):
    """Build the checkbox-driven line plot and attach it to ``doc``."""

    def make_data(data_list):
        """Return a ColumnDataSource with one scaled column per requested label."""
        # Make new source with appropriate datasets
        frame = pd.DataFrame(columns=data_list)
        for label in data_list:
            frame[label] = data_x * data_fac_dict[label]
        return md.ColumnDataSource(frame)

    # One colour per known dataset label.
    list_colors = plet.Dark2[len(data_list)]
    dict_colors = dict(zip(data_list, list_colors))

    # Default source with a single dataset.
    src = make_data(["one"])
    print(src.data.keys())

    # Draw one line per data column; the 'index' column is the shared x axis.
    p = bk.figure()
    for case in src.data.keys():
        if case == "index":
            continue
        p.line(source=src, x='index', y=case, color=dict_colors[case])
        print("plotting loop")

    def update(attr, old, new):
        """Checkbox callback: build a source from the ticked labels."""
        print("update triggered")
        selection = [data_list[i] for i in wg_chk.active]
        # NOTE: this binds a name local to the callback only; the lines drawn
        # above keep referring to the source they were created with.
        src = make_data(selection)
        print(selection)

    wg_chk = CheckboxGroup(labels=data_list, active=[0] * len(data_list))
    wg_chk.on_change('active', update)

    layout = ly.row(wg_chk, p)
    doc.add_root(layout)
# Show the app built by modify_doc, passing the notebook's address.
bk.show(modify_doc, notebook_url='localhost:8888')
UPDATE #1
I changed the code in the callback to make the appropriate dataframe, then create a dict using ColumnDataSource.from_df, then set src.data equal to it as below. Still doesn't seem to work. I used a print to make sure data_new has correct keys.
# Callback excerpt: build the frame for the current selection, convert it with
# ColumnDataSource.from_df, and assign the result to the source's .data.
df_new = make_df(selection)
data_new = md.ColumnDataSource.from_df(df_new)
src.data = data_new
For clarity, I am using the newest version of Bokeh and Python as of today (Bokeh 1.0.2, Python 3.7.1)
UPDATE #2
As per the comments, I pre-generated all the required glyphs ahead of time, so they are, in essence, "slots for data" instead of being generated on demand for any amount of datasets. As they are now persistent, this allows me to toggle them on/off with the .visible property easily. I now have six "slots" for data to be plotted with corresponding glyphs, and I added a function within the callback to update their respective data sources (in this case, changing a linear to a quadratic curve). I also updated Bokeh to the newest version (1.3.4). Note that this is specifically embeddable in a Jupyter Notebook.
Here is the code for reference:
import os
import pandas as pd
import numpy as np
import bokeh.plotting as bk
import bokeh.layouts as ly
import bokeh.models as md
import bokeh.colors as cl
import bokeh.palettes as plet
from bokeh.io import curdoc
from bokeh.io import show as io_show
from bokeh.models.widgets import CheckboxGroup, Select, Button, RadioGroup
from bokeh.plotting import output_file, show, figure, output_notebook, reset_output, curdoc
# Dataset labels and the factor each one is scaled by.
data_list = ["one", "two", "three", "four", "five", "six"]
data_list2 = ["one", "two"]
data_fac = [1, 2, 3, 4, 5, 6]
data_fac_dict = {label: factor for label, factor in zip(data_list, data_fac)}
data_x = np.linspace(0, 100)

# Initial curves: a linear term plus a cubic term, both scaled per dataset.
df = pd.DataFrame(columns=data_list)
for case in data_fac_dict:
    df[case] = data_x * data_fac_dict[case] + np.power(data_x, 3) * data_fac_dict[case]
def modify_doc(doc):
    """Build a plot with one pre-made line per dataset and attach it to ``doc``.

    Every dataset gets its own data source and line up front. The checkbox
    group toggles line visibility, and the radio group selects the exponent
    used to recompute the data in the existing sources.

    Args:
        doc: The Bokeh document the layout is added to.
    """
    # Make colors
    list_colors = plet.Dark2[len(data_list)]
    dict_colors = dict(zip(data_list, list_colors))

    p = bk.figure()

    def make_line(case):
        """Draw the line for ``case`` from its own source and return the renderer."""
        return p.line(x='index', y=case, source=src_store[case],
                      color=dict_colors[case])

    # Make one source and one line per dataset, all hidden initially.
    src_store = dict()
    list_lines = dict()
    for case in data_list:
        src_store[case] = md.ColumnDataSource(df[[case]])
        list_lines[case] = make_line(case)
        list_lines[case].visible = False

    # First checkbox defaults to ticked, so show its line by default.
    list_lines["one"].visible = True

    def modify_data(order):
        """Recompute every dataset with exponent ``order`` and update its source."""
        new_df = pd.DataFrame(columns=data_list)
        for case in data_list:
            new_df[case] = (data_x * data_fac_dict[case]
                            + np.power(data_x, order) * data_fac_dict[case])
            # Assign to the existing source's .data so the attached line redraws.
            src_store[case].data = md.ColumnDataSource.from_df(new_df[[case]])

    def update(attr, old, new):
        """Shared widget callback: apply visibility, then refresh the data."""
        print("update triggered")

        # Get selection of lines to display
        selection = [data_list[i] for i in wg_chk.active]

        # Set visibility according to selection
        for case in data_list:
            list_lines[case].visible = case in selection

        # Get the exponent from the radio buttons and update sources
        order = wg_rad.active + 1
        modify_data(order)
        print(selection)

    wg_rad = RadioGroup(labels=["x*0", "x*1"], active=0)
    # `active` lists the ticked indices; only the first box starts ticked.
    wg_chk = CheckboxGroup(labels=data_list, active=[0])
    wg_chk.on_change('active', update)
    wg_rad.on_change('active', update)

    layout = ly.row(ly.column(wg_chk, wg_rad), p)
    doc.add_root(layout)
# Show the app built by modify_doc, passing the notebook's address.
bk.show(modify_doc, notebook_url='localhost:8888')
When you plot a Bokeh glyph, that glyph object has an associated data source. If you want the glyph to update, you need to update that existing data source, i.e. modify it by setting its .data property. The code above does not do that. It creates a new data source that is not attached to or configured on anything, and then immediately throws it away (it's a local variable in a function; since nothing keeps a reference to it, it disappears when the function finishes).
You need to update whatever existing data source that you used initially:
source.data = new_data # plain python dict
And, at least as of Bokeh 1.3.4 new_data must be a plain Python dictionary. It is not supported to "migrate" a .data value from one CDS to another:
source1.data = source2.data # BAD! WILL NOT WORK
Attempting to do so will probably raise an explicit error in the near future. There is a from_df static method on ColumnDataSource you can use to convert DataFrames to the right kind of dict.

For loop for bokeh in databricks

Hi, I would like to run the code below several times in a for loop, to get the same graph several times in one Databricks cell:
I imported library:
from bokeh.plotting import figure
from bokeh.embed import components, file_html
from bokeh.resources import CDN
x = [1, 2, 3, 4, 5]
y = [6, 7, 2, 4, 5]

# Build the same line plot five times and display each one as standalone HTML.
for i in range(5):
    p = figure(title='test', x_axis_label='x values', y_axis_label='y values')
    p.line(x, y, line_width=2)
    html = file_html(p, CDN, 'plot')
    displayHTML(html)
I was trying to use for loop but still i am obtaining only one single graph in a cell.
Also tried with different modules, like here:
from bokeh.io import output_file, show
from bokeh.plotting import figure
x = [1, 2, 3, 4, 5]
y = [6, 7, 2, 4, 5]

# Build the same line plot five times, each written to its own output file.
for i in range(5):
    p = figure(title='test', x_axis_label='x values')
    p.line(x, y, line_width=2)
    target = "line" + str(i) + ".html"
    output_file(target)
    show(p)
But here i am not getting any result, nothing is plotted. Can you tell me why?
Tried this as well:
# Render five copies of the plot to HTML, keyed "html0" .. "html4".
d = {}
for i in range(5):
    p = figure(title='test', x_axis_label='x values')
    p.line(x, y, line_width=2)
    d["html{0}".format(i)] = file_html(p, CDN, 'plot' + str(i))

# Iterating a dict yields its keys, so look up the stored HTML for each key
# instead of displaying the key string itself.
for j in d:
    displayHTML(d[j])
It looks like with bokeh in databricks it is possible to display only one graph per cell. Anyone knows if it is true?
Could you help me with the syntax for for loop to get it multiple times?
It seems from other answers that there may be issues with Databricks using old notebook versions that Bokeh does not fully support. If the standard output_notebook is not working, but the displayHTML thing does, then I'd say your best bet is to collect the plots you want to show in a column layout, then show them all at once with a single call to show at the end:
from bokeh.layouts import column
# Collect every figure first (the ... arguments are placeholders to fill in).
plots = []
for i in range(5):
    p = figure(...)
    p.line(...)
    plots.append(p)

# Stack all figures in one column so a single display call shows them all.
layout = column(*plots)

# only use this way for databricks
html = file_html(layout, CDN, 'plot')
displayHTML(html)

Setting absolute screen position of Bokeh Charts in Web App

I am trying to set the absolute position of a Bokeh Chart inside a Layout so that one of the plots is shown on top of another plot. Right now when I am plotting something like this:
from bokeh.io import curdoc
from bokeh.plotting import figure
from bokeh.layouts import layout
import numpy as np
# Data for the line plot (y = x squared) and for the bar chart.
x = np.arange(1, 10.1, 0.1)
y = [i**2 for i in x]
categories = ['A', 'B']
values = [1000, 1500]

# Large line plot.
fig1 = figure(width=600, plot_height=600, title="First Plot")
fig1.line(x=x, y=y)

# Small categorical bar chart.
fig2 = figure(width=200, plot_height=250, x_range=categories,
              title="Second Plot")
fig2.vbar(x=categories, top=values, width=0.2)

# Both figures side by side in a single row.
l = layout([[fig1, fig2]])
curdoc().add_root(l)
The result will be this:
What I am searching for is some way to make it look like that:
How can this result be achieved?
Thank you!
This is what I came up with (works for Bokeh v1.0.4). You need to move your mouse over the plot to make the other one jump inside it, but you could also copy the JS code from the callback and manually add it to the HTML generated by Bokeh to achieve the same result.
from bokeh.plotting import figure, show
from bokeh.layouts import Row
from bokeh.models import ColumnDataSource, CDSView, BooleanFilter, CustomJS, BoxSelectTool, HoverTool
import pandas as pd
# Outer plot; "@x" / "@y" show the hovered point's values from the data source.
plot = figure(tools='hover', tooltips=[("x", "@x"), ("y", "@y")])
circles = plot.circle('x', 'y', size=20,
                      source=ColumnDataSource({'x': [1, 2, 3], 'y': [1, 2, 3]}))

# Small plot that the JS below repositions.
inner_plot = figure(name='inner_plot', plot_width=200, plot_height=200)
lines = inner_plot.line('x', 'y',
                        source=ColumnDataSource({'x': [8, 9, 10], 'y': [8, 6, 8]}))

# Browser-side code: take the second child of the first 'bk-root' element's
# first child and give it an absolute position.
code = """ div = document.getElementsByClassName('bk-root')[0];
tooltip_plot = div.children[0].children[1]
tooltip_plot.style = "position:absolute; left: 340px; top: 350px;"; """
callback = CustomJS(code=code)

# The repositioning runs on mouse movement over the outer plot.
plot.js_on_event('mousemove', callback)
show(Row(plot, inner_plot))
Result:

Categories

Resources