How to plot multiple ColumnDataSource data with hovertools in Bokeh? - python

The Goal:
To generate a scatter plot from a pandas DataFrame with 3 columns: x, y, type (either a, b or c). The data points should have different colors based on the type. Every data point should have a hover effect. However, data points with type c should have a tap effect too. The data file (data_file.csv) looks something like:
x
y
z
1
4
a
2
3
b
3
2
a
4
4
c
..
..
..
My attempt:
First, I imported the dataframe and divided into two parts: one with c type data and another with the everything else. Then I created two columndatasource and plotted the data. Is there a shortcut or better way than this? Also, I couldn't achieve some feature (see below).
Code:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, OpenURL, TapTool
from bokeh.models.tools import HoverTool
from bokeh.transform import factor_cmap
file = "data.csv"
df = read_csv(file, skiprows=1, header=None, sep="\t")
# now I will seperate the dataframe into two: one with type **a** & **b**
# and another dataframe containing with type **c**
c_df = df.drop(df[df[2] != 'c'].index)
ab_df = df.drop(df[df[2] == 'c'].index)
ab_source = ColumnDataSource(data=dict(
Independent = ab_df[0],
Dependent = ab_df[1],
Type = ab_df[2]
))
c_source = ColumnDataSource(data=dict(
Independent = c_df[0],
Dependent = c_df[1],
Type = c_df[2],
link = "http://example.com/" + c_df[0].apply(str) + ".php"
))
p = figure(title="Random PLot")
p.circle('Independent', 'Dependent',
size=10,
source=ab_source,
color=factor_cmap('Type',
['red', 'blue'],
['a', 'b']),
legend_group='Type'
)
p.circle('Independent', 'Dependent',
size=12,
source=c_source,
color=factor_cmap('Type',
['green'],
['c']),
name='needsTapTool'
)
p.legend.title = "Type"
hover = HoverTool()
hover.tooltips = """
<div>
<h3>Type: #Type</h3>
<p> #Independent and #Dependent </p>
</div>
"""
p.add_tools(hover)
url = "#link"
tap = TapTool(names=['needsTapTool'])
tap.callback = OpenURL(url=url)
p.add_tools(tap)
show(p)
Problems:
(1) How can I add two different hover tools so that different data points will behave differently depending on their type? Whenever I add another hover tool, only the last one is getting effective.
(2) How can I take part of a data in CDS? For example, imagine I have a column called 'href' which contains a link but have a "http://www" part. Now how can I set the 'link' variable inside a CDS that doesn't contain this part? when I try:
c_source = ColumnDataSource(data=dict(
link = c_df[3].apply(str)[10:]
))
I get a keyError. Any help will be appreciated.

It is possible to define multiple Tools, even multiple HoverTools in one plot. The trick is the collect the renderers and apply them to a specific tool.
In the example below, two HoverTools are added and on TapTool.
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, OpenURL, TapTool, HoverTool
output_notebook()
df = pd.DataFrame({'x':[1,2,3,4], 'y':[4,3,2,1], 'z':['a','b','a','c']})
>>
x y z
0 1 4 a
1 2 3 b
2 3 2 a
3 4 1 c
color = {'a':'red', 'b':'blue', 'c':'green'}
p = figure(width=300, height=300)
# list to collect the renderers
renderers = []
for item in df['z'].unique():
df_group = df[df['z']==item].copy()
# if group 'c', add some urls
if item == 'c':
# url with "https"
df_group['link'] = df_group.loc[:,'x'].apply(lambda x: f"https://www.example.com/{x}.php")
# url without "https"
df_group['plain_link'] = df_group.loc[:,'x'].apply(lambda x: f"example.com/{x}.php")
renderers.append(
p.circle(
'x',
'y',
size=10,
source=ColumnDataSource(df_group),
color=color[item],
legend_label=item
)
)
p.legend.title = "Type"
# HoverTool for renderers of group 'a' and 'b'
hover = HoverTool(renderers=renderers[:2])
hover.tooltips = """
<div>
<h3>Type: #z</h3>
<p> #x and #y </p>
</div>
"""
p.add_tools(hover)
# HoverTool for renderer of group 'c'
hover_c = HoverTool(renderers=[renderers[-1]])
hover_c.tooltips = """
<div>
<h3>Type: #z</h3>
<p> #x and #y </p>
<p> #plain_link </p>
</div>
"""
p.add_tools(hover_c)
# TapTool for renderer of group 'c'
tap = TapTool(renderers=[renderers[-1]], callback=OpenURL(url="#link"))
p.add_tools(tap)
show(p)

Related

Bokeh hover special variable `$data_x` shows number instead of FactorRange category label for multi-line glyph

I am using Bokeh multi_line to show several lines using a categorical x_range,
and would like hover to display the x category hovered. I thought $data_x might help, but it shows numerical values related to category indexes rather than the category labels. I can use CustomJSHover with special_vars["segment_index"] to display what I want, but is there a simpler way?
To demonstrate, this code creates a figure with multi_line():
from collections import defaultdict
import pandas as pd
from bokeh import palettes
from bokeh.plotting import show, figure
from bokeh.models import CustomJSHover, HoverTool
# Substantive data.
df_data = pd.DataFrame.from_records([
dict(date="2001 Q1", output=100, inputs=100),
dict(date="2001 Q2", output=105, inputs=102),
dict(date="2001 Q3", output=110, inputs=105),
])
# Make list of lists for multi_line(), with metadata.
lines_data = defaultdict(list)
for var in ["inputs", "output"]:
lines_data["variable"].append(var)
lines_data["date"].append(df_data["date"])
lines_data["value"].append(df_data[var])
lines_data["color"] = palettes.Category10_10[:2]
fig = figure(
x_range = df_data["date"],
plot_height=400,
)
fig.multi_line(
source = lines_data,
xs = "date",
ys = "value",
color = "color",
legend_group = "variable",
line_width = 5,
line_alpha = 0.6,
hover_line_alpha = 1.0, # Highlight hover line.
)
The hover I want can be created like this using CustomJSHover:
# Custom hover formatting #date.
hover_date = CustomJSHover(
# Show value[$segment_index].
code="""
console.log("> Show value[$segment_index] hover", value);
return "" + value[special_vars["segment_index"]];
""")
fig.add_tools(HoverTool(
tooltips=[
('variable', '#variable'),
('date', '#date{custom}'), # Show hovered date only.
('value', '$data_y'),
],
formatters={'#date': hover_date},
))
show(fig)
Potentially a more straightforward hover specification would use something like $data_x without a custom format, except $data_x itself apparently does not look up the label in the FactorRange (applying this HoverTool instead of the one above):
# Simple hover showing $data_x.
fig.add_tools(HoverTool(
tooltips=[
('variable', '#variable'),
('date', '$data_x'), # Does not show x_range value!
('value', '$data_y'),
]))
show(fig)
Now, hovering over a line shows a 'date' like "1.500" instead of "2001 Q2" etc.
Am I missing a trick, or is CustomJSHover the best way to show the x category?

Heatmap does not show

I am trying to plot a simple heatmap from a dataframe that looks like this:
row column content amount
0 x a c1 1
2 x b c3 3
4 x c c2 1
6 y a c1 1
8 y b c3 3
10 y c c2 1
12 z a c1 1
14 z b c3 3
16 z c c2 1
row and column indicate the position of the cell, the color of it should be chosen based on content and I want tooltips displaying the content and the amount.
I currently try it like this (using bokeh 1.2.0):
import pandas as pd
from bokeh.io import show
from bokeh.models import CategoricalColorMapper, LinearColorMapper, BasicTicker, PrintfTickFormatter, ColorBar, ColumnDataSource
from bokeh.plotting import figure
from bokeh.palettes import all_palettes
from bokeh.transform import transform
df = pd.DataFrame({
'row': list('xxxxxxyyyyyyzzzzzz'),
'column': list('aabbccaabbccaabbcc'),
'content': ['c1', 'c2', 'c3', 'c1', 'c2', 'c3'] * 3,
'amount': list('123212123212123212')})
df = df.drop_duplicates(subset=['row', 'column'])
source = ColumnDataSource(df)
rows = df['row'].unique()
columns = df['column'].unique()
content = df['content'].unique()
colors = all_palettes['Viridis'][max(len(content), 3)]
mapper = CategoricalColorMapper(palette=colors, factors=content)
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
p = figure(title="My great heatmap",
x_range=columns, y_range=rows,
x_axis_location="above", plot_width=600, plot_height=400,
tools=TOOLS, toolbar_location='below',
tooltips=[('cell content', '#content'), ('amount', '#amount')])
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "5pt"
p.axis.major_label_standoff = 0
p.rect(x="row", y="column", width=1, height=1,
source=source,
fill_color=transform('content', mapper))
# color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="5pt",
# location=(0, 0))
# p.add_layout(color_bar, 'right')
show(p)
However, there are two issues:
1) When executed, I get an empty heatmap:
Any ideas why?
2) When I outcomment the color_bar = ... part, I receive an error saying:
ValueError: expected an instance of type ContinuousColorMapper, got
CategoricalColorMapper(id='3820', ...) of type CategoricalColorMapper
What am I doing wrong?
Your x and y coordindates are swapped, should be:
p.rect(x="column", y="row", ...)
As for the other message, it is self-explanatory: As of Bokeh 1.2, ColorBar can only be configured with continuous color mappers (e.g. LinearColorMapper). You can either:
compute colors yourself in Python code, and include a column of colors in source, or
re-cast your plot to use a LinearColorMapper (i.e. map content appropriately to some numerical scale)
For your colorBar the solution is here, I quiet did not understand yet what happened to your source, I will dig a bit deeper another time.
The colorBar expected a continuous mapper you gave it a categorical.
from bokeh.models import (CategoricalColorMapper, LinearColorMapper,
BasicTicker, PrintfTickFormatter, ColorBar, ColumnDataSource,
LinearColorMapper)
factors =df['content'].unique().tolist()
colors = all_palettes['Viridis'][max(len(factors), 3)]
mapper = LinearColorMapper(palette=colors)

bokeh - plot a different column using customJS

I have a dataframe of multiple columns. First two columns are x and y coordinates and the rest columns are different property values for (x,y) pairs.
import pandas as pd
import numpy as np
df = pd.DataFrame()
df['x'] = np.random.randint(1,1000,100)
df['y'] = np.random.randint(1,1000,100)
df['val1'] = np.random.randint(1,1000,100)
df['val2'] = np.random.randint(1,1000,100)
df['val3'] = np.random.randint(1,1000,100)
print df.head()
x y val1 val2 val3
0 337 794 449 969 933
1 19 563 592 677 886
2 512 467 664 160 16
3 36 112 91 230 910
4 972 572 336 879 860
Using customJS in Bokeh, I would like to change the value of color in 2-D heatmap by providing a drop down menu.
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.models import LinearColorMapper
from bokeh.palettes import RdYlBu11 as palette
p = figure()
source = ColumnDataSource(df)
color_mapper = LinearColorMapper(palette=palette)
p.patches('x', 'y', source=source,\
fill_color={'field': 'val1', 'transform':color_mapper})
show(p)
The above commands plot a color map whose color is determined by the column 'val1'. I would like to plot different columns (either val1, val2, or val3) based on whatever is selected in the drop down menu.
I can create a drop down widget in bokeh by doing
from bokeh.models.widgets import Select
select = Select(title="Option:", value="val1", options=["val1","val2","val3"])
But, I'm not quite sure how I can use the selected value to update the plot by using callback.
Could someone give me a guideline here?
Thanks.
I have included an example with comments inline with the code. The main important steps are to write the javascript code that is executed each time the selected option on the widget changes. The code simply needs to just re-assign which of the columns is set to the values for the 'y' column of the datasource.
The other issue is that your data is just x and y points. The patches glyph will require a different data structure which defines the boundaries of the patches. I believe there are better ways to make a heatmap in bokeh, there should be numerous examples on stack overflow and the bokeh docs.
import pandas as pd
import numpy as np
from bokeh.io import show
from bokeh.layouts import widgetbox,row
from bokeh.models import ColumnDataSource, CustomJS
df = pd.DataFrame()
df['x'] = np.random.randint(1,1000,1000)
df['y'] = np.random.randint(1,1000,1000)
df['val1'] = np.random.randint(1,1000,1000)
df['val2'] = np.random.randint(1,1000,1000)
df['val3'] = np.random.randint(1,1000,1000)
from bokeh.plotting import figure
from bokeh.models import LinearColorMapper
from bokeh.palettes import RdYlBu11 as palette
p = figure(x_range=(0,1000),y_range=(0,1000))
source = ColumnDataSource(df)
source_orig = ColumnDataSource(df)
color_mapper = LinearColorMapper(palette=palette)
p.rect('x', 'y', source=source,width=4,height=4,
color={'field': 'val1', 'transform':color_mapper})
from bokeh.models.widgets import Select
select = Select(title="Option:", value="val1", options=["val1","val2","val3"])
callback = CustomJS(args={'source':source},code="""
// print the selectd value of the select widget -
// this is printed in the browser console.
// cb_obj is the callback object, in this case the select
// widget. cb_obj.value is the selected value.
console.log(' changed selected option', cb_obj.value);
// create a new variable for the data of the column data source
// this is linked to the plot
var data = source.data;
// allocate the selected column to the field for the y values
data['y'] = data[cb_obj.value];
// register the change - this is required to process the change in
// the y values
source.change.emit();
""")
# Add the callback to the select widget.
# This executes each time the selected option changes
select.callback = callback
show(row(p,select))

Unable to populate Bokeh HoverTool Values from Passed in Pandaframe

I am using Bokeh on Jupyter Notebooks to help with data visualization. I wanted to be able to plot the data from a panda DataFrame, and then when I hover over the Bokeh plot, all the feature values should be visible in the hover Box. However, with the code below, only the index correctly displays, and all the other fields appear as ???, and I'm not sure why.
Here is my working example
//Importing all the neccessary things
import numpy as np
import pandas as pd
from bokeh.layouts import row, widgetbox, column
from bokeh.models import CustomJS, Slider, Select, HoverTool
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.io import push_notebook, output_notebook, curdoc
from bokeh.client import push_session
#from bokeh.scatter_with_hover import scatter_with_hover
output_notebook()
np.random.seed(0)
samples = np.random.randint(low = 0, high = 1000, size = 1000)
samples = samples.reshape(200,5)
cols = ["A", "B", "C", "D", "E"]
df = pd.DataFrame(samples, columns=cols)
# Here is a dict of some keys that I want to be able to pick from for plotting
labels = list(df.columns.values)
axis_map = {key:key for key in labels}
code2 = ''' var data = source.data;
//axis values with select widgets
var value1 = val1.value;
var value2 = val2.value;
var original_data = original_source.data
// get data corresponding to selection
x = original_data[value1];
y = original_data[value2];
data['x'] = x;
data['y'] = y;
source.trigger('change');
// set axis labels
x_axis.axis_label = value1;
y_axis.axis_label = value2;
'''
datas = "datas"
source = ColumnDataSource(data=dict( x=df['A'], y=df['B'],
label = labels, datas = df))
original_source = ColumnDataSource(data=df.to_dict(orient='list'))
a= source.data[datas].columns.values
#print a.columns.values
print a
TOOLS = [ HoverTool(tooltips= [(c, '#' + c) for c in source.data[datas].columns.values] +
[('index', '$index')] )]
# hover.tooltips.append(('index', '$index'))
#plot the figures
plot = figure(plot_width=800, plot_height=800, tools= TOOLS)
plot.scatter(x= "x",y="y", source=source, line_width=2, line_alpha=0.6,
size = 3)
callback = CustomJS(args=dict(source=source, original_source = original_source,
x_axis=plot.xaxis[0],y_axis=plot.yaxis[0]), code=code2)
#Create two select widgets to pick the features of interest
x_axis = Select(title="X Axis", options=sorted(axis_map.keys()), value="A", callback = callback)
callback.args["val1"] = x_axis
callbackDRange.args["val1"]= x_axis
y_axis = Select(title="Y Axis", options=sorted(axis_map.keys()), value="B", callback = callback)
callback.args["val2"] = y_axis
callbackDRange.args["val2"]= y_axis
plot.xaxis[0].axis_label = 'A'
plot.yaxis[0].axis_label = 'B'
#Display the graph in a jupyter notebook
layout = column(plot, x_axis, y_axis )
show(layout, notebook_handle=True)
I'm even passing in the full dataframe into the source ColumnDataSource so I can access it later, but it won't work. Any guidance would be greatly appreciated!
Running your code in recent version of Bokeh results in the warning:
Which suggests the root of the problem. If we actually look at the data source you create for the glyph:
source = ColumnDataSource(data=dict(x=df['A'],
y=df['B'],
label=labels,
datas = df))
It's apparent what two things are going wrong:
You are violating the fundamental assumption that all CDS columns must always be the same length at all times. The CDS is like a "Cheap DataFrame", i.e. it doesn't have ragged columns. All the columns must have the same length just like a DataFrame.
You are configuring a HoverTool to report values from columns named "A", "B", etc. but your data source has no such columns at all! The CDS you create above for your glyph has columns named:
"x", "y"
"labels" which is the wrong length
"datas" which has a bad value, columns should normally be 1-d arrays
The last column "datas" is irrelevant, BTW. I think you must think the hover tool will somehow look in that column for information to display but that is not how hover tool works. Bottom line:
If you configured a tooltip for a field #A then your CDS must have a column named "A"
And that's exactly what's not the case here.
It's hard to say exactly how you should change your code without more context about what exactly you want to do. I guess I'd suggest taking a closer look at the documention for hover tools in the User's Guide

How to add data labels to a bar chart in Bokeh?

In the Bokeh guide there are examples of various bar charts that can be created. http://docs.bokeh.org/en/0.10.0/docs/user_guide/charts.html#id4
This code will create one:
from bokeh.charts import Bar, output_file, show
from bokeh.sampledata.autompg import autompg as df
p = Bar(df, 'cyl', values='mpg', title="Total MPG by CYL")
output_file("bar.html")
show(p)
My question is if it's possible to add data labels to each individual bar of the chart? I searched online but could not find a clear answer.
Use Labelset
Use Labelset to create a label over each individual bar
In my example I'm using vbar with the plotting interface, it is a little bit more low level then the Charts interface, but there might be a way to add it into the Bar chart.
from bokeh.palettes import PuBu
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource, ranges, LabelSet
from bokeh.plotting import figure
output_notebook()
source = ColumnDataSource(dict(x=['Áætlaðir','Unnir'],y=[576,608]))
x_label = ""
y_label = "Tímar (klst)"
title = "Tímar; núllti til þriðji sprettur."
plot = figure(plot_width=600, plot_height=300, tools="save",
x_axis_label = x_label,
y_axis_label = y_label,
title=title,
x_minor_ticks=2,
x_range = source.data["x"],
y_range= ranges.Range1d(start=0,end=700))
labels = LabelSet(x='x', y='y', text='y', level='glyph',
x_offset=-13.5, y_offset=0, source=source, render_mode='canvas')
plot.vbar(source=source,x='x',top='y',bottom=0,width=0.3,color=PuBu[7][2])
plot.add_layout(labels)
show(plot)
You can find more about labelset here: Bokeh annotations
NOTE FROM BOKEH MAINTAINERS The portions of the answer below that refer to the bokeh.charts are of historical interest only. The bokeh.charts API was deprecated and subsequently removed from Bokeh. See the answers here and above for information on the stable bokeh.plotting API
Yes, you can add labels to each bar of the chart. There are a few ways to do this. By default, your labels are tied to your data. But you can change what is displayed. Here are a few ways to do that using your example:
from bokeh.charts import Bar, output_file, show
from bokeh.sampledata.autompg import autompg as df
from bokeh.layouts import gridplot
from pandas import DataFrame
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import Range1d, HoverTool
# output_file("bar.html")
""" Adding some sample labels a few different ways.
Play with the sample data and code to get an idea what does what.
See below for output.
"""
Sample data (new labels):
I used some logic to determine the new dataframe column. Of course you could use another column already in df (it all depends on what data you're working). All you really need here is to supply a new column to the dataframe.
# One method
labels = []
for number in df['cyl']:
if number == 3:
labels.append("three")
if number == 4:
labels.append("four")
if number == 5:
labels.append("five")
if number == 6:
labels.append("six")
if number == 8:
labels.append("eight")
df['labels'] = labels
Another way to get a new dataframe column. Again, we just need to supply df a new column to use on our bar plot.
# Another method
def new_labels(x):
if x % 2 != 0 or x == 6:
y = "Inline"
elif x % 2 == 0:
y = "V"
else:
y = "nan"
return y
df["more_labels"] = df["cyl"].map(new_labels)
Now the bar chart:
I've done it two ways. p1 just specifies the new labels. Note that because I used strings it put them in alphabetical order on the chart. p2 uses the original labels, plus adds my new labels on the same bar.
# Specifying your labels
p1 = Bar(df, label='labels', values='mpg',
title="Total MPG by CYL, remapped labels, p1",
width=400, height=400, legend="top_right")
p2 = Bar(df, label=['cyl', 'more_labels'], values='mpg',
title="Total MPG by CYL, multiple labels, p2", width=400, height=400,
legend="top_right")
Another way:
Bokeh has three main "interface levels". High level charts provides quick easy access but limited functionality; plotting which gives more options; models gives even more options.
Here I'm using the plotting interface and the Figure class that contains a rect method. This gives you more detailed control of your chart.
# Plot with "intermediate-level" bokeh.plotting interface
new_df = DataFrame(df.groupby(['cyl'])['mpg'].sum())
factors = ["three", "four", "five", "six", "eight"]
ordinate = new_df['mpg'].tolist()
mpg = [x * 0.5 for x in ordinate]
p3 = figure(x_range=factors, width=400, height=400,
title="Total MPG by CYL, using 'rect' instead of 'bar', p3")
p3.rect(factors, y=mpg, width=0.75, height=ordinate)
p3.y_range = Range1d(0, 6000)
p3.xaxis.axis_label = "x axis name"
p3.yaxis.axis_label = "Sum(Mpg)"
A fourth way to add specific labels:
Here I'm using the hover plot tool. Hover over each bar to display your specified label.
# With HoverTool, using 'quad' instead of 'rect'
top = [int(x) for x in ordinate]
bottom = [0] * len(top)
left = []
[left.append(x-0.2) for x in range(1, len(top)+1)]
right = []
[right.append(x+0.2) for x in range(1, len(top)+1)]
cyl = ["three", "four", "five", "six", "eight"]
source = ColumnDataSource(
data=dict(
top=[int(x) for x in ordinate],
bottom=[0] * len(top),
left=left,
right=right,
cyl=["three", "four", "five", "six", "eight"],
)
)
hover = HoverTool(
tooltips=[
("cyl", "#cyl"),
("sum", "#top")
]
)
p4 = figure(width=400, height=400,
title="Total MPG by CYL, with HoverTool and 'quad', p4")
p4.add_tools(hover)
p4.quad(top=[int(x) for x in ordinate], bottom=[0] * len(top),
left=left, right=right, color="green", source=source)
p4.xaxis.axis_label = "x axis name"
Show all four charts in a grid:
grid = gridplot([[p1, p2], [p3, p4]])
show(grid)
These are the ways I am aware of. There may be others. Change whatever you like to fit your needs. Here is what running all of this will output (you'll have to run it or serve it to get the hovertool):

Categories

Resources