Bokeh: disable Auto-ranging while using Edit Tools - python

I've included the PolyDrawTool in my Bokeh plot to let users circle points. When a user draws a line near the edge of the plot the tool expands the axes which often messes up the shape. Is there a way to freeze the axes while a user is drawing on the plot?
I'm using bokeh 1.3.4
MRE:
import numpy as np
import pandas as pd
import string
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.models import PolyDrawTool, MultiLine
def prepare_plot():
    """Build the demo figure: labelled random points plus a free-hand draw tool.

    Returns:
        The configured Bokeh figure, ready to be passed to ``show``.
    """
    embedding_df = pd.DataFrame(np.random.random((100, 2)), columns=['x', 'y'])
    # Attach a random 8-letter lowercase label to every point.
    embedding_df['word'] = embedding_df.apply(
        lambda x: ''.join(np.random.choice(list(string.ascii_lowercase), (8,))),
        axis=1,
    )

    # Plot preparation configuration Data source
    source = ColumnDataSource(ColumnDataSource.from_df(embedding_df))
    labels = LabelSet(x="x", y="y", text="word", y_offset=-10, x_offset=5,
                      text_font_size="10pt", text_color="#555555",
                      source=source, text_align='center')
    plot = figure(plot_width=1000, plot_height=500, active_scroll="wheel_zoom",
                  tools='pan, box_select, wheel_zoom, save, reset')

    # Configure free-hand draw: the tool writes the drawn lines into
    # draw_source, which drives the multi_line renderer below.
    draw_source = ColumnDataSource(data={'xs': [], 'ys': [], 'color': []})
    renderer = plot.multi_line('xs', 'ys', line_width=5, alpha=0.4,
                               color='color', source=draw_source)
    renderer.selection_glyph = MultiLine(line_color='color', line_width=5,
                                         line_alpha=0.8)
    draw_tool = PolyDrawTool(renderers=[renderer], empty_value='red')
    plot.add_tools(draw_tool)

    # Add the data and labels to plot (size=0 circles: only the labels show)
    plot.circle("x", "y", size=0, source=source, line_color="black",
                fill_alpha=0.8)
    plot.add_layout(labels)
    return plot
if __name__ == '__main__':
    # Build the figure and hand it to bokeh.io.show for rendering.
    plot = prepare_plot()
    show(plot)

The PolyDrawTool actually updates a ColumnDataSource to drive a glyph that draws what the user indicates. The behavior you are seeing is a natural consequence of that fact, combined with Bokeh's default auto-ranging DataRange1d (which by default also considers every glyph when computing the auto-bounds). So, you have two options:
Don't use DataRange1d at all, e.g. you can provide fixed axis bounds when you call figure:
# Fixed (start, end) bounds replace the default auto-ranging DataRange1d.
p = figure(..., x_range=(0, 10), y_range=(-20, 20))
or you can set them after the fact:
p.x_range = Range1d(0, 10)
p.y_range = Range1d(-20, 20)
Of course, with this approach you will no longer get any auto-ranging at all; you will need to set the axis ranges to exactly the start/end that you want.
Make DataRange1d be more selective by explicitly setting its renderers property:
r = p.circle(...)
p.x_range.renderers = [r]
p.y_range.renderers = [r]
Now the DataRange models will only consider the circle renderer when computing the auto-ranged start/end.

Related

Heatmap with circles indicating size of population

I would like to produce a heatmap in Python, similar to the one shown, where the size of the circle indicates the size of the sample in that cell. I looked in seaborn's gallery and couldn't find anything, and I don't think I can do this with matplotlib.
It's the inverse. While matplotlib can do pretty much everything, seaborn only provides a small subset of options.
So using matplotlib, you can plot a PatchCollection of circles as shown below.
Note: You could equally use a scatter plot, but since scatter dot sizes are in absolute units it would be rather hard to scale them into the grid.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
# Grid dimensions: N rows (y direction) by M columns (x direction).
N = 10
M = 11
# Random tick labels: one 7-letter label per row, one 3-letter label per column.
ylabels = ["".join(np.random.choice(list("PQRSTUVXYZ"), size=7)) for _ in range(N)]
xlabels = ["".join(np.random.choice(list("ABCDE"), size=3)) for _ in range(M)]
# x holds the column index and y the row index of every cell.
x, y = np.meshgrid(np.arange(M), np.arange(N))
# s: random sample size per cell (drives the circle radius).
s = np.random.randint(0, 180, size=(N,M))
# c: random value in [-0.5, 0.5) per cell (drives the circle colour).
c = np.random.rand(N, M)-0.5
fig, ax = plt.subplots()
# Scale radii so the largest circle has radius 0.5, i.e. it just fills a
# unit-sized grid cell.
R = s/s.max()/2
# One circle per cell, centred on (column, row); R, x and y are walked in the
# same flattened order.
circles = [plt.Circle((j,i), radius=r) for r, j, i in zip(R.flat, x.flat, y.flat)]
# The collection colours each circle from c through the RdYlGn colormap.
col = PatchCollection(circles, array=c.flatten(), cmap="RdYlGn")
ax.add_collection(col)
# Major ticks sit on the cell centres and carry the labels.
ax.set(xticks=np.arange(M), yticks=np.arange(N),
xticklabels=xlabels, yticklabels=ylabels)
# Minor ticks sit on the half-integers between cells, so the minor grid
# draws the cell borders around each circle.
ax.set_xticks(np.arange(M+1)-0.5, minor=True)
ax.set_yticks(np.arange(N+1)-0.5, minor=True)
ax.grid(which='minor')
fig.colorbar(col)
plt.show()
Here's a possible solution using Bokeh Plots:
import pandas as pd
from bokeh.palettes import RdBu
from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar
from bokeh.models.ranges import FactorRange
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import numpy as np
output_notebook()

# Twelve (x, y) category pairs, each with a random correlation in [-1, 1).
cells = {
    'x': ['A','A','A', 'B','B','B','C','C','C','D','D','D'],
    'y': ['B','C','D', 'A','C','D','B','D','A','A','B','C'],
    'corr': np.random.uniform(low=-1, high=1, size=(12,)).tolist(),
}
df = pd.DataFrame(cells)
# Marker size grows with the magnitude of the correlation.
df['size'] = np.abs(df['corr']) * 50

# Reversed 9-step RdBu palette mapped linearly onto [-1, 1].
palette = list(RdBu[9][::-1])
corr_mapper = LinearColorMapper(palette=palette, low=-1, high=1)

p = figure(
    x_range=FactorRange(),
    y_range=FactorRange(),
    plot_width=700,
    plot_height=450,
    title="Correlation",
    toolbar_location=None,
    tools="hover",
)
p.scatter(
    "x", "y",
    source=df,
    fill_alpha=1,
    line_width=0,
    size="size",
    fill_color={"field": "corr", "transform": corr_mapper},
)

# Categorical axes: x ascending, y descending.
x_factors = sorted(df['x'].unique().tolist())
y_factors = sorted(df['y'].unique().tolist(), reverse=True)
p.x_range.factors = x_factors
p.y_range.factors = y_factors
p.xaxis.axis_label = 'Values'
p.yaxis.axis_label = 'Values'

color_bar = ColorBar(color_mapper=corr_mapper, location=(0, 0))
p.add_layout(color_bar, "right")
show(p)
One option is to use matplotlib's scatter plots with legends and grid. You can specify size of those circles with specifying the scales. You can also change the color of each circle. You should somehow specify X,Y values so that the circles sit straight on lines. This is an example I got from here:
# Random demo data: 40 points, each with a colour value (ranking) and a
# size value (price).
volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)


def size_to_price(s):
    # Inverse of the size formula below: recovers the price from a marker size.
    return np.sqrt(s/.3)/3


fig, ax = plt.subplots()

# Raw prices are far too small to use directly as marker sizes, so they are
# rescaled to useful point sizes: s = 0.3*(price*3)**2.
marker_sizes = 0.3*(price*3)**2
scatter = ax.scatter(volume, amount, c=ranking, s=marker_sizes,
                     vmin=-3, vmax=3, cmap="Spectral")

# Colour legend for the ranking. There are 40 distinct rankings, but only
# 5 representative entries are requested.
ranking_entries = scatter.legend_elements(num=5)
ranking_legend = ax.legend(*ranking_entries, loc="upper left", title="Ranking")
# Re-add the first legend explicitly before a second ax.legend call is made.
ax.add_artist(ranking_legend)

# Size legend for the price. *func* maps marker sizes back to prices and
# *fmt* renders them in dollars. Five entries are requested, but fewer may
# appear because round price values are chosen automatically.
price_entries = scatter.legend_elements(
    prop="sizes",
    num=5,
    color=scatter.cmap(0.7),
    fmt="$ {x:.2f}",
    func=size_to_price,
)
price_legend = ax.legend(*price_entries, loc="lower right", title="Price")
plt.show()
Output:
I don't have enough reputation to comment on Delenges' excellent answer, so I'll leave my comment as an answer instead:
R.flat doesn't order the way we need it to, so the circles assignment should be:
# NOTE(review): in the snippet this refers to, R has shape (N, M) = (10, 11),
# while j comes from x (0..M-1) and i from y (0..N-1). R[j][i] therefore looks
# transposed, and R[10] would be out of range — confirm before relying on it.
circles = [plt.Circle((j,i), radius=R[j][i]) for j, i in zip(x.flat, y.flat)]
Here is an easy example to plot circle_heatmap.
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.datasets import load_wine as load_data
from psynlig import plot_correlation_heatmap
plt.style.use('seaborn-talk')

# Load the example data set into a DataFrame, one column per feature.
wine = load_data()
features = pd.DataFrame(wine['data'], columns=wine['feature_names'])

# Options forwarded to plot_correlation_heatmap, grouped by target.
plot_options = dict(
    heatmap=dict(vmin=-1, vmax=1, cmap='viridis'),
    figure=dict(figsize=(14, 10)),
)

plot_correlation_heatmap(features, bubble=True, annotate=False, **plot_options)
plt.show()

Bokeh line 'disappearing' when using hover_line_alpha

I'm using the Bokeh package to plot a line chart.
I want a given line to bolden (alpha to increase) when I hover over it.
I added a hover tool and then added "hover_line_alpha = 0.6" in my line chart.
However when I hover over points on a given line, the line disappears altogether!
Can you help me fix this?
Code below so you can see my logic.
Thanks,
Ross
# Code in Question
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool
output_notebook()

# set out axes
x = 'time_rnd'
y = 'count'

# set colour palette
col_brew = ['#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#b3de69','#fccde5','#d9d9d9','#bc80bd','#ccebc5','#ffed6f']

# map out figure
plot = figure(tools='box_select, lasso_select, save', x_axis_type='datetime')

# add HoverTool; "@column" in a tooltip is replaced by that column's value
# from the hovered glyph's data source
hover_info = [('time', '@hover_time'),
              ('word', '@word'),
              ('count', '@count')]
hover = HoverTool(names=['use'], tooltips=hover_info,
                  mode='mouse',
                  show_arrow=True)
plot.add_tools(hover)

### FOR LOOP OF PLOT [THIS IS WHERE THE ISSUE MANIFESTS]
# NOTE: top_wds_test, top_wds and df_eng_timeline are defined outside this
# snippet.
for i in top_wds_test:
    df_eng_word = df_eng_timeline[df_eng_timeline['word'] == i]
    # The line and the circles of one word share this single data source.
    source = ColumnDataSource(df_eng_word)
    word_color = col_brew[top_wds.index(i)]
    plot.line(x, y, line_width=3,
              line_alpha=0.1, line_color=word_color,
              hover_line_alpha=0.6,
              #hover_line_color = 'black',
              #hover_line_color = col_brew[top_wds.index(i)],
              source=source, legend=i, name='use')
    plot.circle(x, y, fill_color='white', size=5,
                selection_color='green',
                nonselection_fill_color='grey', nonselection_fill_alpha=0.4,
                hover_color='red',
                source=source, name='use')

# add legend
plot.legend.location = "top_left"
plot.legend.label_text_font_style = 'bold'

# materialize the plot
show(plot)
There seems to be an issue when the renderers share a data source. However, this works (with Bokeh >= 0.13.0) if you let Bokeh create a new separate source for each glyph:
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
# "$name" is the name of the hovered renderer; "@$name" reads the value of
# the data column that has that same name (here "y1" or "y2").
p = figure(tools="hover", tooltips="$name: @$name")

# Passing a plain dict (rather than one shared ColumnDataSource) lets Bokeh
# create a separate data source for each glyph.
data = dict(x=[1, 2, 3], y1=[2, 6, 5], y2=[6, 2, 3])

p.line('x', 'y1', color="navy", line_width=3, source=data,
       alpha=0.1, hover_color="navy", hover_alpha=0.6, name="y1")
p.line('x', 'y2', color="firebrick", line_width=3, source=data,
       alpha=0.1, hover_color="firebrick", hover_alpha=0.6, name="y2")
show(p)

Changing bokeh grid lines position

I am trying to plot a few points on a graph, similarly to a heat map.
Sample code (adapted from the heat map section here):
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.transform import transform
import numpy as np
# change this if you don't run it on a Jupyter Notebook
output_notebook()

# Ten random integer points on a 10x10 grid.
testx = np.random.randint(0, 10, 10)
testy = np.random.randint(0, 10, 10)
npdata = np.stack((testx, testy), axis=1)

# Count the points per grid cell. ``density`` replaces the ``normed``
# keyword, which newer NumPy releases no longer accept.
hist, bins = np.histogramdd(npdata, density=False, bins=(10, 10),
                            range=((0, 10), (0, 10)))

# Reshape the 10x10 count matrix into long form: one (x, y, present) row per
# cell, with the categories stored as strings for the categorical axes.
data = pd.DataFrame(hist, columns=[str(x) for x in range(10)])
data.columns.name = 'y'
data['x'] = [str(x) for x in range(10)]
data = data.set_index('x')
df = pd.DataFrame(data.stack(), columns=['present']).reset_index()
source = ColumnDataSource(df)

colors = ['lightblue', "yellow"]
mapper = LinearColorMapper(palette=colors, low=df.present.min(), high=df.present.max())

p = figure(plot_width=400, plot_height=400, title="test circle map",
           x_range=list(data.index), y_range=list(data.columns),
           toolbar_location=None, tools="", x_axis_location="below")
p.circle(x="x", y="y", size=20, source=source,
         line_color=None, fill_color=transform('present', mapper))

# Hide the axis lines and tick marks; keep only the labels.
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 10
p.xaxis.major_label_orientation = 0
show(p)
That returns:
Now, as you can see, the grid lines are centered on the points (circles); instead, I would like each circle to be enclosed in a square formed by the grid lines.
I went through this to see if I could find information on how to offset the grid lines by 0.5 (that would have worked), but I was not able to.
There's nothing built into Bokeh to accomplish this kind of offsetting of categorical ticks, but you can write a custom extension to do it:
from bokeh.models import CategoricalTicker

# CoffeeScript is indentation-sensitive: the class body and the method body
# below must stay indented for the extension to compile.
CS_CODE = """
import {CategoricalTicker} from "models/tickers/categorical_ticker"

export class MyTicker extends CategoricalTicker
  type: "MyTicker"

  get_ticks: (start, end, range, cross_loc) ->
    ticks = super(start, end, range, cross_loc)

    # shift the default tick locations by half a categorical bin width
    ticks.major = ([x, 0.5] for x in ticks.major)

    return ticks
"""


class MyTicker(CategoricalTicker):
    """Categorical ticker whose major ticks are shifted by half a bin."""

    __implementation__ = CS_CODE


# Only the grids use the shifted ticker; p is the figure built earlier.
p.xgrid.ticker = MyTicker()
p.ygrid.ticker = MyTicker()
Note that Bokeh assumes CoffeeScript by default when the code is just a string, but it's possible to use pure JS or TypeScript as well. Adding this to your code yields:
Please note the comment about output_notebook: you must call it (possibly again, if you have called it previously) after the custom model is defined, due to #6107.

Bokeh x_axis_type for time duration?

The x-axis on my bokeh plot represents a time duration like five seconds rather than a time like 2016-01-01 12:00:00. Is there a way to render the ticks on my Bokeh x-axis appropriately? Setting x_axis_type='datetime' doesn't do quite the right thing, as can be seen from the repetition of 0ms in the plot below:
On Bokeh 0.12.6, you can use PrintfTickFormatter.
from bokeh.plotting import figure, output_file, show
from bokeh.models import PrintfTickFormatter
output_file('output.html')
# x and y are the data sequences to plot (not defined in this snippet).
p = figure(plot_width=400, plot_height=400)
# Line glyphs take line_width; they have no "size" property.
p.line(x, y, line_width=1)
# must be applied to the 1st element, not the axis itself
p.xaxis[0].formatter = PrintfTickFormatter(format="%sms")
show(p)
You don't even have to set x_axis_type='datetime', it will work even with a linear axis.
EDIT: To apply custom formatting of units, such as ms/s/min, you have to use FuncTickFormatter, because it is too sophisticated for Bokeh to handle at the moment. There are two ways to use it as of 0.12.6.
First, by using the transpiler to convert a Python function to JavaScript code, via Flexx (pip install flexx). It keeps everything in Python syntax, but requires an additional dependency.
from bokeh.plotting import figure, output_file, show
from bokeh.models import FuncTickFormatter
output_file('output.html')
# x and y are the data sequences to plot (not defined in this snippet).
p = figure(plot_width=400, plot_height=400)
# Line glyphs take line_width; they have no "size" property.
p.line(x, y, line_width=1)
# custom formatter function
def custom_formatter():
    """Format a millisecond tick value using the largest unit that fits.

    The function takes no arguments. It reads the free name ``tick``, which
    is not defined here and must be supplied by the environment the function
    runs in. The body uses only simple constructs because the function is
    handed to ``FuncTickFormatter.from_py_func`` for conversion to JavaScript.

    Returns:
        A label such as ``'2.0min'``, ``'5.0s'`` or ``'250.0ms'``.
    """
    units = [
        ('min', 60000.0),
        ('s', 1000.0),
        ('ms', 1.0),
    ]
    # Units are ordered largest first, so the first match is the largest fit.
    for u in units:
        if tick >= u[1]:
            return '{}{}'.format(tick / u[1], u[0])
    # Ticks below 1 ms (including the 0 tick at the axis origin) match no
    # unit above; label them in milliseconds instead of returning None.
    return '{}{}'.format(tick, 'ms')
# must be applied to the 1st element, not the axis itself
p.xaxis[0].formatter = FuncTickFormatter.from_py_func(custom_formatter)
show(p)
Lastly, by writing actual Javascript code as a string and passing as a parameter to the formatter. Bokeh does it natively. Bear in mind you have no control over the client environment, so avoid using anything other than pure vanilla Javascript.
from bokeh.plotting import figure, output_file, show
from bokeh.models import FuncTickFormatter
output_file('output.html')
# x and y are the data sequences to plot (not defined in this snippet).
p = figure(plot_width=400, plot_height=400)
# Line glyphs take line_width; they have no "size" property.
p.line(x, y, line_width=1)
# NOTE: this Python list is not passed to the formatter; the JavaScript code
# below defines its own ``units`` object with the same values.
units = [
    ('min', 60000.0),
    ('s', 1000.0),
    ('ms', 1.0),
]
# must be applied to the 1st element, not the axis itself
p.xaxis[0].formatter = FuncTickFormatter(code="""
    var units = {'min': 60000.0, 's': 1000.0, 'ms': 1.0};
    for (var u in units) {
        if (tick >= units[u]) {
            return (tick / units[u] + u);
        }
    }
    // ticks below 1 ms (e.g. the 0 tick) match no unit above
    return (tick + 'ms');
""")
show(p)
I find it a bit annoying, but that's how I fixed the axis for my application. I find the need to hardcode a variable named tick a terrible programming practice. Hopefully, Bokeh will provide a better solution in the near future.

Bokeh hovertool in multiple_line plot

I'm new to bokeh and I just jumped right into using hovertool as that's why I wanted to use bokeh in the first place.
Now I'm plotting genes and what I want to achieve is multiple lines with the same y-coordinate and when you hover over a line you get the name and position of this gene.
I have tried to mimic this example, but for some reason I can't even get it to show coordinates.
I'm sure that if someone who actually knows their way around bokeh looks at this code, the mistake will be apparent and I'd be very thankful if they showed it to me.
from bokeh.plotting import figure, HBox, output_file, show, VBox, ColumnDataSource
from bokeh.models import Range1d, HoverTool
from collections import OrderedDict
import random
# NOTE: levelsdf2 and name are defined outside this snippet.
# Every gene is drawn at the same height, y = 10.
ys = [10] * len(levelsdf2[(name, 'Start')])

# One (start, stop) pair per gene. zip() is lazy on Python 3, so the pairs
# are materialised as lists: they are sliced and measured with len() below.
xscale = list(zip(levelsdf2[('Log', 'Start')], levelsdf2[('Log', 'Stop')]))
yscale = list(zip(ys, ys))

TOOLS = "pan,wheel_zoom,box_zoom,reset,hover"
output_file("scatter.html")
hover_tips = levelsdf2.index.values
# One random hex colour per gene.
colors = ["#%06x" % random.randint(0, 0xFFFFFF) for _ in range(len(xscale))]

source = ColumnDataSource(
    data=dict(
        x=xscale,
        y=yscale,
        gene=hover_tips,
        colors=colors,
    )
)

p1 = figure(plot_width=1750, plot_height=950, y_range=[0, 15], tools=TOOLS)
p1.multi_line(xscale[1:10], yscale[1:10], alpha=1, source=source,
              line_width=10, line_color=colors[1:10])

hover = p1.select(dict(type=HoverTool))
hover.tooltips = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
]
show(p1)
the levelsdf2 is a pandas.DataFrame, if it matters.
I figured it out on my own. It turns out that version 0.8.2 of Bokeh doesn't allow hovertool for lines so I did the same thing using quads.
from bokeh.plotting import figure, HBox, output_file, show, VBox, ColumnDataSource
from bokeh.models import Range1d, HoverTool
from collections import OrderedDict
import random
# NOTE: levelsdf2 and name are defined outside this snippet.
# One (start, stop) pair per gene and series. zip() is lazy on Python 3, so
# the pairs are materialised as lists: they are iterated more than once and
# measured with len() below.
xscale = list(zip(levelsdf2[('series1', 'Start')], levelsdf2[('series1', 'Stop')]))
xscale2 = list(zip(levelsdf2[('series2', 'Start')], levelsdf2[('series2', 'Stop')]))
# The second series is drawn as horizontal segments at y = 9.2.
y_level2 = [9.2] * len(levelsdf2[(name, 'Start')])
yscale2 = list(zip(y_level2, y_level2))

TOOLS = "pan,wheel_zoom,box_zoom,reset,hover"
output_file("linesandquads.html")
hover_tips = levelsdf2.index.values
# One random hex colour per gene.
colors = ["#%06x" % random.randint(0, 0xFFFFFF) for _ in range(len(xscale))]

proc1 = 'Log'
proc2 = 'MazF2h'
expression1 = levelsdf2[(proc1, 'Level')]
expression2 = levelsdf2[(proc2, 'Level')]

source = ColumnDataSource(
    data=dict(
        # min/max normalise each pair so that start <= stop
        start=[min(pair) for pair in xscale],
        stop=[max(pair) for pair in xscale],
        start2=[min(pair) for pair in xscale2],
        stop2=[max(pair) for pair in xscale2],
        gene=hover_tips,
        colors=colors,
        expression1=expression1,
        expression2=expression2,
    )
)

p1 = figure(plot_width=900, plot_height=500, y_range=[8, 10.5], tools=TOOLS)
# Thin quads (9.209 to 9.211) stand in for lines in the first series.
p1.quad(left="start", right="stop", top=[9.211] * len(xscale),
        bottom=[9.209] * len(xscale), source=source, color="colors")
p1.multi_line(xscale2, yscale2, source=source, color="colors", line_width=20)

hover = p1.select(dict(type=HoverTool))
# "@column" in a tooltip is replaced by that column's value from the source.
hover.tooltips = OrderedDict([
    (proc1 + " (start,stop, expression)", "(@start| @stop| @expression1)"),
    ("Gene", "@gene"),
])
show(p1)
Works like a charm.
EDIT: Added a picture of the result, as requested and edited code to match the screenshot posted.
It's not the best solution as it turns out it's not all that easy to plot several series of quads on one plot. It's probably possible but as it didn't matter much in my use case I didn't investigate too vigorously.
As all genes are represented on all series at the same place I just added tooltips for all series to the quads and plotted the other series as multi_line plots on the same figure.
This means that if you hovered on the top line at 9.21 you'd get tooltips for the line at 9.2 as well, but if you hovered on the 9.2 line you wouldn't get a tooltip at all.

Categories

Resources