I'm getting this error:
TypeError: Object of type Interval is not JSON serializable
Here is my code.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.models import NumeralTickFormatter
def construct_labels(start, end):
    """Build half-open interval labels such as ``'(0.0, 0.5]'``.

    Args:
        start: Iterable of left (exclusive) bin edges.
        end: Iterable of right (inclusive) bin edges, paired positionally
            with ``start``.

    Returns:
        A list with one ``'(left, right]'`` string per (start, end) pair.
        Pairing stops at the shorter of the two inputs.
    """
    return ['({}, {}]'.format(left, right) for left, right in zip(start, end)]
# Reproduction case from the question: bin random lengths into fixed-width
# intervals and plot the share of values per bin as a bar chart.
# Sample data: ten random values drawn from the range 0 to 4.
values = {'Length': np.random.uniform(0, 4, 10)}
df = pd.DataFrame(values, columns=['Length'])
bin_step_size = 0.5
# List of bin points.
p_bins = np.arange(0, (df['Length'].max() + bin_step_size), bin_step_size)
# Reduce the tail to create the left side bounds.
p_left_limits = p_bins[:-1].copy()
# Cut the head to create the right side bounds.
p_right_limits = np.delete(p_bins, 0)
# Create the bins.
p_range_bins = pd.IntervalIndex.from_arrays(p_left_limits, p_right_limits)
# Create labels.
p_range_labels = construct_labels(p_left_limits, p_right_limits)
# Assign every length to one of the interval bins.
p_ranges_binned = pd.cut(
df['Length'],
p_range_bins,
labels=p_range_labels,
precision=0,
include_lowest=True)
out = p_ranges_binned
# Per-bin counts, kept in bin order rather than sorted by frequency.
counts = out.value_counts(sort=False)
total_element_count = len(df.index)
foo = pd.DataFrame({'bins': counts.index, 'counts': counts})
foo.reset_index(drop=True, inplace=True)
# Share of all rows per bin, as a fraction and as a percentage.
foo['percent'] = foo['counts'].apply(lambda x: x / total_element_count)
foo['percent_full'] = foo['counts'].apply(lambda x: x / total_element_count * 100)
bin_labels = p_range_labels
# Data Container
# NOTE: this is the call that raises the reported TypeError; the `bins`
# column is the one holding the interval values.
source = ColumnDataSource(dict(
bins=foo['bins'],
percent=foo['percent'],
count=foo['counts'],
labels=pd.DataFrame({'labels': bin_labels})
))
# Categorical x-axis with one factor per bin label; toolbar and tools disabled.
p = figure(x_range=bin_labels, plot_height=600, plot_width=1200, title="Range Counts",
toolbar_location=None, tools="")
p.vbar(x='labels', top='percent', width=0.9, source=source)
# Show the fractional `percent` values as percentages on the y-axis.
p.yaxis[0].formatter = NumeralTickFormatter(format="0.0%")
# Rotate the x tick labels by 90 degrees (pi/2 radians).
p.xaxis.major_label_orientation = math.pi / 2
p.xgrid.grid_line_color = None
p.y_range.start = 0
output_file("bars.html")
show(p)
The error comes from here:
# Failing call: the `bins` column holds the interval values that cannot be
# serialized to JSON.
source = ColumnDataSource(dict(
bins=foo['bins'],
percent=foo['percent'],
count=foo['counts'],
labels=pd.DataFrame({'labels': bin_labels})
))
The bins column you passed in holds values of an interval type, which cannot be JSON serialized.
After reviewing your code, I see that this bins column is not used in your plot, so you can change the source to:
# Corrected call: the unused `bins` column is dropped and `labels` is passed
# as a plain list.
source = ColumnDataSource(dict(
percent=foo['percent'],
count=foo['counts'],
labels=bin_labels
))
Notice that I also changed labels to bin_labels, which is a plain list; ColumnDataSource accepts lists as column data. You may want to format these labels further, though, as right now they look like this:
['(0.0, 0.5]',
'(0.5, 1.0]',
'(1.0, 1.5]',
'(1.5, 2.0]',
'(2.0, 2.5]',
'(2.5, 3.0]',
'(3.0, 3.5]',
'(3.5, 4.0]']
You might want to format them to something prettier.
After this small change you should be able to see your bar graph:
Related
I am trying to add a RangeSlider for dates to a graph in Bokeh 2.0.
Code looks like that (data comes from a dataframe in pandas):
# Figure with a datetime x-axis and a single line glyph of cases over date.
p = figure(plot_width = 800, plot_height = 350, x_axis_type = "datetime")
p.line(df['date'], df['cases'], color='navy', alpha=0.5, legend_label = "cases", line_width = 2)
How do I add a slider to set and narrow the focus?
Thanks and cheers,
Ulrich
A couple of things to note:
With a line glyph it will produce the warning "CDSView filters are not compatible with glyphs with connected topology such as Line and Patch". I leave it to you to decide whether that is something you care about.
Changing the slider value will not alter the data range when a line is used - I think for the exact same reason, since using circle instead of line works just fine.
import pandas as pd
from bokeh.io import show
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, BooleanFilter, CDSView, DateRangeSlider, CustomJS
from bokeh.plotting import figure
# Three-row sample frame; the `date` strings are parsed to datetimes below.
df = pd.DataFrame(dict(date=['2020-01-01', '2020-01-02', '2020-01-03'], cases=[1, 2, 3]))
df['date'] = pd.to_datetime(df['date'])
p = figure(plot_width=800, plot_height=350, x_axis_type="datetime")
# The slider starts out spanning the full date range of the data.
init_value = (df['date'].min(), df['date'].max())
slider = DateRangeSlider(start=init_value[0], end=init_value[1], value=init_value)
ds = ColumnDataSource(df)
# Every row is visible initially; the JS callback below rewrites this mask.
date_filter = BooleanFilter(booleans=[True] * df.shape[0])
# On each slider change, recompute in the browser which rows fall inside the
# selected [start, end] range and store the result on the filter.
slider.js_on_change('value', CustomJS(args=dict(f=date_filter, ds=ds),
code="""\
const [start, end] = cb_obj.value;
f.booleans = Array.from(ds.data['date']).map(d => (d >= start && d <= end));
// Needed because of https://github.com/bokeh/bokeh/issues/7273
ds.change.emit();
"""))
# Circle glyphs are drawn through a filtered view of the data source.
p.circle('date', 'cases', source=ds, view=CDSView(source=ds, filters=[date_filter]),
color='navy', alpha=0.5, legend_label="cases", line_width=2)
# Stack the plot above the slider.
show(column(p, slider))
I was able to create two linked plots using holoviews + bokeh backend, basically following this code example.
Here's an example of code from the reference:
import pandas as pd
import numpy as np
import holoviews as hv
import seaborn as sns
from holoviews import opts
# Use the Bokeh plotting backend for HoloViews.
hv.extension('bokeh', width=90)
# Declare dataset
df = sns.load_dataset('tips')
# Keep only the three columns used for the correlation matrix.
df = df[['total_bill', 'tip', 'size']]
# Declare HeatMap
corr = df.corr()
heatmap = hv.HeatMap((corr.columns, corr.index, corr))
# Declare Tap stream with heatmap as source and initial values
posxy = hv.streams.Tap(source=heatmap, x='total_bill', y='tip')
# Define function to compute the linear fit and scatter based on tap location
def tap_histogram(x, y):
    """Return a linear fit of ``df[y]`` on ``df[x]`` overlaid on their scatter.

    Args:
        x: Name of the ``df`` column used for the horizontal axis.
        y: Name of the ``df`` column used for the vertical axis.

    Returns:
        The HoloViews overlay ``Curve * Scatter`` for the selected columns,
        with the Scatter options applied.
    """
    m, b = np.polyfit(df[x], df[y], deg=1)
    # Evaluate the fitted line over the range of the selected x column; the
    # previous code always used the `tip` range, whatever column was tapped.
    x_data = np.linspace(df[x].min(), df[x].max())
    y_data = m * x_data + b
    right = (hv.Curve((x_data, y_data), x, y)
             * hv.Scatter((df[x], df[y]), x, y))
    right.opts(opts.Scatter(
        height=400, width=400, color='red', ylim=(0, 100),
        framewise=True, tools=['hover']))
    return right
# Re-run tap_histogram with the stream's x/y values whenever the Tap stream
# changes.
tap_dmap = hv.DynamicMap(tap_histogram, streams=[posxy])
# Lay the heatmap and the dependent plot out side by side and style them.
(heatmap + tap_dmap).opts(
opts.HeatMap(tools=['tap', 'hover'],
height=400, width=400, toolbar='above'),
opts.Curve(framewise=True))
Now I want to create a hover tool that shows the different parameters on the dependent plot.
So far I have only been able to use the default hover (.opts(tools=['hover'])), as in the code above.
When I try to build a custom hover that dynamically changes its fields based on the streamed x and y values, the hover is not updated after tapping on the heatmap. It keeps only the initial values of x and y.
Here's an example of my current code:
Try tapping on total_bill x size, for example.
import pandas as pd
import numpy as np
import holoviews as hv
import seaborn as sns
from holoviews import opts
from bokeh.models import HoverTool
# Use the Bokeh plotting backend for HoloViews.
hv.extension('bokeh', width=90)
# Declare dataset
df = sns.load_dataset('tips')
# Keep only the three columns used for the correlation matrix.
df = df[['total_bill', 'tip', 'size']]
# Declare HeatMap
corr = df.corr()
heatmap = hv.HeatMap((corr.columns, corr.index, corr))
# Declare Tap stream with heatmap as source and initial values
posxy = hv.streams.Tap(source=heatmap, x='total_bill', y='tip')
# Define function to compute the linear fit and scatter based on tap location
def tap_histogram(x, y):
    """Return a linear fit of ``df[y]`` on ``df[x]`` overlaid on their scatter.

    A custom Bokeh HoverTool is built for the selected pair of columns and
    attached to the Scatter element.

    Args:
        x: Name of the ``df`` column used for the horizontal axis.
        y: Name of the ``df`` column used for the vertical axis.

    Returns:
        The HoloViews overlay ``Curve * Scatter`` for the selected columns,
        with the Scatter options applied.
    """
    m, b = np.polyfit(df[x], df[y], deg=1)
    # Evaluate the fitted line over the range of the selected x column; the
    # previous code always used the `tip` range, whatever column was tapped.
    x_data = np.linspace(df[x].min(), df[x].max())
    y_data = m * x_data + b
    right = (hv.Curve((x_data, y_data), x, y)
             * hv.Scatter((df[x], df[y]), x, y))
    # Bokeh tooltips reference data-source columns with an "@" prefix.
    tooltips = [(x, '@' + x),
                (y, '@' + y)]
    hover = HoverTool(tooltips=tooltips)
    right.opts(opts.Scatter(
        height=400, width=400, color='red', ylim=(0, 100),
        framewise=True, tools=[hover]))
    return right
# Re-run tap_histogram with the stream's x/y values whenever the Tap stream
# changes.
tap_dmap = hv.DynamicMap(tap_histogram, streams=[posxy])
# Lay the heatmap and the dependent plot out side by side and style them.
(heatmap + tap_dmap).opts(
opts.HeatMap(tools=['tap', 'hover'],
height=400, width=400, toolbar='above'),
opts.Curve(framewise=True))
I'm trying to plot a simple heatmap using bokeh/holoviews. My data (a pandas dataframe) has categoricals (on y) and datetimes (on x). The problem is that the number of categorical elements is >3000, and the resulting plot has overlapping tick labels on the y axis, which makes it totally useless. Is there currently a reliable way in bokeh to show only a subset of the ticks based on the zoom level?
I've already tried plotly and the result looks perfect; however, I need to use bokeh/holoviews and datashader. I also want to avoid replacing the categorical ticks with numerical ones.
I've also tried this solution, but it doesn't actually work (bokeh 1.2.0).
This is a toy example representing my use case (Actually here #y is 1000 but it gives the idea)
from datetime import datetime
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.transform import linear_cmap
from bokeh.io import output_notebook
output_notebook()

# build sample data: 1000 timestamps x 100 named columns of random values
index = pd.date_range(start='1/1/2019', periods=1000, freq='T')
data = np.random.rand(1000, 100)
columns = ['col' + str(n) for n in range(100)]

# initial data format
df = pd.DataFrame(data=data, index=index, columns=columns)

# bokeh: reshape to long format, one row per (timestamp, column) cell
df = df.stack().reset_index()
df.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'z'}, inplace=True)
df.sort_values(by=['y'], inplace=True)

# Convert the pandas timestamps to millisecond-precision Python objects.
x = [
    date.to_datetime64().astype('M8[ms]').astype('O')
    for date in df.x.to_list()
]
data = {
    'value': df.z.to_list(),
    'x': x,
    'y': df.y.to_list(),
    'date': df.x.to_list(),
}

# Hover fields reference data columns with the "@" prefix.
p = figure(x_axis_type='datetime', y_range=columns, width=900,
           tooltips=[("x", "@date"), ("y", "@y"), ("value", "@value")])
# One rect per cell, coloured by `value` over the observed min/max range.
p.rect(x='x', y='y', width=60*1000, height=1, line_color=None,
       fill_color=linear_cmap('value', 'Viridis256', low=df.z.min(), high=df.z.max()),
       source=data)
show(p)
In the end, I partially followed the suggestion from James and managed to get it to work using a Python callback for the ticker. This solution was hard for me to find; I searched through the Bokeh docs, examples and source code for days.
The main problem for me is that the docs do not mention how to use "ColumnDataSource" objects in the custom callback.
https://docs.bokeh.org/en/1.2.0/docs/reference/models/formatters.html#bokeh.models.formatters.FuncTickFormatter.from_py_func
Finally, this helped a lot:
https://docs.bokeh.org/en/1.2.0/docs/user_guide/interaction/callbacks.html#customjs-with-a-python-function.
So, I modified the original code as follows, in the hope that it will be useful to someone:
from datetime import datetime
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.transform import linear_cmap
from bokeh.io import output_notebook
from bokeh.models import FuncTickFormatter
from bokeh.models import ColumnDataSource
output_notebook()
# build sample data
index = pd.date_range(start='1/1/2019', periods=1000, freq='T')
data = np.random.rand(1000,100)
# Human-readable labels, kept separately from the integer column ids below.
columns_labels = ['col'+ str(n) for n in range(100)]
# Integer column ids (0..99) replace the string names as y values.
columns = [n for n in range(100)]
# initial data format
df = pd.DataFrame(data=data, index=index, columns=columns)
# bokeh
# Reshape to long format: one row per (timestamp, column id) cell.
df = df.stack().reset_index()
df.rename(columns={'level_0':'x','level_1':'y', 0:'z'},inplace=True)
df.sort_values(by=['y'],inplace=True)
# Convert the pandas timestamps to millisecond-precision Python objects.
x = [
date.to_datetime64().astype('M8[ms]').astype('O')
for date in df.x.to_list()
]
data = {
'value': df.z.to_list(),
'x': x,
'y': df.y.to_list(),
# Label text for the hover tooltip, looked up from each row's integer y.
'y_labels_tooltip' : [columns_labels[k] for k in df.y.to_list()],
# Label list repeated 1000 times; the first 100 entries give the label for
# each integer y position, which is what the tick callback indexes into.
'y_ticks' : columns_labels*1000,
'date' : df.x.to_list()
}
cd = ColumnDataSource(data=data)
# Tick-label callback, converted to JavaScript by
# FuncTickFormatter.from_py_func. `source` is bound to the ColumnDataSource
# through the default argument. `tick` is intentionally not defined in Python:
# per the Bokeh FuncTickFormatter documentation it holds the unformatted tick
# value when the generated formatter runs.
def ticker(source=cd):
    labels = source.data['y_ticks']
    return "{}".format(labels[tick])
#p = figure(x_axis_type='datetime', y_range=columns, width=900, tooltips=[("x", "@date{%F %T}"), ("y", "@y_labels"), ("value", "@value")])
# No categorical y_range here: y is numeric and the labels are restored by the
# tick formatter below. Hover fields reference data columns with "@".
p = figure(x_axis_type='datetime', width=900,
           tooltips=[("x", "@date{%F %T}"), ("y", "@y_labels_tooltip"), ("value", "@value")])
# One rect per cell, coloured by `value` over the observed min/max range.
p.rect(x='x', y='y', width=60*1000, height=1, line_color=None,
       fill_color=linear_cmap('value', 'Viridis256', low=df.z.min(), high=df.z.max()),
       source=cd)
# Format the `date` hover field as a datetime.
p.hover.formatters = {'date': 'datetime'}
# Translate numeric tick positions back into the column labels.
p.yaxis.formatter = FuncTickFormatter.from_py_func(ticker)
# Ask the y-axis ticker for roughly 20 ticks.
p.yaxis[0].ticker.desired_num_ticks = 20
show(p)
The result is this:
I am trying to plot a few points on a graph, similarly to a heat map.
Sample code (adapted from the heat map section here):
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.transform import transform
import numpy as np
# change this if you don't run it on a Jupyter Notebook
output_notebook()
# Ten random integer (x, y) points with coordinates in 0..9.
testx = np.random.randint(0,10,10)
testy = np.random.randint(0,10,10)
npdata = np.stack((testx,testy), axis = 1)
# 10x10 two-dimensional histogram of the points over the range [0, 10).
hist, bins = np.histogramdd(npdata, normed = False, bins = (10,10), range=((0,10),(0,10)))
# Wrap the counts in a frame whose row (x) and column (y) labels are strings.
data = pd.DataFrame(hist, columns = [str(x) for x in range(10)])
data.columns.name = 'y'
data['x'] = [str(x) for x in range(10)]
data = data.set_index('x')
# Long format: one row per (x, y) cell with its count in `present`.
df = pd.DataFrame(data.stack(), columns=['present']).reset_index()
source = ColumnDataSource(df)
colors = ['lightblue', "yellow"]
# Map the `present` counts onto the two-colour palette.
mapper = LinearColorMapper(palette=colors, low=df.present.min(), high=df.present.max())
# Categorical axes built from the string row and column labels.
p = figure(plot_width=400, plot_height=400, title="test circle map",
x_range=list(data.index), y_range=list((data.columns)),
toolbar_location=None, tools="", x_axis_location="below")
p.circle(x="x", y="y", size=20, source=source,
line_color=None, fill_color=transform('present', mapper))
# Hide the axis lines and major tick marks; keep only the labels.
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 10
p.xaxis.major_label_orientation = 0
show(p)
That returns:
Now, as you can see, the grid lines are centered on the points (circles); instead, I would like each circle to be enclosed in a square formed by the grid lines.
I went through this to see if I could find information on how to offset the grid lines by 0.5 (that would have worked), but I could not find any.
There's nothing built into Bokeh to accomplish this kind of offsetting of categorical ticks, but you can write a custom extension to do it:
# The Python-side base class must be in scope before MyTicker is defined.
from bokeh.models import CategoricalTicker

# CoffeeScript implementation of the custom ticker. CoffeeScript is
# indentation-sensitive, so the class body and method body must stay indented.
CS_CODE = """
import {CategoricalTicker} from "models/tickers/categorical_ticker"
export class MyTicker extends CategoricalTicker
  type: "MyTicker"
  get_ticks: (start, end, range, cross_loc) ->
    ticks = super(start, end, range, cross_loc)
    # shift the default tick locations by half a categorical bin width
    ticks.major = ([x, 0.5] for x in ticks.major)
    return ticks
"""


class MyTicker(CategoricalTicker):
    """Categorical ticker whose major ticks are offset by half a bin width."""

    __implementation__ = CS_CODE


# Use the shifted ticker for the grid lines only.
p.xgrid.ticker = MyTicker()
p.ygrid.ticker = MyTicker()
Note that Bokeh assumes CoffeeScript by default when the code is just a string, but it's possible to use pure JS or TypeScript as well. Adding this to your code yields:
Please note the comment about output_notebook: you must call it (possibly again, if you have called it previously) after the custom model is defined, due to #6107.
I'm trying to adapt the brewer example (http://docs.bokeh.org/en/latest/docs/gallery/stacked_area.html) to my needs. One of the things I'd like is to have dates at the x-axis. I did the following:
# Month-start dates from 1950-01-01 to 1951-07-01 as strings, used as the
# categorical factors of the x-axis.
timesteps = [str(x.date()) for x in pd.date_range('1950-01-01', '1951-07-01', freq='MS')]
p = figure(x_range=FactorRange(factors=timesteps), y_range=(0, 800))
# Rotate the x tick labels by 45 degrees (pi/4 radians).
p.xaxis.major_label_orientation = np.pi/4
as an adaptation of the previous line
# The line being adapted: a plain numeric x-range from 0 to 19.
p = figure(x_range=(0, 19), y_range=(0, 800))
The dates are displayed, but the first date 1950-01-01 sits at x=1. How can I shift it to x=0? The first real data points I have are for that date and therefore should be displayed together with that date and not one month later.
Well, if you have a list of strings as your x axis, then apparently the count starts at 1, so you have to modify your x data so that the plot starts at 1. Note that the brewer example (http://docs.bokeh.org/en/latest/docs/gallery/stacked_area.html) has a range from 0 to 19, so it has 20 data points, not 19 like your timesteps list. I changed the x input for the plot to data['x'] = np.arange(1,N+1), so that it runs from 1 to N, and I added one more month to your list: timesteps = [str(x.date()) for x in pd.date_range('1950-01-01', '1951-08-01', freq='MS')]
Here is the complete code:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_file
from bokeh.palettes import brewer
# Number of x positions (one per time step).
N = 20
categories = ['y' + str(x) for x in range(10)]
data = {}
# x runs from 1 to N (not 0 to N-1).
data['x'] = np.arange(1, N + 1)
# One column of N random integers in [10, 100) per category.
for cat in categories:
    data[cat] = np.random.randint(10, 100, size=N)
df = pd.DataFrame(data)
df = df.set_index(['x'])
def stacked(df, categories):
    """Compute the outline y-values of each band in a stacked-area chart.

    For every category the lower boundary (the running total of the previous
    categories, reversed) is joined to the upper boundary (the running total
    including this category).

    Args:
        df: Table indexable by category name, giving one numeric column per
            category.
        categories: Category names in stacking order, bottom first.

    Returns:
        A dict mapping each category to a 1-D array of twice the column
        length: reversed lower boundary followed by the upper boundary.
        Empty when ``categories`` is empty.
    """
    areas = dict()
    if not categories:
        return areas
    lower = np.zeros(len(df[categories[0]]))
    for cat in categories:
        # `upper` avoids shadowing the builtin `next`.
        upper = lower + df[cat]
        areas[cat] = np.hstack((lower[::-1], upper))
        lower = upper
    return areas
# `bokeh` itself is never imported as a name, so FactorRange is imported
# directly rather than referenced as bokeh.models.FactorRange.
from bokeh.models import FactorRange

areas = stacked(df, categories)
# One palette colour per stacked band.
colors = brewer["Spectral"][len(areas)]
# x outline matching the band outlines: reversed x values, then forward.
x2 = np.hstack((data['x'][::-1], data['x']))
timesteps = [str(x.date()) for x in pd.date_range('1950-01-01', '1951-08-01', freq='MS')]
p = figure(x_range=FactorRange(factors=timesteps), y_range=(0, 800))
p.grid.minor_grid_line_color = '#eeeeee'
p.patches([x2] * len(areas), [areas[cat] for cat in categories],
          color=colors, alpha=0.8, line_color=None)
# Rotate the x tick labels by 45 degrees (pi/4 radians).
p.xaxis.major_label_orientation = np.pi/4
show(p)
And here is the output:
UPDATE
You can leave data['x'] = np.arange(0,N) from 0 to 19, and then use offset=-1 inside FactorRange, i.e. figure(x_range=bokeh.models.FactorRange(factors=timesteps,offset=-1),...
Update version bokeh 0.12.16
In this version I am using datetime for x axis which has the advantage of nicer formatting when zooming in.
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_file
from bokeh.palettes import brewer
# Month-start timestamps from 1950-01-01 to 1951-07-01, kept as datetimes.
timesteps = [x for x in pd.date_range('1950-01-01', '1951-07-01', freq='MS')]
N = len(timesteps)
# Number of stacked categories.
cats = 10
# N rows x `cats` columns of random integers in [10, 100); add_prefix names
# the columns 'y0', 'y1', ...
df = pd.DataFrame(np.random.randint(10, 100, size=(N, cats))).add_prefix('y')
def stacked(df):
    """Build the band outlines of a stacked-area chart, one column per band.

    Args:
        df: DataFrame with one numeric column per category, in stacking
            order (first column at the bottom).

    Returns:
        A DataFrame with twice as many rows as ``df`` and a fresh integer
        index: the lower boundaries in reversed row order, followed by the
        upper boundaries.
    """
    # Upper boundary of each band: running total across the columns.
    df_top = df.cumsum(axis=1)
    # Lower boundary: the upper boundary of the band to the left. The shift
    # leaves the first column empty; its baseline is 0, whatever it is named.
    baseline = {col: 0 for col in df_top.columns[:1]}
    df_bottom = df_top.shift(axis=1).fillna(baseline)[::-1]
    df_stack = pd.concat([df_bottom, df_top], ignore_index=True)
    return df_stack
# `bokeh` itself is never imported as a name, so the formatter is imported
# directly rather than referenced as bokeh.models.formatters.*.
from bokeh.models import DatetimeTickFormatter

areas = stacked(df)
# One palette colour per stacked band (one band per column of `areas`).
colors = brewer['Spectral'][areas.shape[1]]
# x outline matching the band outlines: reversed timestamps, then forward.
x2 = np.hstack((timesteps[::-1], timesteps))
p = figure(x_axis_type='datetime', y_range=(0, 800))
p.grid.minor_grid_line_color = '#eeeeee'
p.patches([x2] * areas.shape[1], [areas[c].values for c in areas],
          color=colors, alpha=0.8, line_color=None)
# Show month-level ticks as full dates.
p.xaxis.formatter = DatetimeTickFormatter(months=["%Y-%m-%d"])
# Rotate the x tick labels by 45 degrees; the earlier 3.4142/4 was a typo
# for pi/4.
p.xaxis.major_label_orientation = np.pi / 4
output_file('brewer.html', title='brewer.py example')
show(p)