Drawing a heatmap in BokeH displays empty graph - python

I am trying to draw a heatmap(spectrogram) in bokeh but when the heatmap displays it is empty..
This is the code which has some simply sample data, but this would be extended to fetch a large dataset via json.
from math import pi
import pandas as pd
from bokeh.io import show
from bokeh.models import LinearColorMapper, BasicTicker, PrintfTickFormatter, ColorBar
from bokeh.plotting import figure
# initialise data of lists.
data = {'epoch':[63745131000000, 63745131000000, 63745131100000,63745131100000], 'energy':[1.06811, 1.22078, 1.59495, 1.82245],'value':[3981.9308143034305, 2868.5202872178324, 1330.887696894385, 745.6847248644897]}
# Creates pandas DataFrame.
df = pd.DataFrame(data)
# print the data
print(df)
# this is the colormap from the original NYTimes plot
colors = ['#00007F', '#0000ff', '#007FFF', '#00ffff', '#7FFF7F', '#ffff00', '#FF7F00', '#ff0000', '#7F0000']
mapper = LinearColorMapper(palette=colors, low=df.value.min(), high=df.value.max())
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
epochs = list(df.epoch.drop_duplicates())
print(epochs)
energies = list(df.energy.drop_duplicates())
print(energies)
p = figure(title="My Plot",
x_axis_location="below",
tools=TOOLS, toolbar_location='below',
tooltips=[('epoch', '#epoch'), ('energy', '#energy'), ('value', '#value')])
p.xaxis.ticker = epochs
p.yaxis.ticker = energies
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "5pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3
p.rect(x="epoch", y="energy", width=1, height=1,
source=df,
fill_color={'field': 'value', 'transform': mapper},
line_color=None)
color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="5pt",
ticker=BasicTicker(desired_num_ticks=len(colors)),
label_standoff=6, border_line_color=None, location=(0, 0))
p.add_layout(color_bar, 'right')
show(p)
The frame which is output looks correct:
epoch energy value
0 63745131000000 1.06811 3981.930814
1 63745131000000 1.22078 2868.520287
2 63745131100000 1.59495 1330.887697
3 63745131100000 1.82245 745.684725
and the ranges for the x and y look ok as well:
[63745131000000, 63745131100000]
[1.06811, 1.22078, 1.59495, 1.82245]
But the image that appears has no points plotted:
I should mention, if I simply change the second epoch to one after e.g)
'epoch':[63745131000000, 63745131000000, 63745131000001,63745131000001]
Then the chart seems to be displayed correctly:
Grateful for any help.
Thanks

The reason why there is no information showing up is because at the edge bokeh apparently does not think your part of the data is something that deserves a color.
What you should change is the limits in you mapper:
mapper = LinearColorMapper(palette=colors, low=df.value.min()-1, high=df.value.max()+1) # this will make sure your data is inside the mapping
Also your width is defined in the figure a being 1. When your epochs are differing with a million you will still see almost nothing when you are plotting this so change
p.rect(x="epoch", y="energy", width=100000, height=1, # here width is set to an adequate level.
source=df,
fill_color={'field': 'value', 'transform': mapper},
line_color=None)

Related

How to place images using image_url in Bokeh

I have a chart that uses datetime for the x-axis and dollars for the y-axis in Bokeh. I want to place a logo in the upper left corner of the plot area. Bokeh documentations seems especially cryptic on placing images. This code works:
from bokeh.plotting import figure, show
#p = figure(x_range=(0,1200), y_range=(0,600))
p = figure(plot_width=1200, plot_height=600,
sizing_mode = 'scale_width',
toolbar_location='above',
x_axis_label='date',
x_axis_type='datetime',
y_axis_label='value',
)
p.image_url(x=0, y=1, url=["Shrewd_Lines_200.png"], anchor='bottom_left')
show(p)
But when I place this into my main chart, where the data is in datetime, I can not get an image to appear. Here are the key excerpts from the code within the primary chart:
plot = figure(plot_width=1200, plot_height=600,
sizing_mode = 'scale_width',
toolbar_location='above',
tools=tools,
title=plot_dict['chart_title'],
x_axis_label='date',
x_axis_type='datetime',
y_axis_label='value',
)
plot.x_range.end=plot_dict['end_data'] + extend_time
if plot_dict['start_chart'] == 'auto':
plot.x_range.start=plot_dict['start_user_data']
else:
plot.x_range.start = plot_dict['start_chart']
plot.y_range.start=0
plot.y_range.end= extend_y * plot_dict['max_value']
plot.left[0].formatter.use_scientific = False
plot.title.text_font_size = "16pt"
I have tried various approaches to plot the image such as:
plot.image_url(x=0, y=0, url=["Shrewd_Lines_200.png"], anchor='bottom_left')
plot.image_url(x=plot_dict['start_user_data'], y=10000000, url=["Shrewd_Lines_200.png"], anchor='bottom_left')
I have several labels in the chart that work quite nicely. Is there a way to specify image location and size using screen units, in the same manner as you specify locations for labels?
Thought I would post how I got this working in order to move forward. I used the following for my Bokeh plot that places my logo with some generic math to convert data space to screen space. It does this without using numpy arrays or ColumnDataSource (neither of which are bad, but trying to keep simple):
from bokeh.plotting import figure, show
# chart size and ranges need defined for dataspace location
# chart size
chart_width = 900
chart_height = 600
aspect_ratio = chart_width/chart_height
# limits of data ranges
x1 = 300
x2 = 1200
y1 = 0
y2 = 600
plot = figure(
plot_width=chart_width,
plot_height=chart_height,
x_range=(x1, x2),
y_range=(y1, y2),
sizing_mode = 'stretch_both',
x_axis_label='date',
x_axis_type='datetime',
y_axis_label='value')
plot.image_url(url=['my_image.png'], x=(.01*(x2-x1))+x1, y=(.98*(y2-y1))+y1,
w=.35*(x2-x1)/aspect_ratio, h=.1*(y2-y1), anchor="top_left")
show(plot)
Note the x_axis_type can be any type with this schema, datetime was just the issue I was dealing with.

How to set initial zoom of bokeh box chart of pandas group with a large number of categories

I'm plotting covid-19 data for countries grouped by World Bank regions using pandas and Bokeh.
from bokeh.io import output_file, show
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
group = data.groupby(["region", "CountryName"])
index_cmap = factor_cmap(
'region_CountryName',
palette=Spectral5,
factors=sorted(data.region.unique()),
end=1
)
p = figure(plot_width=800, plot_height=600, title="Confirmed cases per 100k people by country",
x_range=group, toolbar_location="left")
p.vbar(x='region_CountryName', top='ConfirmedPer100k_max', width=1, source=group,
line_color="white", fill_color=index_cmap, )
p.y_range.start = 0
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 3.14159/2
p.xaxis.group_label_orientation = 3.14159/2
p.outline_line_color = None
show(p)
And I get a
I would like to set some sort of initial zoom into the x-axis to get a more manageable image
, which I got by manually zooming in.
Any suggestions?
You should be able to accomplish this with the x_range parameter. In this example, the plot's x range would be the first 20 countries. You can adjust as needed. You might also have to mess around a bit to get the group_cn_list correct. It's hard to say without seeing your data. If you can post a df example for reproducibility, it would help.
group_cn_list = group["CountryName"].tolist()
p = figure(plot_width=800, plot_height=600, title="Confirmed cases per 100k people by country",
x_range=group_cn_list[0:20], toolbar_location="left")

Changing bokeh grid lines position

I am trying to plot a few points on a graph, similarly to a heat map.
Sample code (adapted from the heat map section here):
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.transform import transform
import numpy as np
# change this if you don't run it on a Jupyter Notebook
output_notebook()
testx = np.random.randint(0,10,10)
testy = np.random.randint(0,10,10)
npdata = np.stack((testx,testy), axis = 1)
hist, bins = np.histogramdd(npdata, normed = False, bins = (10,10), range=((0,10),(0,10)))
data = pd.DataFrame(hist, columns = [str(x) for x in range(10)])
data.columns.name = 'y'
data['x'] = [str(x) for x in range(10)]
data = data.set_index('x')
df = pd.DataFrame(data.stack(), columns=['present']).reset_index()
source = ColumnDataSource(df)
colors = ['lightblue', "yellow"]
mapper = LinearColorMapper(palette=colors, low=df.present.min(), high=df.present.max())
p = figure(plot_width=400, plot_height=400, title="test circle map",
x_range=list(data.index), y_range=list((data.columns)),
toolbar_location=None, tools="", x_axis_location="below")
p.circle(x="x", y="y", size=20, source=source,
line_color=None, fill_color=transform('present', mapper))
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 10
p.xaxis.major_label_orientation = 0
show(p)
That returns:
Now, as you can see, the grid lines are centered on the points(circles), and I would like, instead to have the circles enclosed in a square created by the lines.
I went through this to see if I could find information on how to offset the grid lines by 0.5 (that would have worked), but I was not able to.
There's nothing built into Bokeh to accomplish this kind of offsetting of categorical ticks, but you can write a custom extension to do it:
CS_CODE = """
import {CategoricalTicker} from "models/tickers/categorical_ticker"
export class MyTicker extends CategoricalTicker
type: "MyTicker"
get_ticks: (start, end, range, cross_loc) ->
ticks = super(start, end, range, cross_loc)
# shift the default tick locations by half a categorical bin width
ticks.major = ([x, 0.5] for x in ticks.major)
return ticks
"""
class MyTicker(CategoricalTicker):
__implementation__ = CS_CODE
p.xgrid.ticker = MyTicker()
p.ygrid.ticker = MyTicker()
Note that Bokeh assumes CoffeeScript by default when the code is just a string, but it's possible to use pure JS or TypeScript as well. Adding this to your code yields:
Please note the comment about output_notebook you must call it (possibly again, if you have called it previously) after the custom model is defined, due to #6107

How can I add label to a Bokeh barchart?

I have a dataframe as
df = pd.DataFrame(data = {'Country':'Spain','Japan','Brazil'],'Number':[10,20,30]})
I wanted to plot a bar chart with labels (that is value of 'Number') annotated on the top for each bar and proceeded accordingly.
from bokeh.charts import Bar, output_file,output_notebook, show
from bokeh.models import Label
p = Bar(df,'Country', values='Number',title="Analysis", color = "navy")
label = Label(x='Country', y='Number', text='Number', level='glyph',x_offset=5, y_offset=-5)
p.add_annotation(label)
output_notebook()
show(p)
But I got an error as ValueError: expected a value of type Real, got COuntry of type str.
How do I solve this issue ?
Label produces a single label at position x and y. In you example, you are trying to add multiple labels using the data from your DataFrame as coordinates. Which is why you are getting your error message x and y need to be real coordinate values that map to the figure's x_range and y_range. You should look into using LabelSet (link) which can take a Bokeh ColumnDataSource as an argument and build multiple labels.
Unforutnately, you are also using a Bokeh Bar chart which is a high level chart which creates a categorical y_range. Bokeh cannot put labels on categorical y_ranges for now. You can circumvent this problem by creating a lower level vbar chart using placeholder x values and then styling it to give it the same look as your original chart. Here it is in action.
import pandas as pd
from bokeh.plotting import output_file, show, figure
from bokeh.models import LabelSet, ColumnDataSource, FixedTicker
# arbitrary placeholders which depends on the length and number of labels
x = [1,2,3]
# This is offset is based on the length of the string and the placeholder size
offset = -0.05
x_label = [x + offset for x in x]
df = pd.DataFrame(data={'Country': ['Spain', 'Japan', 'Brazil'],
'Number': [10, 20, 30],
'x': x,
'y_label': [-1.25, -1.25, -1.25],
'x_label': x_label})
source = ColumnDataSource(df)
p = figure(title="Analysis", x_axis_label='Country', y_axis_label='Number')
p.vbar(x='x', width=0.5, top='Number', color="navy", source=source)
p.xaxis.ticker = FixedTicker(ticks=x) # Create custom ticks for each country
p.xaxis.major_label_text_font_size = '0pt' # turn off x-axis tick labels
p.xaxis.minor_tick_line_color = None # turn off x-axis minor ticks
label = LabelSet(x='x_label', y='y_label', text='Number',
level='glyph', source=source)
p.add_layout(label)
show(p)

How to add data labels to a bar chart in Bokeh?

In the Bokeh guide there are examples of various bar charts that can be created. http://docs.bokeh.org/en/0.10.0/docs/user_guide/charts.html#id4
This code will create one:
from bokeh.charts import Bar, output_file, show
from bokeh.sampledata.autompg import autompg as df
p = Bar(df, 'cyl', values='mpg', title="Total MPG by CYL")
output_file("bar.html")
show(p)
My question is if it's possible to add data labels to each individual bar of the chart? I searched online but could not find a clear answer.
Use Labelset
Use Labelset to create a label over each individual bar
In my example I'm using vbar with the plotting interface, it is a little bit more low level then the Charts interface, but there might be a way to add it into the Bar chart.
from bokeh.palettes import PuBu
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource, ranges, LabelSet
from bokeh.plotting import figure
output_notebook()
source = ColumnDataSource(dict(x=['Áætlaðir','Unnir'],y=[576,608]))
x_label = ""
y_label = "Tímar (klst)"
title = "Tímar; núllti til þriðji sprettur."
plot = figure(plot_width=600, plot_height=300, tools="save",
x_axis_label = x_label,
y_axis_label = y_label,
title=title,
x_minor_ticks=2,
x_range = source.data["x"],
y_range= ranges.Range1d(start=0,end=700))
labels = LabelSet(x='x', y='y', text='y', level='glyph',
x_offset=-13.5, y_offset=0, source=source, render_mode='canvas')
plot.vbar(source=source,x='x',top='y',bottom=0,width=0.3,color=PuBu[7][2])
plot.add_layout(labels)
show(plot)
You can find more about labelset here: Bokeh annotations
NOTE FROM BOKEH MAINTAINERS The portions of the answer below that refer to the bokeh.charts are of historical interest only. The bokeh.charts API was deprecated and subsequently removed from Bokeh. See the answers here and above for information on the stable bokeh.plotting API
Yes, you can add labels to each bar of the chart. There are a few ways to do this. By default, your labels are tied to your data. But you can change what is displayed. Here are a few ways to do that using your example:
from bokeh.charts import Bar, output_file, show
from bokeh.sampledata.autompg import autompg as df
from bokeh.layouts import gridplot
from pandas import DataFrame
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import Range1d, HoverTool
# output_file("bar.html")
""" Adding some sample labels a few different ways.
Play with the sample data and code to get an idea what does what.
See below for output.
"""
Sample data (new labels):
I used some logic to determine the new dataframe column. Of course you could use another column already in df (it all depends on what data you're working). All you really need here is to supply a new column to the dataframe.
# One method
labels = []
for number in df['cyl']:
if number == 3:
labels.append("three")
if number == 4:
labels.append("four")
if number == 5:
labels.append("five")
if number == 6:
labels.append("six")
if number == 8:
labels.append("eight")
df['labels'] = labels
Another way to get a new dataframe column. Again, we just need to supply df a new column to use on our bar plot.
# Another method
def new_labels(x):
if x % 2 != 0 or x == 6:
y = "Inline"
elif x % 2 == 0:
y = "V"
else:
y = "nan"
return y
df["more_labels"] = df["cyl"].map(new_labels)
Now the bar chart:
I've done it two ways. p1 just specifies the new labels. Note that because I used strings it put them in alphabetical order on the chart. p2 uses the original labels, plus adds my new labels on the same bar.
# Specifying your labels
p1 = Bar(df, label='labels', values='mpg',
title="Total MPG by CYL, remapped labels, p1",
width=400, height=400, legend="top_right")
p2 = Bar(df, label=['cyl', 'more_labels'], values='mpg',
title="Total MPG by CYL, multiple labels, p2", width=400, height=400,
legend="top_right")
Another way:
Bokeh has three main "interface levels". High level charts provides quick easy access but limited functionality; plotting which gives more options; models gives even more options.
Here I'm using the plotting interface and the Figure class that contains a rect method. This gives you more detailed control of your chart.
# Plot with "intermediate-level" bokeh.plotting interface
new_df = DataFrame(df.groupby(['cyl'])['mpg'].sum())
factors = ["three", "four", "five", "six", "eight"]
ordinate = new_df['mpg'].tolist()
mpg = [x * 0.5 for x in ordinate]
p3 = figure(x_range=factors, width=400, height=400,
title="Total MPG by CYL, using 'rect' instead of 'bar', p3")
p3.rect(factors, y=mpg, width=0.75, height=ordinate)
p3.y_range = Range1d(0, 6000)
p3.xaxis.axis_label = "x axis name"
p3.yaxis.axis_label = "Sum(Mpg)"
A fourth way to add specific labels:
Here I'm using the hover plot tool. Hover over each bar to display your specified label.
# With HoverTool, using 'quad' instead of 'rect'
top = [int(x) for x in ordinate]
bottom = [0] * len(top)
left = []
[left.append(x-0.2) for x in range(1, len(top)+1)]
right = []
[right.append(x+0.2) for x in range(1, len(top)+1)]
cyl = ["three", "four", "five", "six", "eight"]
source = ColumnDataSource(
data=dict(
top=[int(x) for x in ordinate],
bottom=[0] * len(top),
left=left,
right=right,
cyl=["three", "four", "five", "six", "eight"],
)
)
hover = HoverTool(
tooltips=[
("cyl", "#cyl"),
("sum", "#top")
]
)
p4 = figure(width=400, height=400,
title="Total MPG by CYL, with HoverTool and 'quad', p4")
p4.add_tools(hover)
p4.quad(top=[int(x) for x in ordinate], bottom=[0] * len(top),
left=left, right=right, color="green", source=source)
p4.xaxis.axis_label = "x axis name"
Show all four charts in a grid:
grid = gridplot([[p1, p2], [p3, p4]])
show(grid)
These are the ways I am aware of. There may be others. Change whatever you like to fit your needs. Here is what running all of this will output (you'll have to run it or serve it to get the hovertool):

Categories

Resources