Here is a snippet that plots some vertical bars (VBar) in a Jupyter notebook:
import random
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, FactorRange, Range1d
from bokeh.models.glyphs import VBar
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
# data: ten bars at integer x positions 1..10 with random heights
data = {'x': [], 'y': [], 'z': []}
for i in range(1, 10+1):
    data['x'].append(i)
    data['y'].append(random.randint(1, 100))
    data['z'].append(random.uniform(1.00, 1000.00))
source = ColumnDataSource(data)
# categorical x range whose factors are the x values converted to strings
xdr = FactorRange(factors=[str(x) for x in data['x']])
# leave 50% headroom above the tallest bar
ydr = Range1d(start=0, end=max(data['y'])*1.5)
f = figure(x_range=xdr, y_range=ydr, plot_width=1000, plot_height=300, tools='',
           toolbar_location='above', title='title', outline_line_color='gray')
glyph = VBar(x='x', top='y', bottom=0,
             width=0.8, fill_color='blue')
f.add_glyph(source, glyph)
f.add_tools(HoverTool(
    tooltips=[
        # '$x' is the cursor position in data space; '@name' reads the
        # column called `name` from the glyph's data source.
        ('time', '$x{0}'),
        ('value', '@y{0}'),
        ('money', '@z'),
    ],
    mode='vline'
))
output_notebook()
show(f)
After passing x_range and y_range, the vertical bars are misaligned with the tick positions:
In the normal case, without x_range and y_range, it works fine:
Which parameter governs the vbar position? Why did the bars 'move' after receiving custom tick names?
The bars are misaligned because of the FactorRange; I am not exactly sure why. I replaced it with a numeric range built from the min and max x values of the ColumnDataSource (padded by 0.5 on each side), and this works fine.
import random
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, FactorRange, Range1d
from bokeh.models.glyphs import VBar
from bokeh.plotting import figure
from bokeh.io import show
# data: ten bars at integer x positions 1..10 with random heights
data = {'x': [], 'y': [], 'z': []}
for i in range(1, 10+1):
    data['x'].append(i)
    data['y'].append(random.randint(1, 100))
    data['z'].append(random.uniform(1.00, 1000.00))
source = ColumnDataSource(data)
# leave 50% headroom above the tallest bar
ydr = Range1d(start=0, end=max(data['y'])*1.5)
# numeric x range spanning the data, padded by half a unit on each side
f = figure(x_range=(min(source.data['x'])-0.5, max(source.data['x'])+0.5),
           y_range=ydr, plot_width=1000, plot_height=300, tools='',
           toolbar_location='above', title='title', outline_line_color='gray')
glyph = VBar(x='x', top='y', bottom=0,
             width=0.8, fill_color='blue')
f.add_glyph(source, glyph)
f.add_tools(HoverTool(
    tooltips=[
        # '$x' is the cursor position in data space; '@name' reads the
        # column called `name` from the glyph's data source.
        ('time', '$x{0}'),
        ('value', '@y{0}'),
        ('money', '@z'),
    ],
    mode='vline'
))
show(f)
I also came across this issue and found the cause behind it:
Your data has numeric x values, while the FactorRange makes the x axis categorical (its factors are strings).
If you want a categorical axis, your data needs to be adjusted accordingly:
data['x'].append(str(i))
instead of
data['x'].append(i)
Related
I'm trying to highlight the last value of a time series plot by plotting its value on the y-axis, as shown in this question. I prefer using LabelSet over Legend because you can precisely control the text position and also use a data source to update it. Unfortunately, I cannot find out how to draw label text outside the plot box.
Here is some code that plots a LabelSet; notice how the text is only shown inside the box (66.1x is partially hidden by the y-axis):
import pandas as pd
from bokeh.io import output_notebook
output_notebook()
from bokeh.plotting import figure, show
from bokeh.models import LabelSet, ColumnDataSource
#import bokeh.sampledata
#bokeh.sampledata.download()
from bokeh.sampledata.stocks import MSFT
# Keep only the first 50 MSFT rows and parse the date column into datetimes.
df = pd.DataFrame(MSFT)[:50]
df["date"] = pd.to_datetime(df["date"])

p = figure(
    x_axis_type="datetime",
    width=1000,
    toolbar_location='left',
    title="MSFT Candlestick",
    y_axis_location="right",
)
p.line(df.date, df.close)

# One-row data source: the last point of the series plus its label text.
ds = ColumnDataSource({
    'x': [df.date.iloc[-1]],
    'y': [df.close.iloc[-1]],
    'text': [' ' + str(df.close.iloc[-1])],
})
ls = LabelSet(x='x', y='y', text='text', source=ds)
p.add_layout(ls)

show(p)
Please let me know how to show a LabelSet outside the plot box. Thanks.
I want to add labels with the values above the bars, as in "How to add data labels to a bar chart in Bokeh?", but I don't know how to do it. My code looks different from the other examples; it works, but maybe it is not the right way.
My code:
from bokeh.io import export_png
from bokeh.io import output_file, show
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap
from bokeh.models import ColumnDataSource, ranges, LabelSet, Label
import pandas as pd
# Raw records: one row per (lvl, feature) pair; the counts are stored as strings.
d = {
    'lvl': ["lvl1", "lvl2", "lvl2", "lvl3"],
    'feature': ["test1", "test2", "test3", "test4"],
    'count': ["5", "20", "8", "90"],
}
dfn = pd.DataFrame(data=d)

# Two views of the same frame: the plain rows (used by the labels) and the
# rows grouped by level and feature (used by the bars and the x range).
sourceframe = ColumnDataSource(data=dfn)
groupn = dfn.groupby(by=['lvl', 'feature'])

index_cmapn = factor_cmap(
    'lvl_feature',
    palette=Spectral5,
    factors=sorted(dfn.lvl.unique()),
    end=1,
)

pn = figure(
    plot_width=800,
    plot_height=300,
    title="Count",
    x_range=groupn,
    toolbar_location=None,
)
labels = LabelSet(
    x='feature',
    y='count',
    text='count',
    level='glyph',
    x_offset=0,
    y_offset=5,
    source=sourceframe,
    render_mode='canvas',
)
pn.vbar(
    x='lvl_feature',
    top="count_top",
    width=1,
    source=groupn,
    line_color="white",
    fill_color=index_cmapn,
)

# Axis and frame cosmetics.
pn.y_range.start = 0
pn.x_range.range_padding = 0.05
pn.xgrid.grid_line_color = None
pn.xaxis.axis_label = "levels"
pn.xaxis.major_label_orientation = 1.2
pn.outline_line_color = None

pn.add_layout(labels)
export_png(pn, filename="color.png")
I think it has something to do with my dfn.groupby(by=['lvl', 'feature']) and the (probably wrong) sourceframe = ColumnDataSource(data=dfn).
The plot at this moment:
You can add the group names to the initial dictionary like this:
# Same records as in the question, plus a 'groups' column holding the
# (lvl, feature) tuple of each row.
d = dict(
    lvl=["lvl1", "lvl2", "lvl2", "lvl3"],
    feature=["test1", "test2", "test3", "test4"],
    count=["5", "20", "8", "90"],
    groups=[
        ('lvl1', 'test1'),
        ('lvl2', 'test2'),
        ('lvl2', 'test3'),
        ('lvl3', 'test4'),
    ],
)
Then call LabelSet using the groups as the x values:
labels = LabelSet(x='groups', y='count', text='count', level='glyph',x_offset=20, y_offset=0, source=sourceframe, render_mode='canvas',)
This way the labels appear. Note that I played a bit with the offsets to check whether they were the problem; you can adjust them manually.
I want to draw circles with Bokeh, where the color of each circle depends on a column of a DataFrame. But I get an empty plot. If I don't specify a color argument for p.circle, it works fine.
Here is the code; you can copy, paste, and run it.
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import Spectral11
import pandas as pd
df = pd.DataFrame({
    'price': [10, 15, 20, 25, 30],
    'action': [0, 1, 0, 2, 3],
    'sign': [0, -1, 0, 1, -1],
})

# Expose the frame's index and every column under the same names.
source = ColumnDataSource(data={
    'index': df.index,
    'price': df.price,
    'action': df.action,
    'sign': df.sign,
})

# The mapper's factors are the distinct 'sign' values converted to strings.
sign_factors = [str(value) for value in df.sign.unique()]
color_mapper = CategoricalColorMapper(factors=sign_factors, palette=Spectral11)

p = figure(plot_width=800, plot_height=400)

# without a color argument the circles are drawn
p.circle('index', 'price', radius=0.2, source=source)

# with the mapped color the plot comes out empty
p.circle(
    'index',
    'price',
    radius=0.2,
    color={'field': 'sign', 'transform': color_mapper},
    source=source,
)

show(p)
Bokeh doesn't like it when you take some information from a ColumnDataSource and other information from a different source. Putting the colors into the ColumnDataSource itself worked for me (in a notebook):
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import Spectral11
import pandas as pd
output_notebook()
df = pd.DataFrame({
    'price': [10, 15, 20, 25, 30],
    'action': [0, 1, 0, 2, 3],
    'sign': [0, -1, 0, 1, -1],
})

# Precompute one palette entry per row: sign -1/0/1 selects Spectral11[0/1/2].
source = ColumnDataSource(data={
    'index': df.index,
    'price': df.price,
    'action': df.action,
    'sign': df.sign,
    'color': [Spectral11[i+1] for i in df.sign],
})

p = figure(plot_width=800, plot_height=400)

# the colors are read from the 'color' column of the same data source
p.circle('index', 'price', radius=0.2, color='color', source=source)

show(p)
I have the following code:
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.iris import flowers
from bokeh.models import LinearColorMapper
from bokeh.models import ColumnDataSource
from bokeh.models import ColorBar
from bokeh.palettes import Reds9
p = figure(title="Iris Morphology")
p.xaxis.axis_label = "Petal Length"
p.yaxis.axis_label = "Petal Width"
source = ColumnDataSource(flowers)

# Map petal length onto the reversed Reds9 palette. Reds9[::-1] builds a
# reversed copy, so the shared module-level palette is left untouched (and the
# expression also works when the palette is an immutable tuple).
exp_cmap = LinearColorMapper(palette=Reds9[::-1],
                             low=min(flowers["petal_length"]),
                             high=max(flowers["petal_length"]))

# Color each circle by its petal length through the mapper.
p.circle("petal_length", "petal_width", source=source, line_color=None,
         fill_color={"field": "petal_length", "transform": exp_cmap})

# Color bar driven by the same mapper, placed to the left of the plot.
bar = ColorBar(color_mapper=exp_cmap, location=(0, 0))
p.add_layout(bar, "left")
show(p)
Which produces the following plot:
Notice that I use Brewer's Red palette which is limited to 9 colors.
from bokeh.palettes import Reds9
How can I expand it to 256?
Bokeh's palettes are simply lists of HTML colors, so you can create your own. For example, taking the list from https://www.w3schools.com/colors/colors_shades.asp:
# 33 HTML colors running from black to pure red: the red channel rises in
# steps of 8 from 0x00 to 0xF8, and the last entry is full red (0xFF).
myReds = ['#%02X0000' % red for red in range(0x00, 0x100, 8)] + ['#FF0000']
Then replace Reds9 with myReds:
[...]
exp_cmap = LinearColorMapper(palette=myReds,
low = min(flowers["petal_length"]),
high = max(flowers["petal_length"]))
[...]
Question
The code below is the grouped vbar chart example from the Bokeh documentation.
There are some things I can't understand in this example.
Where does 'cyl_mfr' come from in factor_cmap() and vbar()?
Is 'mpg_mean' calculating the mean of the 'mpg' column? If so,
why doesn't 'mpg_sum' work?
I want to make my own vbar chart like this example.
Code
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.palettes import Spectral5
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap
output_file("bars.html")

# Treat cylinder count and model year as categories rather than numbers.
df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)

# Group by a *list* of column names; pandas treats a tuple as one single key.
group = df.groupby(['cyl', 'mfr'])
# A ColumnDataSource built from a GroupBy exposes per-group summary columns
# such as 'mpg_mean', plus a 'cyl_mfr' column holding the group labels.
source = ColumnDataSource(group)

# Color the bars by the first level (cyl) of the grouped 'cyl_mfr' factor.
index_cmap = factor_cmap('cyl_mfr', palette=Spectral5,
                         factors=sorted(df.cyl.unique()), end=1)

p = figure(plot_width=800, plot_height=300,
           title="Mean MPG by # Cylinders and Manufacturer",
           x_range=group, toolbar_location=None, tools="")
p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
       line_color="white", fill_color=index_cmap)

# Axis and frame cosmetics.
p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None

# '@name' in a tooltip reads the column called `name` from the data source.
p.add_tools(HoverTool(tooltips=[("MPG", "@mpg_mean"),
                                ("Cyl, Mfr", "@cyl_mfr")]))
show(p)
The group = df.groupby(...) call on 'cyl' and 'mfr' creates a <pandas.core.groupby.DataFrameGroupBy object at 0x0xxx>. If you pass this to a ColumnDataSource, Bokeh does a lot of magic and already calculates a lot of statistics for you:
df.columns
Index(['mpg', 'cyl', 'displ', 'hp', 'weight', 'accel', 'yr', 'origin', 'name', 'mfr'],
source.column_names
['accel_count', 'accel_mean', 'accel_std', 'accel_min',
'accel_25%', 'accel_50%', 'accel_75%', 'accel_max', 'displ_count',
'displ_mean', 'displ_std', 'displ_min', 'displ_25%', 'displ_50%',
'displ_75%', 'displ_max', 'hp_count', 'hp_mean', 'hp_std',
'hp_min', 'hp_25%', 'hp_50%', 'hp_75%', 'hp_max', 'mpg_count',
'mpg_mean', 'mpg_std', 'mpg_min', 'mpg_25%', 'mpg_50%',
'mpg_75%', 'mpg_max', 'weight_count', 'weight_mean', 'weight_std',
'weight_min', 'weight_25%', 'weight_50%', 'weight_75%',
'weight_max', 'yr_count', 'yr_mean', 'yr_std', 'yr_min',
'yr_25%', 'yr_50%', 'yr_75%', 'yr_max', 'cyl_mfr']
cyl_mfr is named after the two columns you grouped by, joined with an underscore. In source it has become a column of tuples holding the labels of each group.
mpg_sum is not calculated. If you want the sum, you will need to calculate it yourself.