Labels not appearing in python bokeh bar plot with groups - python

I want to add labels with the values above the bars, as in "How to add data labels to a bar chart in Bokeh?", but I don't know how to do it. My code looks different than the other examples; it works, but maybe it is not the right way to do it.
My code:
from bokeh.io import export_png
from bokeh.io import output_file, show
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap
from bokeh.models import ColumnDataSource, ranges, LabelSet, Label
import pandas as pd
d = {'lvl': ["lvl1", "lvl2", "lvl2", "lvl3"],
'feature': ["test1", "test2","test3","test4"],
'count': ["5", "20","8", "90"]}
dfn = pd.DataFrame(data=d)
sourceframe = ColumnDataSource(data=dfn)
groupn = dfn.groupby(by=['lvl', 'feature'])
index_cmapn = factor_cmap('lvl_feature', palette=Spectral5, factors=sorted(dfn.lvl.unique()), end=1)
pn = figure(plot_width=800, plot_height=300, title="Count",x_range=groupn, toolbar_location=None)
labels = LabelSet(x='feature', y='count', text='count', level='glyph',x_offset=0, y_offset=5, source=sourceframe, render_mode='canvas',)
pn.vbar(x='lvl_feature', top="count_top" ,width=1, source=groupn,line_color="white", fill_color=index_cmapn, )
pn.y_range.start = 0
pn.x_range.range_padding = 0.05
pn.xgrid.grid_line_color = None
pn.xaxis.axis_label = "levels"
pn.xaxis.major_label_orientation = 1.2
pn.outline_line_color = None
pn.add_layout(labels)
export_png(pn, filename="color.png")
I think it has something to do with my dfn.groupby(by=['lvl', 'feature']) and the (probably wrong) sourceframe = ColumnDataSource(data=dfn).
The plot at this moment:

You can add the groups names in the initial dictionary like this:
d = {'lvl': ["lvl1", "lvl2", "lvl2", "lvl3"],
'feature': ["test1", "test2","test3","test4"],
'count': ["5", "20","8", "90"],
'groups': [('lvl1', 'test1'), ('lvl2', 'test2'), ('lvl2', 'test3'), ('lvl3', 'test4')]}
And then call LabelSet using as x values the groups.
labels = LabelSet(x='groups', y='count', text='count', level='glyph',x_offset=20, y_offset=0, source=sourceframe, render_mode='canvas',)
In this way the labels appear. Note that I played a bit with the offset to check if that was the problem, you can fix that manually.

Related

Cannot import name 'Scatter' from 'bokeh.plotting'

I am trying to represent the data using the bokeh scatter.
Here is my code:
from bokeh.plotting import Scatter, output_file, show
import pandas
df=pandas.Dataframe(colume["X","Y"])
df["X"]=[1,2,3,4,5,6,7]
df["Y"]=[23,43,32,12,34,54,33]
p=Scatter(df,x="X",y="Y", title="Day Temperature measurement", xlabel="Tempetature", ylabel="Day")
output_file("File.html")
show(p)
The Output should look like this:
Expected Output
The error is:
ImportError                               Traceback (most recent call last)
<ipython-input-14-1730ac6ad003> in <module>
----> 1 from bokeh.plotting import Scatter, output_file, show
      2 import pandas
      3
      4 df=pandas.Dataframe(colume["X","Y"])
      5
ImportError: cannot import name 'Scatter' from 'bokeh.plotting'
(C:\Users\LENOVO\Anaconda3\lib\site-packages\bokeh\plotting\__init__.py)
I also found that Scatter is no longer maintained. Is there any way to use it?
Also, which alternatives do I have to represent the data in the same way as Scatter, using any other Python libraries?
Will using an older version of Bokeh resolve this issue?
Scatter (with a capital S) has never been part of bokeh.plotting. It used to be a part of the old bokeh.charts API that was removed several years ago. However, it is not needed at all to create basic scatter plots, since the glyph methods in bokeh.plotting (e.g. circle, square) are all implicitly scatter-type functions to begin with:
from bokeh.plotting import figure, show
import pandas as pd
df = pd.DataFrame({"X" :[1,2,3,4,5,6,7],
"Y": [23,43,32,12,34,54,33]})
p = figure(x_axis_label="Tempetature", y_axis_label="Day",
title="Day Temperature measurement")
p.circle("X", "Y", size=15, source=df)
show(p)
Which yields:
You can also just pass the data directly to circle as in the other answer.
If you want to do fancier things, like mapping the marker type based on a column, there is also a plot.scatter (lower case s) method on the figure:
from bokeh.plotting import figure, show
from bokeh.sampledata.iris import flowers
from bokeh.transform import factor_cmap, factor_mark
SPECIES = ['setosa', 'versicolor', 'virginica']
MARKERS = ['hex', 'circle_x', 'triangle']
p = figure(title = "Iris Morphology")
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Sepal Width'
p.scatter("petal_length", "sepal_width", source=flowers, legend_field="species", fill_alpha=0.4, size=12,
marker=factor_mark('species', MARKERS, SPECIES),
color=factor_cmap('species', 'Category10_3', SPECIES))
show(p)
which yields:
If you look up "scatter" in the docs, you'll find
Scatter Markers
To scatter circle markers on a plot, use the circle() method of Figure:
from bokeh.plotting import figure, output_file, show
# output to static HTML file
output_file("line.html")
p = figure(plot_width=400, plot_height=400)
# add a circle renderer with a size, color, and alpha
p.circle([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], size=20, color="navy", alpha=0.5)
# show the results
show(p)
To work with dataframes, just pass in the columns like df.X and df.Y to the x and y args.
from bokeh.plotting import figure, show, output_file
import pandas as pd
df = pd.DataFrame(columns=["X","Y"])
df["X"] = [1,2,3,4,5,6,7]
df["Y"] = [23,43,32,12,34,54,33]
p = figure()
p.scatter(df.X, df.Y, marker="circle")
#from bokeh.io import output_notebook
#output_notebook()
show(p) # or output to a file...

How to properly handle datetime and categorical axes in bokeh/holoviews heatmap plot?

I'm trying to plot a simple heatmap using bokeh/holoviews. My data (a pandas dataframe) has categoricals on y and datetimes on x. The problem is that the number of categorical elements is >3000, and the resulting plot has overlapping tick labels on the y axis, which makes it totally useless. Is there currently a reliable way in bokeh to show only a subset of the ticks based on the zoom level?
I've already tried plotly and the result looks perfect; however, I need to use bokeh/holoviews and datashader. I also want to avoid replacing the categorical ticks with numerical ones.
I've also tried this solution, but it doesn't work (bokeh 1.2.0).
This is a toy example representing my use case (Actually here #y is 1000 but it gives the idea)
from datetime import datetime
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.transform import linear_cmap
from bokeh.io import output_notebook
output_notebook()
# build sample data
index = pd.date_range(start='1/1/2019', periods=1000, freq='T')
data = np.random.rand(1000,100)
columns = ['col'+ str(n) for n in range(100)]
# initial data format
df = pd.DataFrame(data=data, index=index, columns=columns)
# bokeh
df = df.stack().reset_index()
df.rename(columns={'level_0':'x','level_1':'y', 0:'z'},inplace=True)
df.sort_values(by=['y'],inplace=True)
x = [
date.to_datetime64().astype('M8[ms]').astype('O')
for date in df.x.to_list()
]
data = {
'value': df.z.to_list(),
'x': x,
'y': df.y.to_list(),
'date' : df.x.to_list()
}
# Hover tooltips refer to data-source columns with the "@column" syntax.
p = figure(x_axis_type='datetime', y_range=columns, width=900,
           tooltips=[("x", "@date"), ("y", "@y"), ("value", "@value")])
p.rect(x='x', y='y', width=60*1000, height=1, line_color=None,
fill_color=linear_cmap('value', 'Viridis256', low=df.z.min(), high=df.z.max()), source=data)
show(p)
Finally, I partially followed the suggestion from James and managed to get it to work using a Python callback for the ticker. This solution was hard for me to find; I searched the Bokeh docs, examples and source code for days.
The main problem for me was that the docs do not mention how to use "ColumnDataSource" objects in the custom callback.
https://docs.bokeh.org/en/1.2.0/docs/reference/models/formatters.html#bokeh.models.formatters.FuncTickFormatter.from_py_func
Finally, this helped a lot:
https://docs.bokeh.org/en/1.2.0/docs/user_guide/interaction/callbacks.html#customjs-with-a-python-function.
So, I modified the original code as follows, in the hope that it can be useful to someone:
from datetime import datetime
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.transform import linear_cmap
from bokeh.io import output_notebook
from bokeh.models import FuncTickFormatter
from bokeh.models import ColumnDataSource
output_notebook()
# build sample data
index = pd.date_range(start='1/1/2019', periods=1000, freq='T')
data = np.random.rand(1000,100)
columns_labels = ['col'+ str(n) for n in range(100)]
columns = [n for n in range(100)]
# initial data format
df = pd.DataFrame(data=data, index=index, columns=columns)
# bokeh
df = df.stack().reset_index()
df.rename(columns={'level_0':'x','level_1':'y', 0:'z'},inplace=True)
df.sort_values(by=['y'],inplace=True)
x = [
date.to_datetime64().astype('M8[ms]').astype('O')
for date in df.x.to_list()
]
data = {
'value': df.z.to_list(),
'x': x,
'y': df.y.to_list(),
'y_labels_tooltip' : [columns_labels[k] for k in df.y.to_list()],
'y_ticks' : columns_labels*1000,
'date' : df.x.to_list()
}
cd = ColumnDataSource(data=data)
# Tick-label callback handed to FuncTickFormatter.from_py_func below.
# `tick` is intentionally a free name: per the Bokeh docs, from_py_func
# translates this function to JavaScript, where `tick` holds the tick value.
# The data source is passed as a default argument so that it is available
# inside the translated callback.
def ticker(source=cd):
    labels = source.data['y_ticks']
    return "{}".format(labels[tick])
#p = figure(x_axis_type='datetime', y_range=columns, width=900, tooltips=[("x", "@date{%F %T}"), ("y", "@y_labels"), ("value", "@value")])
# Hover tooltips refer to data-source columns with the "@column" syntax.
p = figure(x_axis_type='datetime', width=900,
           tooltips=[("x", "@date{%F %T}"), ("y", "@y_labels_tooltip"), ("value", "@value")])
p.rect(x='x', y='y', width=60*1000, height=1, line_color=None,
fill_color=linear_cmap('value', 'Viridis256', low=df.z.min(), high=df.z.max()), source=cd)
p.hover.formatters = {'date': 'datetime'}
p.yaxis.formatter = FuncTickFormatter.from_py_func(ticker)
p.yaxis[0].ticker.desired_num_ticks = 20
show(p)
The result is this:

Bokeh vertical bar position misalign with ticker

Here is a snippet plotting some vBars (jupyter notebook):
import random
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, FactorRange, Range1d
from bokeh.models.glyphs import VBar
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
# data: ten rows; x is the integer index 1..10, y a random integer in
# [1, 100] and z a random float in [1.0, 1000.0]
data = {'x': [], 'y': [], 'z': []}
for i in range(1, 10+1):
    data['x'].append(i)
    data['y'].append(random.randint(1, 100))
    data['z'].append(random.uniform(1.00, 1000.00))
source = ColumnDataSource(data)
xdr = FactorRange(factors=[str(x) for x in data['x']])
ydr = Range1d(start=0, end=max(data['y'])*1.5)
f = figure(x_range=xdr, y_range=ydr, plot_width=1000, plot_height=300, tools='',
toolbar_location='above', title='title', outline_line_color='gray')
glyph = VBar(x='x', top='y', bottom=0,
width=0.8, fill_color='blue')
f.add_glyph(source, glyph)
# Hover tooltips: "$x" is the cursor's x position, "@name" reads a column
# of the data source.
f.add_tools(HoverTool(
    tooltips=[
        ('time', '$x{0}'),
        ('value', '@' + 'y' + '{0}'),
        ('money', '@z')
    ],
    mode='vline'
))
output_notebook()
show(f)
After passing x_range and y_range, the vertical bars are misaligned with the tick positions:
In the normal case, without x_range and y_range, it works fine:
I wonder which parameter governs the vbar position? Why did the bars 'move' after receiving custom tick names?
It is misaligned because of the FactorRange. I'm not exactly sure why... I replaced it with a range built from the min and max values of the ColumnDataSource, and this works fine.
import random
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, FactorRange, Range1d
from bokeh.models.glyphs import VBar
from bokeh.plotting import figure
from bokeh.io import show
# data: ten rows; x is the integer index 1..10, y a random integer in
# [1, 100] and z a random float in [1.0, 1000.0]
data = {'x': [], 'y': [], 'z': []}
for i in range(1, 10+1):
    data['x'].append(i)
    data['y'].append(random.randint(1, 100))
    data['z'].append(random.uniform(1.00, 1000.00))
source = ColumnDataSource(data)
ydr = Range1d(start=0, end=max(data['y'])*1.5)
f = figure(x_range=(min(source.data['x'])-0.5, max(source.data['x'])+0.5), y_range=ydr, plot_width=1000, plot_height=300, tools='', toolbar_location='above', title='title', outline_line_color='gray')
glyph = VBar(x='x', top='y', bottom=0,
width=0.8, fill_color='blue')
f.add_glyph(source, glyph)
# Hover tooltips: "$x" is the cursor's x position, "@name" reads a column
# of the data source.
f.add_tools(HoverTool(
    tooltips=[
        ('time', '$x{0}'),
        ('value', '@' + 'y' + '{0}'),
        ('money', '@z')
    ],
    mode='vline'
))
show(f)
I also came across this issue and found the cause:
Your data suggests a numeric x axis, while the factor range is initialized as a categorical axis (you are using strings).
If you want a categorical axis, your data needs to be adjusted accordingly:
data['x'].append(str(i))
instead of
data['x'].append(i)

Incomplete bokeh plot

I have a pandas dataframe with 10 columns and am trying to draw a bar plot using Bokeh.
The HTML file has the complete plot when I use plot_width=10000.
However, when I increase the plot width to 30000 (so that there is space between the x-axis values), the plot is not drawn beyond 2010. Here is the complete code. Please suggest a way forward.
from bokeh.palettes import Viridis6 as palette
from bokeh.transform import factor_cmap
from bokeh.models import ColumnDataSource,FactorRange,HoverTool
from bokeh.palettes import Spectral6
from flask import Flask, request, render_template, session, redirect,send_file
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_file,save
from bokeh.embed import components,file_html
from bokeh.resources import CDN
from bokeh.layouts import row,column
from bokeh.core.properties import value
dates = pd.date_range('20050101', periods=3900)
df = pd.DataFrame(np.random.randn(3900, 10), index=dates, columns=list('ABCDEFGHIJ'))
s = df.resample('M').mean().stack()
s.index = [s.index.get_level_values(0).strftime('%Y-%m-%d'),s.index.get_level_values(1)]
x = s.index.values
l1=list(s.index.levels[1])
counts = s.values
source = ColumnDataSource(data=dict(x=x, counts=counts))
p = figure(x_range=FactorRange(*x), plot_height=250,plot_width=30000, title='Plotting data',
toolbar_location=None, tools="")
p.vbar(x='x', top='counts', width=1, source=source, line_color="white")
p.y_range.start = s.values.min()
p.y_range.end = s.values.max()
p.x_range.range_padding = 0.01
p.y_range.range_padding = 0.01
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None
output_file('test_plot.html')
save([p])
show(p)
This works fine for me with Bokeh 1.0.4 and OSX/Safari. I suspect this is a limitation/issue with the underlying HTML Canvas implementation in whatever browser you are using, in which case there is nothing we can do about it. The only suggestions I can make are to split the plot up into smaller subplots, or use a different browser (or possibly different version of the same browser)

how use bokeh vbar chart parameter with groupby object?

Question
The code below is the grouped vbar chart example from the Bokeh documentation.
There are some things I can't understand in this example.
Where does 'cyl_mfr' in factor_cmap() and vbar() come from?
Is 'mpg_mean' calculating the mean of the 'mpg' column? If so,
why doesn't 'mpg_sum' work?
I want to make my own vbar chart like this example.
Code
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.palettes import Spectral5
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap
output_file("bars.html")
df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)
group = df.groupby(('cyl', 'mfr'))
source = ColumnDataSource(group)
index_cmap = factor_cmap('cyl_mfr', palette=Spectral5,
factors=sorted(df.cyl.unique()), end=1)
# The title must be a single string literal; the "#" here is literal text
# ("number of cylinders").
p = figure(plot_width=800, plot_height=300,
           title="Mean MPG by # Cylinders and Manufacturer",
           x_range=group, toolbar_location=None, tools="")
p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
line_color="white", fill_color=index_cmap, )
p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None
# Hover tooltips refer to data-source columns with the "@column" syntax.
p.add_tools(HoverTool(tooltips=[("MPG", "@mpg_mean"),
                                ("Cyl, Mfr", "@cyl_mfr")]))
show(p)
group = df.groupby(('cyl', 'mfr')) creates a <pandas.core.groupby.DataFrameGroupBy object at 0x0xxx>. If you pass this to a ColumnDataSource, bokeh does a lot of magic and already calculates a lot of statistics:
df.columns
Index(['mpg', 'cyl', 'displ', 'hp', 'weight', 'accel', 'yr', 'origin', 'name', 'mfr'], dtype='object')
source.column_names
['accel_count', 'accel_mean', 'accel_std', 'accel_min',
'accel_25%', 'accel_50%', 'accel_75%', 'accel_max', 'displ_count',
'displ_mean', 'displ_std', 'displ_min', 'displ_25%', 'displ_50%',
'displ_75%', 'displ_max', 'hp_count', 'hp_mean', 'hp_std',
'hp_min', 'hp_25%', 'hp_50%', 'hp_75%', 'hp_max', 'mpg_count',
'mpg_mean', 'mpg_std', 'mpg_min', 'mpg_25%', 'mpg_50%',
'mpg_75%', 'mpg_max', 'weight_count', 'weight_mean', 'weight_std',
'weight_min', 'weight_25%', 'weight_50%', 'weight_75%',
'weight_max', 'yr_count', 'yr_mean', 'yr_std', 'yr_min',
'yr_25%', 'yr_50%', 'yr_75%', 'yr_max', 'cyl_mfr']
cyl_mfr holds the labels of the two columns you grouped by, combined. In source this has become a column of tuples.
mpg_sum is not calculated. If you want the sum, you will need to calculate that yourself.

Categories

Resources