I have a multi line plot in Bokeh:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.palettes import Category20c_7
from bokeh.io import output_file
from bokeh.models import SingleIntervalTicker, LinearAxis, ColumnDataSource
output_file("conso_daily.html")
treatcriteria_daily_data = pd.read_csv("treatcriteria_evolution.csv", sep=';')
final_daily_data = treatcriteria_daily_data.groupby(['startdate_weekyear','startdate_dayweek'],as_index = False).sum().pivot('startdate_weekyear','startdate_dayweek').fillna(0)
# keep only integer values in x axis
def interval_integer(plot):
    """Attach an extra x-axis below ``plot`` whose major ticks are 1 apart.

    Args:
        plot: the Bokeh figure that receives the new axis.

    The function builds a ``SingleIntervalTicker`` with ``interval=1`` and
    ``num_minor_ticks=1``, wraps it in a ``LinearAxis`` and adds that axis to
    the ``'below'`` side of the plot. Nothing is returned; the plot is
    modified in place.
    """
    ticker = SingleIntervalTicker(interval=1, num_minor_ticks=1)
    xaxis = LinearAxis(ticker=ticker)
    plot.add_layout(xaxis, 'below')
numlines = len(final_daily_data.columns)
palette = Category20c_7[0:numlines]
# remove the last week if there is not all the data
data_without_last_week = final_daily_data[(final_daily_data != 0).all(1)]
cpu_values_daily = data_without_last_week.values.T.tolist()
weeks = []
for i in range(0,len(data_without_last_week.columns)):
weeks.append(data_without_last_week.index)
df = {'week': weeks,
'day': ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
'color': ['red', 'orange', 'yellow', 'green', 'grey', 'pink', 'purple'],
'HCPU': cpu_values_daily}
source = ColumnDataSource(df)
p = figure(width=800, height=500)
p.multi_line(xs='week', ys='HCPU', legend='day', color='color',
line_width=5, line_alpha=0.6, hover_line_alpha=1.0,
source=source)
p.xaxis.visible = False
p.left[0].formatter.use_scientific = False
interval_integer(p)
show(p)
I want to show the legend outside the plot area because it currently hides the top curve (Sunday).
I tried to follow this thread, but it covers single lines rather than multi_line: Create a two line legend in a bokeh plot
Using the following code, I tried to place the legend to the right, outside the plot area, but it doesn't work:
legend = Legend(items=[
('Monday', [p[0]]),
('Tuesday', [p[1]]),
('Wednesday', [p[2]]),
('Thursday', [p[3]]),
('Friday', [p[4]]),
('Saturday', [p[5]]),
('Sunday', [p[6]]),
], location=(0, -30))
p.add_layout(legend, 'right')
TypeError: 'Figure' object is not subscriptable
Thank you.
Edit: Here is my data 'final_daily_data' if it's useful:
mc_cpu_hours \
startdate_dayweek 1 2 3
startdate_weekyear
27 527644.000731 468053.338183 517548.838022
28 349896.850976 481313.693908 372385.568095
29 168595.113447 388117.184580 373894.548600
30 176007.786269 364379.872622 366155.953075
31 177517.591864 0.000000 0.000000
startdate_dayweek 4 5 6 7
startdate_weekyear
27 573669.325129 515710.534260 511711.421986 841073.028107
28 378069.713821 385937.231788 385856.666340 842468.209151
29 343235.942227 376405.876236 400007.946715 662019.708660
30 375948.240935 366151.336263 395790.387672 700936.336812
31 0.000000 0.000000 0.000000 686023.780120
Your problem is in legend = Legend(items=[('Monday', [p[0]]), ...]), or more precisely in p[0], ..., p[6]. The figure object is not subscriptable because it is not a list or dictionary, and indexing it raises the error. I think in your case it is enough to add an empty Legend() to the layout, without any further information.
Small Example
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Legend
# Render the plot inline in the notebook.
output_notebook()
# One row per line to draw: 'xs'/'ys' hold the coordinate lists, 'label' the
# legend text and 'color' the line colour.
source = pd.DataFrame({
'xs':[[1,2,3,4],[1,2,3,4]],
'ys':[[1,2,3,4],[4,3,2,1]],
'label':['a','b'],
'color':['blue','green']
})
p = figure(width=400, height=300)
# Add an empty Legend to the right of the plot area *before* drawing the
# glyph. NOTE(review): the multi_line call below presumably fills this
# existing legend through legend_field instead of creating a new one inside
# the plot area -- keep this statement order.
p.add_layout(Legend(),'right')
p.multi_line(xs='xs', ys='ys', legend_field ='label', color='color', source=source)
show(p)
Output
Look at this answer, in particular the comment from @Sam De Meyer. In short, you create the figure, and then you do:
p.add_layout(p.legend[0], 'right')
show(p)
Related
I was consulting the bokeh user guide (link) on creating interactive legends (see subsection on "Hiding glyphs").
The code given allows me to create up to 4 stacked line charts. For example, if I try to create a fifth line chart ("AAP"):
p = figure(width=800, height=250, x_axis_type="datetime")
p.title.text = 'Click on legend entries to hide the corresponding lines'
import pandas as pd
from bokeh.palettes import Spectral4
from bokeh.plotting import figure, show
from bokeh.sampledata.stocks import AAPL, GOOG, IBM, MSFT
for data, name, color in zip([AAPL, GOOG, IBM, MSFT, AAPL], ["AAPL", "GOOG", "IBM", "MSFT", "AAP"], Spectral4):
df = pd.DataFrame(data)
df['date'] = pd.to_datetime(df['date'])
p.line(df['date'], df['close'], line_width=2, color=color, alpha=0.8, legend_label=name)
p.legend.location = "top_left"
p.legend.click_policy="hide"
show(p)
it will still only show four.
I was wondering if it is possible to extend this code and create many more line charts (say 20) and if so, how this can be done.
Thank you
Your approach is fine and works if you use Spectral5 instead of Spectral4 as the palette. Keep in mind that zip iterates over all sequences in lockstep and stops as soon as the shortest one is exhausted. That is what happened here: Spectral4 has only four colors, so the fifth line was dropped.
Minimal working example:
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.sampledata.stocks import AAPL, GOOG, IBM, MSFT
# Render the plot inline in the notebook.
output_notebook()

p = figure(width=800, height=250, x_axis_type="datetime")
p.title.text = 'Click on legend entries to hide the corresponding lines'

# One entry per line to draw. zip() stops at the shortest sequence, so the
# three lists must have the same length or trailing lines are silently
# dropped.
dataset = [AAPL, GOOG, IBM, MSFT, AAPL]
nameset = ["AAPL", "GOOG", "IBM", "MSFT", "AAP"]
colorset = ['blue', 'red', 'green', 'magenta', 'black']

for data, name, color in zip(dataset, nameset, colorset):
    df = pd.DataFrame(data)
    # The sample data stores dates as strings; convert them for the
    # datetime x-axis.
    df['date'] = pd.to_datetime(df['date'])
    p.line(df['date'], df['close'], line_width=2, color=color, alpha=0.8, legend_label=name)

# Configure the legend once, after all lines have been added.
p.legend.location = "top_left"
p.legend.click_policy = "hide"
show(p)
Output:
Comment
For this I used bokeh 2.4.3, but it should work with bokeh 3.x, too.
I just discovered Bokeh recently, and I try to display a legend for each day of week (represented by 'startdate_dayweek'). The legend should contain the color for each row corresponding to each day.
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_file
from bokeh.palettes import Set1_7
output_file("conso_daily.html")
treatcriteria_data_global = pd.read_csv(r"treatcriteria_evolution.csv", sep=';')
final_global_data = treatcriteria_data_global.groupby(['startdate_weekyear','startdate_dayweek'],as_index = False).sum().pivot('startdate_weekyear','startdate_dayweek').fillna(0)
numlines = len(final_global_data.columns)
palette = Set1_7[0:numlines]
ts_list_of_list = []
for i in range(0,len(final_global_data.columns)):
ts_list_of_list.append(final_global_data.index)
vals_list_of_list = final_global_data.values.T.tolist()
p = figure(width=500, height=300)
p.left[0].formatter.use_scientific = False
p.multi_line(ts_list_of_list, vals_list_of_list,
legend='startdate_dayweek',
line_color = palette,
line_width=4)
show(p)
But I don't have the expected result in the legend:
How to have the legend for each day? Is the problem due to the fact that I created a MultiIndex table? Thanks.
The multi_line() function accepts the parameter legend_field or legend_group. Both work very well for your use case if you use a ColumnDataSource as the source. Keep in mind that an error is raised if you use both parameters at the same time.
Minimal Example
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
# Render the plot inline in the notebook.
output_notebook()

# Column-oriented data: entry i of every column describes line i of the
# multi_line glyph (its coordinates, legend text and colour).
source = ColumnDataSource(data={
    'xs': [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5]],
    'ys': [[1, 2, 3, 4, 5], [1, 1, 1, 1, 5], [5, 4, 3, 2, 1]],
    'legend': ['red', 'green', 'blue'],
    'line_color': ['red', 'green', 'blue'],
})

p = figure(width=500, height=300)
# legend_field names the source column that supplies the legend entries.
p.multi_line(
    xs='xs',
    ys='ys',
    legend_field='legend',
    line_color='line_color',
    line_width=4,
    source=source,
)
show(p)
Output
I am new to Bokeh (using v2.2.1) and looking for solution to label each data point. Replicating the examples shown in documents, I could not find solutions with X axis being string,
import pandas as pd
from bokeh.models import LabelSet, ColumnDataSource
from bokeh.plotting import figure, show
output_file("weekday_val.html")
vals = pd.DataFrame({'weekday': ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
'Sales': [15, 25, 36, 17, 4]})
ds = ColumnDataSource(vals)
p = figure(x_range=vals['weekday'])
p.vbar(x='weekday', width=0.75, top='Sales', source=ds)
labels = LabelSet(x='weekday', y='Sales', text='Sales', source=ds,
level='glyph',
x_offset=5,
y_offset=5,
render_mode='canvas')
p.add_layout(labels)
show(p)
It's not giving any error but it is failing to print the labels on top of the vertical bars as I was expecting. This error occurred only after I upgraded Bokeh from v1.2.0 to v2.1.1.
Does LabelSet only take numerical values for the x-axis? Is there a workaround to use this for x-axis with strings?
As Eugene Pakhomov says in the comments, there is an issue in 2.2.1 regarding categorical coordinates and labels. It's due to be fixed in 2.3, but in the meantime, you can replace your categorical values with their indices and the annotations will render as expected.
import pandas as pd
from bokeh.models import LabelSet, ColumnDataSource, Range1d
from bokeh.plotting import output_notebook, figure, show
output_notebook()
vals = pd.DataFrame({'weekday': ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
'Sales': [15, 25, 36, 17, 4],
'index' : range(5)}
)
ds = ColumnDataSource(vals)
# extend the y-range a bit more to give space to top label
p = figure(y_range=Range1d(0, 42), x_range=vals['weekday'], height=300)
p.vbar(x='weekday', width=0.75, top='Sales', source=ds)
labels = LabelSet(x='index', y='Sales', text='Sales', source=ds,
level='glyph',
x_offset=5,
y_offset=5,
render_mode='canvas')
p.add_layout(labels)
show(p)
After seeing the capabilities of Bokeh I started working with it. Now I am trying to make a Vbar with my dataset.
my dataset (10 rows)
dataset
I have read the tutorial quite a number of times and used the example provided by the official documentation:
https://hub.mybinder.org/user/bokeh-bokeh-notebooks-ate6xt0k/notebooks/tutorial/07%20-%20Bar%20and%20Categorical%20Data%20Plots.ipynb
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]
source = ColumnDataSource(data=dict(fruits=fruits, counts=counts, color=Spectral6))
p = figure(x_range=fruits, plot_height=250, y_range=(0, 9), title="Fruit Counts")
p.vbar(x='fruits', top='counts', width=0.9, color='color', legend="fruits", source=source)
p.xgrid.grid_line_color = None
p.legend.orientation = "horizontal"
p.legend.location = "top_center"
show(p)
This I tried to replicate with my own dataset.
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6
source = ColumnDataSource(top_ten_start)
p = figure(x_range='Bank_name', plot_height=250, y_range=(0, 90), title="BAnks")
p.vbar(x='Bank_name', top='Tier_1_ratio', width=0.9, legend="test", source=source)
p.xgrid.grid_line_color = None
p.legend.orientation = "horizontal"
p.legend.location = "top_center"
show(p)
I expected to see a bar chart as shown on the tutorial but nothing is plotting.
I thought by replacing the input of the "x_range", "plot" and "x" it would be enough to work.
Perhaps the following information would help:
These are the dtypes:
Country_code object
Bank_name object
Tier_1_ratio float64
dtype: object
x_range expects a list of categorical values but you supplied a string. This would be okay if it was a glyph and you were using a source, but this is not a glyph. I changed this variable to source.data['Bank_name'] so it uses the bank names in your ColumnDataSource.
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.palettes import Spectral7
import pandas as pd
top_ten_start = pd.read_csv('top_ten_start.csv')
top_ten_start['color'] = Spectral7
source = ColumnDataSource(top_ten_start)
p = figure(x_range=source.data['Bank_name'], plot_height=750, y_range=(0, 90), title="Banks")
p.vbar(x='Bank_name', top='Tier_1_ratio', width=0.9, legend='Bank_name', source=source, color='color')
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 45
show(p)
Question
Below is the grouped vbar chart example from the Bokeh documentation.
There are a few things I can't understand in this example.
Where does 'cyl_mfr' come from in factor_cmap() and vbar()?
Is 'mpg_mean' the calculated mean of the 'mpg' column? If so,
why doesn't 'mpg_sum' work?
I want to make my own vbar chart like this example.
Code
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.palettes import Spectral5
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap
output_file("bars.html")

# Treat cylinder count and model year as categorical (string) values.
df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)

# NOTE(review): newer pandas versions treat a tuple here as ONE key; if this
# raises KeyError, pass a list instead: df.groupby(['cyl', 'mfr']).
group = df.groupby(('cyl', 'mfr'))
# Passing the GroupBy object makes Bokeh build the source columns from the
# group statistics, e.g. 'mpg_mean' and the combined key column 'cyl_mfr'.
source = ColumnDataSource(group)

# Colour the bars by the first level (cyl) of the 'cyl_mfr' factor only.
index_cmap = factor_cmap('cyl_mfr', palette=Spectral5,
                         factors=sorted(df.cyl.unique()), end=1)

p = figure(plot_width=800, plot_height=300,
           title="Mean MPG by # Cylinders and Manufacturer",
           x_range=group, toolbar_location=None, tools="")

p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
       line_color="white", fill_color=index_cmap, )

p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None

# Tooltip values reference source columns with the '@column' syntax.
p.add_tools(HoverTool(tooltips=[("MPG", "@mpg_mean"),
                                ("Cyl, Mfr", "@cyl_mfr")]))
show(p)
The line group = df.groupby(('cyl', 'mfr')) creates a <pandas.core.groupby.DataFrameGroupBy object at 0x0xxx>. If you pass this to a ColumnDataSource, Bokeh does a lot of magic and already calculates many summary statistics for you (the columns correspond to the result of group.describe()):
df.columns
Index(['mpg', 'cyl', 'displ', 'hp', 'weight', 'accel', 'yr', 'origin', 'name', 'mfr'],
source.column_names
['accel_count', 'accel_mean', 'accel_std', 'accel_min',
'accel_25%', 'accel_50%', 'accel_75%', 'accel_max', 'displ_count',
'displ_mean', 'displ_std', 'displ_min', 'displ_25%', 'displ_50%',
'displ_75%', 'displ_max', 'hp_count', 'hp_mean', 'hp_std',
'hp_min', 'hp_25%', 'hp_50%', 'hp_75%', 'hp_max', 'mpg_count',
'mpg_mean', 'mpg_std', 'mpg_min', 'mpg_25%', 'mpg_50%',
'mpg_75%', 'mpg_max', 'weight_count', 'weight_mean', 'weight_std',
'weight_min', 'weight_25%', 'weight_50%', 'weight_75%',
'weight_max', 'yr_count', 'yr_mean', 'yr_std', 'yr_min',
'yr_25%', 'yr_50%', 'yr_75%', 'yr_max', 'cyl_mfr']
cyl_mfr is the combined label of the 2 columns you grouped by. In source this has become a column of tuples.
mpg_sum is not calculated. If you want the sum, you will need to calculate that yourself.