I'm creating a nested categorical bar chart with bokeh and pandas. I tested the exampled included in Bokeh docs (shown below)
from bokeh.io import show, output_file
from bokeh.plotting import figure
from bokeh.palettes import Spectral5
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap
output_file("bar_pandas_groupby_nested.html")
df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)
group = df.groupby(by=['cyl', 'mfr'])
index_cmap = factor_cmap('cyl_mfr', palette=Spectral5, factors=sorted(df.cyl.unique()), end=1)
p = figure(plot_width=800, plot_height=300, title="Mean MPG by # Cylinders and Manufacturer",
x_range=group, toolbar_location=None, tooltips=[("MPG", "#mpg_mean"), ("Cyl, Mfr", "#cyl_mfr")])
p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=group,
line_color="white", fill_color=index_cmap, )
p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None
show(p)
I'm trying to apply this with my out set of data. However, we i run the script i get the error
Js error
This is my code:
def test(data):
output_file("bar_pandas_groupby_nested.html")
print(df.head())
data.prueba = data.prueba.astype(str)
data.inst_nombre_institucion = data.inst_nombre_institucion.astype(str)
group = data.groupby(by=['prueba', 'inst_nombre_institucion'])
index_cmap = factor_cmap('prueba_inst_nombre_institucion', palette=Spectral5, factors=sorted(data.prueba.unique()), end=1)
p = figure(plot_width=800, plot_height=300, title="Mean",
x_range=group, toolbar_location=None, tooltips=[("MPG", "#media_mod_ingles_mean"), ("prueba, institucion", "#prueba_inst_nombre_institucion")])
p.vbar(x='prueba_inst_nombre_institucion', top='media_mod_ingles_mean', width=1, source=group,
line_color="white", fill_color=index_cmap, )
p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Mean"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None
show(p)
return True
And my data looks like this:
data.head()
Why do i get this error?
Thanks for your time!
UPDATE:
data.csv and script can be downloaded here
The issue is that your labels are way, way too long to fit inside a 300px high plot when they are oriented vertical-ish. If I change the orientation to
p.xaxis.major_label_orientation = 0.2
Then the plot can render, but you can also see the problem:
Alternatively, if I make the labels actually nearly vertical (~pi/2), and make the plot height be 800px, everything is visible:
But I would say that still is fairly hard to interpret/read. I would suggest trying to find shorter strings to use for your categories.
Related
Hi could someone point me what I do wrong: some time bokeh does not display color. If run the script there will be only red points, but when I change the 'green' to the 'lime' there will be points of two colors - why? what do I wrong?
import numpy as np
from scipy.signal import find_peaks
# Find peaks
max_peaks, max_other = find_peaks(price, height=0.1)
print(len(max_peaks))
min_peaks, min_other = find_peaks(-price)
print(len(min_peaks))
colors = np.full(len(price), 'None')
colors[max_peaks] = 'red'
colors[min_peaks] = 'green'
from bokeh.models import CrosshairTool
crosshair = CrosshairTool(dimensions='both')
width=1000
height = 600
price_dates = df['date'].to_numpy()
price_dates_dt = np.apply_over_axes(convert_int_to_datetime, price_dates, axes=0)
price_dates_str = np.apply_over_axes(convert_int_to_datetime_to_str, price_dates, axes=0)
price_source = ColumnDataSource(data=dict(
date = price_dates_dt,
date_str = price_dates_str,
price=df['close'].to_numpy(),
color=colors
))
f1 = figure(title=f'price', x_axis_type='datetime', tools='pan,wheel_zoom,box_zoom,reset,save,box_select,zoom_in,zoom_out, hover', sizing_mode='stretch_width',plot_width=width, plot_height=height)
f1.toolbar.logo = None
f1.add_tools(crosshair)
price_l = f1.line(source=price_source, x='date', y='price', legend_label='price', line_color='gray', line_dash='solid', line_width=0.8)
price_s = f1.scatter(source=price_source, x='date', y='price', legend_label='price', fill_color='color', line_color='color', size=2)
f1.add_tools(HoverTool(renderers=[price_s, price_l], tooltips=[
('index', '$index'),
('(x,y)', '($x{%F}, $y{0.00})'),
# ('date', '#date'),
('date_str', '#date_str'),
('price', '#price{0.00}')],
formatters={
'#date': 'datetime',
'#{price}': 'printf', }))
f1.legend.location = 'top_left'
f1.legend.click_policy = 'hide'
f1.xaxis.axis_label = 'Time'
f1.yaxis.axis_label = 'price'
f1.background_fill_color = '#dfe9f0'
# f1.background_fill_alpha = 0.5
f1.xgrid.grid_line_color='white'
f1.ygrid.grid_line_color='white'
show(f1)
I am trying to get to something like this but with more cats for each scenario (I have 4 scenarios but many cats):
I can only achieve this when the number of 'Cat's is equal to the number of 'Scenario's. I don't fully understand how the factors line in the code is working and I think the answer lies within that.
whenever I add more Cats I get this error:
IndexError: list index out of range
The code I have is follows:
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.models import Range1d
from calendar import month_abbr
import numpy as np
from bokeh.palettes import Spectral3
from bokeh.transform import factor_cmap
systems = ["Scenario1", "Scenario2", "Scenario3", "Scenario4"]
subsystems =["Cat1","Cat2", "Cat3", "Cat4"]#, "Cat5", "Cat6"]
factors =[(systems[ind],subsystem) for ind, subsystem in enumerate(subsystems) for subsystem in subsystems]
count_closed = [52,52,49,26,9,8, 32,20]#,33,66,9,8]
count_open = [0,0,1, 0]
count_waived = [3,1,0,0]
statuses = ["count_closed", "count_open", "count_waived"]
data = dict(factors = factors, count_closed=count_closed, count_open=count_open, count_waived=count_waived )
source = ColumnDataSource(data=data)
p = figure(x_range = FactorRange(*factors), plot_height=250, title="Repeat 10 cats for each scenario",
toolbar_location = 'right',
tools = "hover", tooltips="$name #subsystems: #$name")
p.vbar_stack(statuses, x="factors", width=0.9, alpha = 0.5, color=["navy","red","pink"], source=source, legend_label=statuses)
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None
p.legend.location = "top_center"
p.legend.orientation = "horizontal"
show(p)
To generate a list with tuples of all combinations, your variable factors, you can use
from itertools import product
factors = list(product(systems, subsystems))
This will create a list which is understood by bokehs FactorRange.
Complete Example
from itertools import product
import numpy as np
from bokeh.models import ColumnDataSource, FactorRange, Range1d
from bokeh.plotting import figure, show, output_notebook
output_notebook()
systems = ["Scenario1", "Scenario2", "Scenario3", "Scenario4"]
subsystems =["Cat1","Cat2", "Cat3", "Cat4", "Cat5", "Cat6"]
factors = list(product(systems, subsystems))
count_closed = np.random.randint(0,20, len(factors))
count_open = np.random.randint(0,20, len(factors))
count_waived = np.random.randint(0,20, len(factors))
statuses = ["count_closed", "count_open", "count_waived"]
data = dict(factors = factors, count_closed=count_closed, count_open=count_open, count_waived=count_waived )
source = ColumnDataSource(data=data)
p = figure(x_range = FactorRange(*factors), plot_height=250, title="Repeat 10 cats for each scenario",
toolbar_location = 'right',
tools = "hover", tooltips="$name #subsystems: #$name")
p.vbar_stack(statuses, x="factors", width=0.9, alpha = 0.5, color=["navy","red","pink"], source=source, legend_label=statuses)
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None
p.legend.location = "top_center"
p.legend.orientation = "horizontal"
show(p)
Output
I have created a Sankeydiagram using a Holomap and I want to show the sums of the respective nodes by hovering over them but I don't know how I need to format my code or setup the dataframe to achieve this. Github
import holoviews as hv
from holoviews import opts, dim
import pandas as pd
from bokeh.palettes import Viridis
import bokeh.models
from bokeh.themes import built_in_themes
hv.extension('bokeh')
df = pd.read_excel("Refugees_V9.xlsx")
df = df.dropna()
df['year'] = df['year'].astype(int)
df['refugees'] = df['refugees'].astype(int)
hover = bokeh.models.HoverTool(tooltips=[('Refugees', '#refugees'),])
hv_ds = hv.Dataset(
data=df,
kdims=['source', 'target', 'year'],
vdims=['refugees'],
)
hv.renderer('bokeh').theme = built_in_themes['dark_minimal']
def hook(plot, element):
#plot.handles['text_1_glyph'].text_font = 'verdana'
#plot.handles['text_1_glyph'].text_font_size = '12pt'
plot.handles['text_1_glyph'].text_color = 'snow'
#plot.handles['text_2_glyph'].text_font = 'verdana'
#plot.handles['text_2_glyph'].text_font_size = '12pt'
plot.handles['text_2_glyph'].text_color = 'white'
graph = hv_ds.to(hv.Sankey)
graph.opts(
label_position='outer',
bgcolor = "#2f2f2f",
edge_color='target',
node_color='target',
show_values = False,
cmap= Viridis[10],
width=800,
height=800,
title = "Refugee migration into the Schengen-EU 2011-2022",
node_sort=False,
node_width = 20,
#tools= ['hover'],
default_tools = [hover],
show_frame=False,
edge_alpha = 0.8,
edge_hover_fill_alpha = 1,
node_alpha = 0.8,
node_hover_fill_alpha = 0.95,
label_text_font_size = "10pt",
hooks=[hook],
toolbar=None,
)
hv.output(graph, widget_location="bottom")
Data
Created Sankey
I want to show the sum of refugees over the nodes and the connecting line displaying the information how many are migrating from where to where.
I am creating a bokeh plot with a slider to refresh plot accordingly. There are 2 issues with the code posted.
1. The plot is not refreshed as per the slider. Please help in providing a fix for this issue.
2. Plot is not displayed with curdoc() when bokeh serve --show fn.ipynb is used
I'm trying to visualise this CSV file.
import pandas as pd
import numpy as np
from bokeh.models import ColumnDataSource, CategoricalColorMapper, HoverTool, Slider
from bokeh.plotting import figure, curdoc
from bokeh.palettes import viridis
from bokeh.layouts import row, widgetbox
#Importing and processing data file
crop = pd.read_csv('crop_production.csv')
#Cleaning Data
crop.fillna(np.NaN)
crop['Season'] = crop.Season.str.strip()
#Removing Whitespace #Filtering the dataset by Season
crop_season = crop[crop.Season == 'Whole Year']
crop_dt = crop_season.groupby(['State_Name', 'District_Name', 'Crop_Year']).mean().round(1)
#Creating Column Data Source
source = ColumnDataSource({
'x' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].Area,
'y' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].Production,
'state' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].index.get_level_values('State_Name'),
'district' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].index.get_level_values('District_Name')
})
#Creating color palette for plot
district_list = crop_dt.loc[(['Tamil Nadu']), :].index.get_level_values('District_Name').unique().tolist()
call_colors = viridis(len(district_list))
color_mapper = CategoricalColorMapper(factors=district_list, palette=call_colors)
# Creating the figure
#xmin, xmax = min(data.Crop_Year), max(data.Crop_Year)
#ymin, ymax = min(data.Production), max(data.Production)
p = figure(
title = 'Crop Area vs Production',
x_axis_label = 'Area',
y_axis_label = 'Production',
plot_height=900,
plot_width=1200,
tools = [HoverTool(tooltips='#district')]
)
p.circle(x='x', y='y', source=source, size=12, alpha=0.7,
color=dict(field='district', transform=color_mapper),
legend='district')
p.legend.location = 'top_right'
def update_plot(attr, old, new):
yr = slider.value
new_data = {
'x' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].Area,
'y' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].Production,
'state' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].index.get_level_values('State_Name'),
'district' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].index.get_level_values('District_Name')
}
source.data = new_data
#Creating Slider for Year
start_yr = min(crop_dt.index.get_level_values('Crop_Year'))
end_yr = max(crop_dt.index.get_level_values('Crop_Year'))
slider = Slider(start=start_yr, end=end_yr, step=1, value=start_yr, title='Year')
slider.on_change('value',update_plot)
layout = row(widgetbox(slider), p)
curdoc().add_root(layout)
show(layout)
Also tried a different option using CustomJS as below, but still no luck.
callback = CustomJS(args=dict(source=source), code="""
var data = source.data;
var yr = slider.value;
var x = data['x']
var y = data['y']
'x' = crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['ABC']), :].Area;
'y' = crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['ABC']), :].Production;
p.circle(x='x', y='y', source=source, size=12, alpha=0.7,
color=dict(field='district', transform=color_mapper),
legend='district');
}
source.change.emit();
""")
#Creating Slider for Year
start_yr = min(crop_dt.index.get_level_values('Crop_Year'))
end_yr = max(crop_dt.index.get_level_values('Crop_Year'))
yr_slider = Slider(start=start_yr, end=end_yr, step=1, value=start_yr, title='Year', callback=callback)
callback.args["slider"] = yr_slider
Had a lot of issues trying to execute your code and I have changed some things, so feel free to correct me if did something wrong.
The error was caused by the creation of the ColumnDataSource, I had to change the level value to Crop_Year instead of Year. The loc 'ABC' also caused an error so I removed that too (And I had to add source = ColumnDataSource({, you probably forgot to copy that)
I also added a dropdown menu so it's possible to only show the data from one district.
Also, I'm not quite sure if it's possible to start a bokeh server by supplying a .ipynb file to --serve. But don't pin me down on that, I never use notebooks. I've tested this with a .py file.
#!/usr/bin/python3
import pandas as pd
import numpy as np
from bokeh.models import ColumnDataSource, CategoricalColorMapper, HoverTool
from bokeh.plotting import figure, curdoc
from bokeh.palettes import viridis
from bokeh.layouts import row, widgetbox
from bokeh.models.widgets import Select, Slider
#Importing and processing data file
crop = pd.read_csv('crop_production.csv')
#Cleaning Data
crop.fillna(np.NaN)
crop['Season'] = crop.Season.str.strip()
#Removing Whitespace #Filtering the dataset by Season
crop_season = crop[crop.Season == 'Whole Year']
crop_dt = crop_season.groupby(['State_Name', 'District_Name', 'Crop_Year']).mean().round(1)
crop_dt_year = crop_dt[crop_dt.index.get_level_values('Crop_Year')==2001]
crop_dt_year_state = crop_dt_year[crop_dt_year.index.get_level_values('State_Name')=='Tamil Nadu']
#Creating Column Data Source
source = ColumnDataSource({
'x': crop_dt_year_state.Area.tolist(),
'y': crop_dt_year_state.Production.tolist(),
'state': crop_dt_year_state.index.get_level_values('State_Name').tolist(),
'district': crop_dt_year_state.index.get_level_values('District_Name').tolist()
})
#Creating color palette for plot
district_list = crop_dt.loc[(['Tamil Nadu']), :].index.get_level_values('District_Name').unique().tolist()
call_colors = viridis(len(district_list))
color_mapper = CategoricalColorMapper(factors=district_list, palette=call_colors)
# Creating the figure
p = figure(
title = 'Crop Area vs Production',
x_axis_label = 'Area',
y_axis_label = 'Production',
plot_height=900,
plot_width=1200,
tools = [HoverTool(tooltips='#district')]
)
glyphs = p.circle(x='x', y='y', source=source, size=12, alpha=0.7,
color=dict(field='district', transform=color_mapper),
legend='district')
p.legend.location = 'top_right'
def update_plot(attr, old, new):
#Update glyph locations
yr = slider.value
state = select.value
crop_dt_year = crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr]
crop_dt_year_state = crop_dt_year[crop_dt_year.index.get_level_values('State_Name')==state]
new_data = {
'x': crop_dt_year_state.Area.tolist(),
'y': crop_dt_year_state.Production.tolist(),
'state': crop_dt_year_state.index.get_level_values('State_Name').tolist(),
'district': crop_dt_year_state.index.get_level_values('District_Name').tolist()
}
source.data = new_data
#Update colors
district_list = crop_dt.loc[([state]), :].index.get_level_values('District_Name').unique().tolist()
call_colors = viridis(len(district_list))
color_mapper = CategoricalColorMapper(factors=district_list, palette=call_colors)
glyphs.glyph.fill_color = dict(field='district', transform=color_mapper)
glyphs.glyph.line_color = dict(field='district', transform=color_mapper)
#Creating Slider for Year
start_yr = min(crop_dt.index.get_level_values('Crop_Year'))
end_yr = max(crop_dt.index.get_level_values('Crop_Year'))
slider = Slider(start=start_yr, end=end_yr, step=1, value=start_yr, title='Year')
slider.on_change('value',update_plot)
#Creating drop down for state
options = list(set(crop_dt.index.get_level_values('State_Name').tolist()))
options.sort()
select = Select(title="State:", value="Tamil Nadu", options=options)
select.on_change('value', update_plot)
layout = row(widgetbox(slider, select), p)
curdoc().add_root(layout)
#Jasper Thanks a lot. This works, however it doesnt work with .loc[(['Tamil Nadu']), :]. Reason for having this is to filter the data by adding a bokeh dropdown or radio button object and refresh the plot based on the filters. The below code works only if .loc[(['Tamil Nadu']), :] is removed. Is there any other way to fix this please?
def update_plot(attr, old, new):
yr = slider.value
new_data = {
'x' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].Area.tolist(),
'y' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].Production.tolist(),
'state' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].index.get_level_values('State_Name').tolist(),
'district' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].index.get_level_values('District_Name').tolist()
}
source.data = new_data
I'm trying to plot some datapoint on a map in Bokeh but somehow nothing shows up, only the map background.
import pandas as pd
from IPython.core.display import HTML, display
%matplotlib inline
sample = pd.DataFrame({'Lat': [40.7260,40.7209], 'Lon': [-73.991,-74.0507], 'Count': 1})
from bokeh.plotting import figure, output_notebook, show
output_notebook()
from bokeh.tile_providers import STAMEN_TERRAIN
x_range, y_range = ((-8242000,-8210000), (4965000,4990000))
plot_width = int(750)
plot_height = int(plot_width//1.2)
def base_plot(tools='pan,wheel_zoom,reset',plot_width=plot_width, plot_height=plot_height, **plot_args):
p = figure(tools=tools, plot_width=plot_width, plot_height=plot_height,
x_range=x_range, y_range=y_range, outline_line_color=None,
min_border=0, min_border_left=0, min_border_right=0,
min_border_top=0, min_border_bottom=0, **plot_args)
p.axis.visible = False
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
return p
p = base_plot()
p.add_tile(STAMEN_TERRAIN)
p.circle(x=samples['Lat'], y=samples['Lon'], **options)
show(p)
Thanks for advice.
The plot ranges are in Web Mercator units:
((-8242000,-8210000), (4965000,4990000))
But the data points in your sample DataFrame are in lat/lon units. You can either:
add an "extra range" in lat/lon units (that match up!) and have p.circle reference the extra range instead of the default range.
Convert your circle coordinates to Web Mercator
The latter is probably easier. This page has a function that can do the conversion. Using it, you'd get
sample = pd.DataFrame({
'easting': [-8236640.443285105, -8243286.216885463],
'northing': [4972010.345629457, 4971261.231184175]
})
Updating your code to use this:
import pandas as pd
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.tile_providers import STAMEN_TERRAIN
samples = pd.DataFrame({
'easting': [-8236640.443285105, -8243286.216885463],
'northing': [4972010.345629457, 4971261.231184175]
})
x_range, y_range = ((-8242000,-8210000), (4965000,4990000))
plot_width = int(750)
plot_height = int(plot_width//1.2)
def base_plot(tools='pan,wheel_zoom,reset',plot_width=plot_width, plot_height=plot_height, **plot_args):
p = figure(tools=tools, plot_width=plot_width, plot_height=plot_height,
x_range=x_range, y_range=y_range, outline_line_color=None,
min_border=0, min_border_left=0, min_border_right=0,
min_border_top=0, min_border_bottom=0, **plot_args)
p.axis.visible = False
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
return p
p = base_plot()
p.add_tile(STAMEN_TERRAIN)
p.circle(x=samples['easting'], y=samples['northing'], size=20, color="red")
output_file("map.html")
show(p)
yields this plot: