I new in using bokeh.
This is what I am doing. From osmnx I get data of schools and hospitals in Haiti.
Without writing all the code I arrive to get the following
data1=dict(
x=list(schools['x'].values),
y=list(schools['y'].values)
)
data2=dict(
x=list(hospitals['x'].values),
y=list(hospitals['y'].values)
)
building = 'Schools'
buildings = {
'Schools': {
'title': 'Schools',
'data': data1,
'color': 'black'
},
'Hospitals': {
'title': 'Hospitals',
'data': data2,
'color': 'red'
}
}
building_select = Select(value=building, title='Building', options=sorted(buildings.keys()))
I would like to change the visualisation between schools and hospitals by selecting it. I define the function that change the data to take and the color.
def returnInfo(building):
dataPoints = buildings[building]['data']
color = buildings[building]['color']
return dataPoints, color
dataPoints, color = returnInfo(building)
I define the function make_plot
def make_plot(dataPoints, title, color):
TOOLS = "pan, wheel_zoom, reset,save"
p = figure(plot_width=800,
tools=TOOLS,
x_axis_location=None,
y_axis_location=None)
# Add points on top (as black points)
buildings = p.circle('x', 'y', size=4, source=data1, color=color)
hover_buildings = HoverTool(renderers = [buildings], point_policy="follow_mouse", tooltips = [("(Long, Lat)", "($x, $y)")])
p.add_tools(hover_buildings)
return p
plot = make_plot(dataPoints, "Data for " + buildings[building]['title'], color)
then I update
def update_plot(attrname, old, new):
building = building_select.value
p.title.text = "Data for " + buildings[building]['title']
src = buildings[building]['data']
dataPoints, color = returnInfo(building)
dataPoints.update
building_select.on_change('value', update_plot)
controls = column(building_select)
curdoc().add_root(row(plot, controls))
but it does not work: i.e. I am not able to change the points from schools to hospitals even if I have the cursor. Where is the error in the update section?
As first solution I suggest to use legend.click_plolicy = 'hide' to toggle visibility of your buildings on the map (Bokeh v1.1.0)
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.tile_providers import CARTODBPOSITRON_RETINA
import osmnx as ox
amenities = ['hospital', 'school']
for i, amenity in enumerate(amenities):
buildings = ox.pois_from_address("Port-au-Prince, Haiti", amenities = [amenity], distance = 3500)[['geometry', 'name', 'element_type']]
for item in ['way', 'relation']:
buildings.loc[buildings.element_type == item, 'geometry'] = buildings[buildings.element_type == item]['geometry'].map(lambda x: x.centroid)
buildings.name.fillna('Hospital' if i == 0 else 'School', inplace = True)
amenities[i] = buildings.to_crs(epsg = 3857)
p = figure(title = "Port-au-Prince, Haiti", tools = "pan,wheel_zoom,hover,reset", x_range = (-8057000, -8048500), y_range = (2098000, 2106000),
tooltips = [('Name', '#name'), ("(Long, Lat)", "($x, $y)")], x_axis_location = None, y_axis_location = None, active_scroll = 'wheel_zoom')
p.add_tile(CARTODBPOSITRON_RETINA)
p.grid.grid_line_color = None
for i, b in enumerate(amenities):
source = ColumnDataSource(data = dict(x = b.geometry.x, y = b.geometry.y, name = b.name.values))
p.circle('x', 'y', color = 'red' if i == 0 else 'blue', source = source, legend = 'Hospital' if i == 0 else 'School')
p.legend.click_policy = 'hide'
show(p)
And if you want the Select widget then here is another alternative (Bokeh v1.1.0):
from bokeh.models import ColumnDataSource, Column, Select, CustomJS
from bokeh.plotting import figure, show
from bokeh.tile_providers import CARTODBPOSITRON_RETINA
import osmnx as ox
amenities = ['hospital', 'school']
for i, amenity in enumerate(amenities):
buildings = ox.pois_from_address("Port-au-Prince, Haiti", amenities = [amenity], distance = 3500)[['geometry', 'name', 'element_type']]
for item in ['way', 'relation']:
buildings.loc[buildings.element_type == item, 'geometry'] = buildings[buildings.element_type == item]['geometry'].map(lambda x: x.centroid)
buildings.name.fillna('Hospital' if i == 0 else 'School', inplace = True)
buildings = buildings.to_crs(epsg = 3857)
amenities[i] = dict(x = list(buildings.geometry.x), y = list(buildings.geometry.y), name = list(buildings.name.values), color = (['red'] if i == 0 else ['blue']) * len(buildings.name.values))
source = ColumnDataSource(amenities[0])
p = figure(title = "Hospitals", tools = "pan,wheel_zoom,hover,reset", x_range = (-8057000, -8048500), y_range = (2098000, 2106000),
tooltips = [('Name', '#name'), ("(Long, Lat)", "($x, $y)")], x_axis_location = None, y_axis_location = None, active_scroll = 'wheel_zoom')
p.add_tile(CARTODBPOSITRON_RETINA)
p.circle(x = 'x', y = 'y', color = 'color', source = source)
p.grid.grid_line_color = None
code = ''' source.data = (cb_obj.value == 'Hospitals' ? data[0] : data[1]); p.title.text = cb_obj.value; '''
select = Select(options = ['Hospitals', 'Schools'], callback = CustomJS(args=dict(p = p, source = source, data = amenities), code = code))
show(Column(p, select))
Let me know if you need any explanation on this code.
Below are the changes required to make your code work:
In your make_plot method, since you want to update the title of the plot on selection change, replace
p = figure(plot_width=800,
tools=TOOLS,
x_axis_location=None,
y_axis_location=None)
with
p = figure(plot_width=800,
tools=TOOLS,
title=title,
x_axis_location=None,
y_axis_location=None)
Also, since you want to update the data and color of the buildings, return the buildings too in the method, so that the complete method now looks like:
def make_plot(dataPoints, title, color):
TOOLS = "pan, wheel_zoom, reset,save"
p = figure(plot_width=800,
tools=TOOLS,
title=title,
x_axis_location=None,
y_axis_location=None)
# Add points on top (as black points)
buildings = p.circle('x', 'y', size=4, source=data1, color=color)
hover_buildings = HoverTool(renderers = [buildings], point_policy="follow_mouse", tooltips = [("(Long, Lat)", "($x, $y)")])
p.add_tools(hover_buildings)
return p, buildings
Next, instead of the call to
plot = make_plot(dataPoints, "Data for " + buildings[building]['title'], color)
you need to get the returned buildings also in a variable so that it can be directly updated. So now your call will look like
plot, b = make_plot(dataPoints, "Data for " + buildings[building]['title'], color)
Finally, change your update_plot method, so that it looks like this:
def update_plot(attrname, old, new):
building = building_select.value
plot.title.text = "Data for " + buildings[building]['title']
src = buildings[building]['data']
dataPoints, color = returnInfo(building)
b.data_source.data = dataPoints
b.glyph.fill_color = color
With these changes, it would work as expected. See the results attached.
Sample data used is:
data1=dict(
x=[1,2,3],
y=[2,1,3]
)
data2=dict(
x=[1,2,3],
y=[1,3,2]
)
Related
What I want to do:
I made a dynamic map using bokeh, pandas and geopandas. The data to be displayed is loaded from a table, then mapped to a country per year. The year to be displayed is determined by a bokeh slider. You can also hover over a country and get its value. I now want to be able to change the data source by selecting a radio button. To display the data correctly, I want to change the color palette, rescale it (a range from e.g. 50 to 100 instead of 0 to 4.5), update the scaling on the slider to the new lowest year to highest year, and then display the world-map with the new data. I also want to update the title of the map from e.g. "Fertility" to "Life expectancy".
What I already have:
I have a working dynamic map with Slider and Hover tool. I also have a list from which the data to be used is taken (datapath, title to be used, color palette to be used, highest and lowest year, highest and lowest value). I have a radio button group, with three different data sources to choose from. All paths are relative, the data is provided with a consistent structure. I had the map changing the data below and displaying the new stuff, but than I did something and it stopped working. I also had the Hover tool display the right values, but with a wrong (old) description.
What I need help with:
Updating the color bar to accomodate the new palette and the new range
Updating the slider to accomodate a changed range
Updating the title displayed to show what's actually displayed
What I already tried:
I've put the whole loading and displaying in the function executed when the radio button group is changed. The first thing this function does is clearing the layout and then rebuilding it. Unfortunately, this is neither efficient, nor working, since I only get the radio button Group and an empty space below, no matter what I do. I've searched for a solution, but all I found (and tried) didn't do what I needed.
I can provide the actual code, if needed (though some of the variables and documentations are in German), but since I'm pretty new to the whole python thing, I don't now, what exactly of that about 300 lines of code you need. Just let me now, and I'll try and provide.
Hope you can help me with that.
Thanks in advance,
Asd789
EDIT: As correctly pointed out in the comments, some code to help understand what I did.
I'll cut all the imports for the sake of brevity, as a mistake there would have shown up as error in my terminal. Also leaving out comments.
geoFrame #dataframe for geopanda shapefile
configList = [0, "Fertility", 'YlGnBu', 'Year' ]
df #dataframe for the .csv file
higStep = df['step'].max()
lowStep = df['step'].min()
configList.append(higStep)
higVal = df['valueInterest'].max()
merged = geoFrame.merge(df, left_on = 'country_code', right_on = 'code')
merged_json = json.loads(merged.to_json())
json_data = json.dumps(merged_json)
geosource = GeoJSONDataSource(geojson = json_data)
palette = brewer[configList[2]][8]
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 4)
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal')
p = figure(title = configList[1], plot_height = 600 , plot_width = 950, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.patches('xs','ys', source = geosource,fill_color = {'field' :'valueInterest', 'transform' : color_mapper},
line_color = 'black', line_width = 0.25, fill_alpha = 1)
p.add_layout(color_bar, 'below')
df_curr = df[df['step'] == higStep]
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 40, nan_color = '#d9d9d9')
def json_data(selectedStep):
st = selectedStep
df_st = df[df['step'] == st]
merged = geoFrame.merge(df_st, left_on = 'country_code', right_on = 'code', how = 'left')
merged_json = json.loads(merged.to_json())
json_data = json.dumps(merged_json)
return json_data
geosource = GeoJSONDataSource(geojson = json_data(higStep))
palette = brewer[configList[2]][8]
palette = palette[::-1]
color_mapper = LinearColorMapper(palette = palette, low = 0, high = higVal/2, nan_color = '#d9d9d9')
hover = HoverTool(tooltips = [ ('Country/region','#country'),(configList[1], '#valueInterest')])
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal')
p = figure(title = configList[1], plot_height = 600 , plot_width = 950, toolbar_location = None, tools = [hover])
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.patches('xs','ys', source = geosource,fill_color = {'field' :'valueInterest', 'transform' : color_mapper},
line_color = 'black', line_width = 0.25, fill_alpha = 1)
p.add_layout(color_bar, 'below')
def update_plot(attr, old, new):
st = slider.value
new_data = json_data(st)
geosource.geojson = new_data
p.title.text = configList[1] %st
slider = Slider(title = configList[3],start = 1950, end = 2015, step = 1, value = 2015)
slider.on_change('value', update_plot)
def radHandler(attr, new, old):
if radio.active == 0:
datapath = os.path.join(dataloc, 'children-per-woman-UN.csv')
configList = [0, "Fertility", 'YlGnBu', 'Year']
elif radio.active == 1:
#see above with diferent data
elif radio.active == 2:
#see above with different data
curdoc().clear()
higStep = df['step'].max()
lowStep = df['step'].min()
configList.append(higStep)
update_plot(attr, new, old)
hover = HoverTool(tooltips = [ ('Country/region','#country'),(configList[1], '#valueInterest')])
palette = brewer[configList[2]][8]
palette = palette[::-1]
olor_mapper = LinearColorMapper(palette = palette, low = 0, high = higVal/2, nan_color = '#d9d9d9')
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal'
p = figure(title = configList[1]+' '+str(configList[4]), plot_height = 600 , plot_width = 950, toolbar_location = None, tools = [hover])
layout = column(widgetbox(radio),p,widgetbox(slider))
curdoc().add_root(layout)
radio = RadioButtonGroup(labels=['Fertility', 'Life expectancy', 'Covid-19 total cases'], active=0)
radio.on_change('active',radHandler)
layout = column(p, widgetbox(radio),widgetbox(slider))
curdoc().title = configList[1]
curdoc().add_root(layout)
Sorry I could'nt cook it down further, but I don't know what's essential and what's just fancy stuff around.
This code works until I touch the radio button group. After that, the plot itself is just blank, without any prompts anywhere. The code itself is not entirely my fault, I got it to fix and maybe expand on it, the expansion being the ability to switch between data sources as decribed above.
I am creating a bokeh plot with a slider to refresh plot accordingly. There are 2 issues with the code posted.
1. The plot is not refreshed as per the slider. Please help in providing a fix for this issue.
2. Plot is not displayed with curdoc() when bokeh serve --show fn.ipynb is used
I'm trying to visualise this CSV file.
import pandas as pd
import numpy as np
from bokeh.models import ColumnDataSource, CategoricalColorMapper, HoverTool, Slider
from bokeh.plotting import figure, curdoc
from bokeh.palettes import viridis
from bokeh.layouts import row, widgetbox
#Importing and processing data file
crop = pd.read_csv('crop_production.csv')
#Cleaning Data
crop.fillna(np.NaN)
crop['Season'] = crop.Season.str.strip()
#Removing Whitespace #Filtering the dataset by Season
crop_season = crop[crop.Season == 'Whole Year']
crop_dt = crop_season.groupby(['State_Name', 'District_Name', 'Crop_Year']).mean().round(1)
#Creating Column Data Source
source = ColumnDataSource({
'x' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].Area,
'y' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].Production,
'state' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].index.get_level_values('State_Name'),
'district' : crop_dt[crop_dt.index.get_level_values('Year')==2001].loc[(['ABC']), :].index.get_level_values('District_Name')
})
#Creating color palette for plot
district_list = crop_dt.loc[(['Tamil Nadu']), :].index.get_level_values('District_Name').unique().tolist()
call_colors = viridis(len(district_list))
color_mapper = CategoricalColorMapper(factors=district_list, palette=call_colors)
# Creating the figure
#xmin, xmax = min(data.Crop_Year), max(data.Crop_Year)
#ymin, ymax = min(data.Production), max(data.Production)
p = figure(
title = 'Crop Area vs Production',
x_axis_label = 'Area',
y_axis_label = 'Production',
plot_height=900,
plot_width=1200,
tools = [HoverTool(tooltips='#district')]
)
p.circle(x='x', y='y', source=source, size=12, alpha=0.7,
color=dict(field='district', transform=color_mapper),
legend='district')
p.legend.location = 'top_right'
def update_plot(attr, old, new):
yr = slider.value
new_data = {
'x' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].Area,
'y' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].Production,
'state' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].index.get_level_values('State_Name'),
'district' : crop_dt[crop_dt.index.get_level_values('Year')==yr].loc[(['ABC']), :].index.get_level_values('District_Name')
}
source.data = new_data
#Creating Slider for Year
start_yr = min(crop_dt.index.get_level_values('Crop_Year'))
end_yr = max(crop_dt.index.get_level_values('Crop_Year'))
slider = Slider(start=start_yr, end=end_yr, step=1, value=start_yr, title='Year')
slider.on_change('value',update_plot)
layout = row(widgetbox(slider), p)
curdoc().add_root(layout)
show(layout)
Also tried a different option using CustomJS as below, but still no luck.
callback = CustomJS(args=dict(source=source), code="""
var data = source.data;
var yr = slider.value;
var x = data['x']
var y = data['y']
'x' = crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['ABC']), :].Area;
'y' = crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['ABC']), :].Production;
p.circle(x='x', y='y', source=source, size=12, alpha=0.7,
color=dict(field='district', transform=color_mapper),
legend='district');
}
source.change.emit();
""")
#Creating Slider for Year
start_yr = min(crop_dt.index.get_level_values('Crop_Year'))
end_yr = max(crop_dt.index.get_level_values('Crop_Year'))
yr_slider = Slider(start=start_yr, end=end_yr, step=1, value=start_yr, title='Year', callback=callback)
callback.args["slider"] = yr_slider
Had a lot of issues trying to execute your code and I have changed some things, so feel free to correct me if did something wrong.
The error was caused by the creation of the ColumnDataSource, I had to change the level value to Crop_Year instead of Year. The loc 'ABC' also caused an error so I removed that too (And I had to add source = ColumnDataSource({, you probably forgot to copy that)
I also added a dropdown menu so it's possible to only show the data from one district.
Also, I'm not quite sure if it's possible to start a bokeh server by supplying a .ipynb file to --serve. But don't pin me down on that, I never use notebooks. I've tested this with a .py file.
#!/usr/bin/python3
import pandas as pd
import numpy as np
from bokeh.models import ColumnDataSource, CategoricalColorMapper, HoverTool
from bokeh.plotting import figure, curdoc
from bokeh.palettes import viridis
from bokeh.layouts import row, widgetbox
from bokeh.models.widgets import Select, Slider
#Importing and processing data file
crop = pd.read_csv('crop_production.csv')
#Cleaning Data
crop.fillna(np.NaN)
crop['Season'] = crop.Season.str.strip()
#Removing Whitespace #Filtering the dataset by Season
crop_season = crop[crop.Season == 'Whole Year']
crop_dt = crop_season.groupby(['State_Name', 'District_Name', 'Crop_Year']).mean().round(1)
crop_dt_year = crop_dt[crop_dt.index.get_level_values('Crop_Year')==2001]
crop_dt_year_state = crop_dt_year[crop_dt_year.index.get_level_values('State_Name')=='Tamil Nadu']
#Creating Column Data Source
source = ColumnDataSource({
'x': crop_dt_year_state.Area.tolist(),
'y': crop_dt_year_state.Production.tolist(),
'state': crop_dt_year_state.index.get_level_values('State_Name').tolist(),
'district': crop_dt_year_state.index.get_level_values('District_Name').tolist()
})
#Creating color palette for plot
district_list = crop_dt.loc[(['Tamil Nadu']), :].index.get_level_values('District_Name').unique().tolist()
call_colors = viridis(len(district_list))
color_mapper = CategoricalColorMapper(factors=district_list, palette=call_colors)
# Creating the figure
p = figure(
title = 'Crop Area vs Production',
x_axis_label = 'Area',
y_axis_label = 'Production',
plot_height=900,
plot_width=1200,
tools = [HoverTool(tooltips='#district')]
)
glyphs = p.circle(x='x', y='y', source=source, size=12, alpha=0.7,
color=dict(field='district', transform=color_mapper),
legend='district')
p.legend.location = 'top_right'
def update_plot(attr, old, new):
#Update glyph locations
yr = slider.value
state = select.value
crop_dt_year = crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr]
crop_dt_year_state = crop_dt_year[crop_dt_year.index.get_level_values('State_Name')==state]
new_data = {
'x': crop_dt_year_state.Area.tolist(),
'y': crop_dt_year_state.Production.tolist(),
'state': crop_dt_year_state.index.get_level_values('State_Name').tolist(),
'district': crop_dt_year_state.index.get_level_values('District_Name').tolist()
}
source.data = new_data
#Update colors
district_list = crop_dt.loc[([state]), :].index.get_level_values('District_Name').unique().tolist()
call_colors = viridis(len(district_list))
color_mapper = CategoricalColorMapper(factors=district_list, palette=call_colors)
glyphs.glyph.fill_color = dict(field='district', transform=color_mapper)
glyphs.glyph.line_color = dict(field='district', transform=color_mapper)
#Creating Slider for Year
start_yr = min(crop_dt.index.get_level_values('Crop_Year'))
end_yr = max(crop_dt.index.get_level_values('Crop_Year'))
slider = Slider(start=start_yr, end=end_yr, step=1, value=start_yr, title='Year')
slider.on_change('value',update_plot)
#Creating drop down for state
options = list(set(crop_dt.index.get_level_values('State_Name').tolist()))
options.sort()
select = Select(title="State:", value="Tamil Nadu", options=options)
select.on_change('value', update_plot)
layout = row(widgetbox(slider, select), p)
curdoc().add_root(layout)
#Jasper Thanks a lot. This works, however it doesnt work with .loc[(['Tamil Nadu']), :]. Reason for having this is to filter the data by adding a bokeh dropdown or radio button object and refresh the plot based on the filters. The below code works only if .loc[(['Tamil Nadu']), :] is removed. Is there any other way to fix this please?
def update_plot(attr, old, new):
yr = slider.value
new_data = {
'x' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].Area.tolist(),
'y' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].Production.tolist(),
'state' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].index.get_level_values('State_Name').tolist(),
'district' : crop_dt[crop_dt.index.get_level_values('Crop_Year')==yr].loc[(['Tamil Nadu']), :].index.get_level_values('District_Name').tolist()
}
source.data = new_data
Trying to make a choropleth map in plotly using some data I have in a csv file. Have created the following map:
my choromap
This isn't a correct display of the data however. Here is an excerpt of my csv file:
China,2447
...
Trinidad And Tobago,2
Turkey,26
Ukraine,8
United Arab Emirates,97
United States of America,2008
Based on this I'd expected China to appear in a similar colour to that which the US has loaded in, however it looks the same as countries with values of less than 200. Does anyone know what the reason for this is?
Here's my full code for reference:
import pandas as pd
import plotly as py
df = pd.read_csv('app_country_data_minus_uk.csv')
data = [dict(type='choropleth',
locations = df['Country'],
locationmode = 'country names',
z = df['Applications'],
text = df['Country'],
colorbar = {'title':'Apps per country'},
colorscale = 'Jet',
reversescale = False
)]
layout = dict(title='Application Jan-June 2018',
geo = dict(showframe=False,projection={'type':'mercator'}))
choromap = dict(data = data,layout = layout)
red = py.offline.plot(choromap,filename='world.html')
per your comment I would make sure that china is indeed 2447 and not something like 244. I would also follow the plotly documentation although you example code works.
import plotly.plotly as py
import pandas as pd
df = pd.read_csv('app_country_data_minus_uk.csv')
data = [ dict(
type = 'choropleth',
locations = df['Country'],
locationmode = 'country names',
z = df['Applications'],
colorscale = 'Jet',
reversescale = False,
marker = dict(
line = dict (
color = 'rgb(180,180,180)',
width = 0.5
) ),
colorbar = dict(
autotick = False,
tickprefix = '',
title = 'Apps per country'),
) ]
layout = dict(
title = 'app_country_data_minus_uk',
geo = dict(
showframe = True,
showcoastlines = True,
projection = dict(
type = 'Mercator'
)
)
)
fig = dict( data=data, layout=layout )
py.iplot( fig, validate=False, filename='d3-world-map' )
or if you want to plot it offline:
import plotly.plotly as py
import pandas as pd
import plotly
df = pd.read_csv('app_country_data_minus_uk.csv')
data = [ dict(
type = 'choropleth',
locations = df['Country'],
locationmode = 'country names',
z = df['Applications'],
colorscale = 'Jet',
reversescale = False,
marker = dict(
line = dict (
color = 'rgb(180,180,180)',
width = 0.5
) ),
colorbar = dict(
title = 'Apps per country'),
) ]
layout = dict(
title = 'app_country_data_minus_uk',
geo = dict(
showframe = True,
showcoastlines = True,
projection = dict(
type = 'Mercator'
)
)
)
fig = dict( data=data, layout=layout )
plotly.offline.plot(fig,filename='world.html')
If you use iplot you will be able to edit the chart and see the data in plotly to make sure your data looks correct
import pandas as pd
import numpy as np
from bokeh.io import show, output_notebook, push_notebook
from bokeh.plotting import figure
from bokeh.models import CategoricalColorMapper, HoverTool, ColumnDataSource, Panel
from bokeh.models.widgets import CheckboxGroup, Slider, RangeSlider, Tabs
from bokeh.layouts import column, row, WidgetBox
from bokeh.palettes import Category20_16
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
output_notebook()
def histogram_tab(webs):
def make_dataset(params_list, range_start = 0.0, range_end = 1, bin_width = 0.005):
#check to make sure the start is less than the end
assert range_start < range_end, "Start must be less than end!"
#by_params = pd.DataFrame(columns=[ ,'Max', 'Avarage', 'Min','color'])
by_params = pd.DataFrame(columns=[ 'left','right', 'proportion', 'p_proportion','p_interval', 'name', 'w_name','color'])
#
range_extent = range_end - range_start
values = ['Min', "Avarage", 'Max']
# Iterate through all the parameters
for i, para_name in enumerate(params_list):
#print para_name
# Subset to the parameter
subset = webs[para_name]
# note: subset have to be a list of values
# [webs.columns[i%6]]
# Create a histogram with specified bins and range
arr_hist, edges = np.histogram(subset,
bins = int(range_extent / bin_width),
range = [range_start, range_end])
# Divide the counts by the total to get a proportion and create df
arr_df= pd.DataFrame({'proportion': arr_hist ,
'left': edges[:-1], 'right': edges[1:]}) #/ np.sum(arr_hist)
# Format the proportion
arr_df['p_proportion'] = ['%0.00005f' % proportion for proportion in arr_df['proportion']]
# Format the interval
arr_df['p_interval'] = ['%d to %d scale' % (left, right) for left,
right in zip(arr_df['left'], arr_df['right'])]
# Assign the parameter for labels
arr_df['name'] = para_name
arr_df['w_name'] = webs['Site name']
# Color each parametr differently
arr_df['color'] = Category20_16[i]
# Add to the overall dataframe
by_params = by_params.append(arr_df)
# Overall dataframe
by_params = by_params.sort_values(['name','left'])
return ColumnDataSource(by_params)
def style(p):
# Title
p.title.align = 'center'
p.title.text_font_size ='20pt'
p.title.text_font = 'serif'
# Axis titles
p.xaxis.axis_label_text_font_size = '14pt'
p.xaxis.axis_label_text_font_style = 'bold'
p.yaxis.axis_label_text_font_size = '14pt'
p.yaxis.axis_label_text_font_style = 'bold'
# Tick labels
p.xaxis.major_label_text_font_size = '12pt'
p.yaxis.major_label_text_font_size = '12pt'
return p
def make_plot(src):
# Blank plot with correct labels
p = figure(plot_width = 700, plot_height = 700,
title = "Histogram of Parametes for the websites",
x_axis_label = 'parameters', y_axis_label = "values")
# Quad glyphs to create a histogram
p.quad(source=src, bottom =0,left = 'left', right = 'right', color ='color', top= 'proportion',fill_alpha = 0.7, hover_fill_color = 'color', legend = 'name',
hover_fill_alpha = 1.0, line_color = 'white') #top='proportion',
# Hover tool with vline mode
hover = HoverTool(tooltips=[('Parameter','#name'),
('Website','#w_name'),
('Proportion','p_proportion')
],
mode='vline')
p.add_tools(hover)
# Stypling
p = style(p)
return p
# Update function takes three default parameters
def update(attr, old, new):
# Get the list of parameter for the graph
parameter_to_plot = [para_selection.labels[i] for i in para_selection.active]
# Make a new dataset based on the selected parameter and the
# make_dataset function defined earlier
new_src = make_dataset(parameter_to_plot, range_start = 0, range_end = 1, bin_width = 0.005) # note range are not specified
# Convert dataframe to column data source
new_src = ColumnDataSource(new_src)
# Update the source used the quad glpyhs
src.data.update(new_src.data)
list_of_params = list(webs.columns[1:].unique())
list_of_params.sort()
para_selection = CheckboxGroup(labels=list_of_params, active = [0,1])
para_selection.on_change('active',update)
binwidth_select = Slider(start =0, end = 1,
step = 0.00025, value = 0.0005,
title = 'Change in parameter')
binwidth_select.on_change('value', update)
range_select = RangeSlider(start=0, end=1, value =(0,1),
step=0.00025, title = 'Change in range')
range_select.on_change('value', update)
initial_params = [para_selection.labels[i] for i in para_selection.active]
src = make_dataset(initial_params,
range_start = range_select.value[0],
range_end = range_select.value[1],
bin_width = binwidth_select.value)
p = make_plot(src)
#show(p)
# Put controls in a single element
controls = WidgetBox(para_selection, binwidth_select, range_select)
# Create a row layout
layout = row(controls, p)
# Make a tab with the layout
tab = Panel(child = layout, title = 'Histogram')
#return tab
tabs = Tabs(tabs=[tab])
webs.add_root(tabs)
# Set up an application
handler = FunctionHandler(histogram_tab(webs))
app = Application(handler)
add_root is a method on Document, you are trying to call it on a DataFrame called webs, apparently, which is why you get that message. The structure of a Bokeh app in a notebook should look like this:
# create a function to define the app, must accept "doc" as the parameter
def myfunc(doc):
# make Bokeh objects
# add stuff to doc
doc.add_root(stuff)
# pass the function, but *don't* execute it
handler = FunctionHandler(myfunc)
app = Application(handler)
Note that the last two lines are not necessary in recent version of Bokeh, you can just call:
show(myfunc)
directly. There is a full example in the repo:
https://github.com/bokeh/bokeh/blob/master/examples/howto/server_embed/notebook_embed.ipynb
Your code should be structured very similarly to that.
I'm using Plotly's Python interface to generate a network. I've managed to create a network with my desired nodes and edges, and to control the size of the nodes.
I am desperately looking for help on how to do the following:
add node labels
add edge labels according to a list of weights
control the edge line width according to a list of weights
All this without using the "hovering" option, as it has to go in a non-interactive paper. I'd greatly appreciate any help! Plotly's output |
In case this fails, the figure itself |
matrix.csv
This is my code (most is copy-pasted from the Plotly tutorial for Networkx):
import pandas as pd
import plotly.plotly as py
from plotly.graph_objs import *
import networkx as nx
matrix = pd.read_csv("matrix.csv", sep = "\t", index_col = 0, header = 0)
G = nx.DiGraph()
# add nodes:
G.add_nodes_from(matrix.columns)
# add edges:
edge_lst = [(i,j, matrix.loc[i,j])
for i in matrix.index
for j in matrix.columns
if matrix.loc[i,j] != 0]
G.add_weighted_edges_from(edge_lst)
# create node trace:
node_trace = Scatter(x = [], y = [], text = [], mode = 'markers',
marker = Marker(
showscale = True,
colorscale = 'YIGnBu',
reversescale = True,
color = [],
size = [],
colorbar = dict(
thickness = 15,
title = 'Node Connections',
xanchor = 'left',
titleside = 'right'),
line = dict(width = 2)))
# set node positions
pos = nx.spring_layout(G)
for node in G.nodes():
G.node[node]['pos']= pos[node]
for node in G.nodes():
x, y = G.node[node]['pos']
node_trace['x'].append(x)
node_trace['y'].append(y)
# create edge trace:
edge_trace = Scatter(x = [], y = [], text = [],
line = Line(width = [], color = '#888'),
mode = 'lines')
for edge in G.edges():
x0, y0 = G.node[edge[0]]['pos']
x1, y1 = G.node[edge[1]]['pos']
edge_trace['x'] += [x0, x1, None]
edge_trace['y'] += [y0, y1, None]
edge_trace['text'] += str(matrix.loc[edge[0], edge[1]])[:5]
# size nodes by degree
deg_dict = {deg[0]:int(deg[1]) for deg in list(G.degree())}
for node, degree in enumerate(deg_dict):
node_trace['marker']['size'].append(deg_dict[degree] + 20)
fig = Figure(data = Data([edge_trace, node_trace]),
layout = Layout(
title = '<br>AA Substitution Rates',
titlefont = dict(size = 16),
showlegend = True,
margin = dict(b = 20, l = 5, r = 5, t = 40),
annotations = [dict(
text = "sub title text",
showarrow = False,
xref = "paper", yref = "paper",
x = 0.005, y = -0.002)],
xaxis = XAxis(showgrid = False,
zeroline = False,
showticklabels = False),
yaxis = YAxis(showgrid = False,
zeroline = False,
showticklabels = False)))
py.plot(fig, filename = 'networkx')
So
1. The solution to this is relative easy, you create a list with the node ids and you set it in the text attribute of the scatter plot. Then you set the mode as "markers+text" and you're done.
2. This is a little bit more tricky. You have to calculate the middle of each line and create a list of dicts including the line's middle position and weight. Then you add set as the layout's annotation.
3. This is too compicated to be done using plotly IMO. As for now I am calculating the position of each node using networkx spring_layout function. If you'd want to set the width of each line based on its weight you would have to modify the position using a function that takes into account all the markers that each line is attached to.
Bonus I give you the option to color each of the graph's components differently.
Here's a (slightly modified) function I made a while ago that does 1 and 2:
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import networkx as nx
def scatter_plot_2d(G, folderPath, name, savePng = False):
print("Creating scatter plot (2D)...")
Nodes = [comp for comp in nx.connected_components(G)] # Looks for the graph's communities
Edges = G.edges()
edge_weights = nx.get_edge_attributes(G,'weight')
labels = [] # names of the nodes to plot
group = [] # id of the communities
group_cnt = 0
print("Communities | Number of Nodes")
for subgroup in Nodes:
group_cnt += 1
print(" %d | %d" % (group_cnt, len(subgroup)))
for node in subgroup:
labels.append(int(node))
group.append(group_cnt)
labels, group = (list(t) for t in zip(*sorted(zip(labels, group))))
layt = nx.spring_layout(G, dim=2) # Generates the layout of the graph
Xn = [layt[k][0] for k in list(layt.keys())] # x-coordinates of nodes
Yn = [layt[k][1] for k in list(layt.keys())] # y-coordinates
Xe = []
Ye = []
plot_weights = []
for e in Edges:
Xe += [layt[e[0]][0], layt[e[1]][0], None]
Ye += [layt[e[0]][1], layt[e[1]][1], None]
ax = (layt[e[0]][0]+layt[e[1]][0])/2
ay = (layt[e[0]][1]+layt[e[1]][1])/2
plot_weights.append((edge_weights[(e[0], e[1])], ax, ay))
annotations_list =[
dict(
x=plot_weight[1],
y=plot_weight[2],
xref='x',
yref='y',
text=plot_weight[0],
showarrow=True,
arrowhead=7,
ax=plot_weight[1],
ay=plot_weight[2]
)
for plot_weight in plot_weights
]
trace1 = go.Scatter( x=Xe,
y=Ye,
mode='lines',
line=dict(color='rgb(90, 90, 90)', width=1),
hoverinfo='none'
)
trace2 = go.Scatter( x=Xn,
y=Yn,
mode='markers+text',
name='Nodes',
marker=dict(symbol='circle',
size=8,
color=group,
colorscale='Viridis',
line=dict(color='rgb(255,255,255)', width=1)
),
text=labels,
textposition='top center',
hoverinfo='none'
)
xaxis = dict(
backgroundcolor="rgb(200, 200, 230)",
gridcolor="rgb(255, 255, 255)",
showbackground=True,
zerolinecolor="rgb(255, 255, 255)"
)
yaxis = dict(
backgroundcolor="rgb(230, 200,230)",
gridcolor="rgb(255, 255, 255)",
showbackground=True,
zerolinecolor="rgb(255, 255, 255)"
)
layout = go.Layout(
title=name,
width=700,
height=700,
showlegend=False,
plot_bgcolor="rgb(230, 230, 200)",
scene=dict(
xaxis=dict(xaxis),
yaxis=dict(yaxis)
),
margin=dict(
t=100
),
hovermode='closest',
annotations=annotations_list
, )
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
plotDir = folderPath + "/"
print("Plotting..")
if savePng:
plot(fig, filename=plotDir + name + ".html", auto_open=True, image = 'png', image_filename=plotDir + name,
output_type='file', image_width=700, image_height=700, validate=False)
else:
plot(fig, filename=plotDir + name + ".html")
The d3graph library provides the functionalities you want.
pip install d3graph
I downloaded your data and imported it for demonstration:
# Import data
df = pd.read_csv('data.csv', index_col=0)
# Import library
from d3graph import d3graph
# Convert your Pvalues. Note that any edge is set when a value in the matrix is >0. The edge width is however based on this value. A conversion is therefore useful when you work with Pvalues.
df[df.values==0]=1
df = -np.log10(df)
# Increase some distance between edges. Maybe something like this.
df = (np.exp(df)-1)/10
# Make the graph with default settings
d3 = d3graph()
# Make the graph by setting some parameters
d3.graph(df)
# Set edge properties
d3.set_edge_properties(directed=True)
# Set node properties
d3.set_node_properties(color=df.columns.values, size=size, edge_size=10, edge_color='#000000', cmap='Set2')
This will result in an interactive network graph. Two screenshots: one with the default settings and the one with tweaked settings. More examples can be found here.