I have a pandas DataFrame that I need to write to an Excel file, and then apply color formatting and plot a chart on the same sheet.
I have used StyleFrame to apply colors and borders to my DataFrame, but StyleFrame does not work with the pandas XlsxWriter object. As a result, plotting a chart from this styled DataFrame does not work.
Can anyone please share a solution for this?
import pandas as pd
import xlfunc
import genfunc
from StyleFrame import StyleFrame, Styler, utils
def applystyle(df):
    """Wrap ``df`` in a StyleFrame and apply the standard report styling.

    All columns (header included) first get a white, bold, 8pt Calibri style;
    the header row is then restyled blue with white bold 8pt text.  Every
    column is set to width 15 and every row to height 12.

    Returns the resulting ``StyleFrame``.
    """
    body_style = Styler(
        bg_color=utils.colors.white,
        bold=True,
        font=utils.fonts.calibri,
        font_size=8,
    )
    header_style = Styler(
        bg_color=utils.colors.blue,
        bold=True,
        font_size=8,
        font_color=utils.colors.white,
        number_format=utils.number_formats.general,
        protection=False,
    )

    styled = StyleFrame(df)
    styled.apply_column_style(
        cols_to_style=df.columns,
        styler_obj=body_style,
        style_header=True,
    )
    # Applied second so the header keeps its own look.
    styled.apply_headers_style(styler_obj=header_style)
    styled.set_column_width(columns=styled.columns, width=15)
    styled.set_row_height(rows=styled.row_indexes, height=12)
    return styled
def createchart(regionname, workbook, outxl, sheetname, cellid, charttitle,
                startrow, startcol, endrow, endcol):
    """Insert a combined line + column chart into worksheet ``sheetname``.

    Columns ``startcol + 1`` up to (but excluding) ``endcol - 1`` are plotted
    as line series; column ``endcol - 1`` is plotted as a column series on the
    secondary Y axis and combined into the line chart.

    Args:
        regionname: Accepted for call compatibility; not used here.
        workbook: Workbook object providing ``add_chart``.
        outxl: Writer whose ``sheets`` mapping contains ``sheetname``.
        sheetname: Name of the worksheet holding the data and the chart.
        cellid: Anchor cell for the chart, e.g. ``'G2'``.
        charttitle: Chart title, also used as the Y axis name.
        startrow: Zero-based row of the header; data starts one row below.
        startcol: Zero-based column preceding the first plotted column.
        endrow: Zero-based last data row.
        endcol: Zero-based column one past the last plotted column.
    """
    worksheet = outxl.sheets[sheetname]
    kpichart = workbook.add_chart({'type': 'line'})
    # Was bound to an unused name (`bsckpi`) while the code below used
    # `nodekpi`, which raised NameError.
    nodekpi = workbook.add_chart({'type': 'column'})

    axis_font = {'name': 'Cambria', 'size': 10, 'color': '#000000'}
    # Category labels are read from worksheet column 1, as in the original.
    categories = [sheetname, startrow + 1, 1, endrow, 1]
    last_col = endcol - 1

    # Loop-invariant, so set once rather than once per series.
    kpichart.set_title(
        {'name': charttitle, 'name_font': {'name': 'Cambria', 'size': 18, 'color': '#000000'}})

    for col_num in range(startcol + 1, last_col):
        kpichart.add_series({
            'name': [sheetname, startrow, col_num],
            'categories': categories,
            'values': [sheetname, startrow + 1, col_num, endrow, col_num],
        })

    nodekpi.add_series({
        'name': [sheetname, startrow, last_col],
        'categories': categories,
        'values': [sheetname, startrow + 1, last_col, endrow, last_col],
        'y2_axis': True,
    })
    kpichart.combine(nodekpi)

    # Each set_*_axis call replaces the previous options, so the name and the
    # font must be passed together or the name is lost.
    kpichart.set_x_axis({'name': 'Date', 'num_font': axis_font})
    kpichart.set_y_axis({
        'name': charttitle,
        'minor_gridlines': {'visible': True, 'color': '#FFFFFF'},
        'num_font': axis_font,
    })
    worksheet.insert_chart(cellid, kpichart, {'x_scale': 1.75, 'y_scale': 1.25})
def df_to_xl(regionname, hostlist, timerdf, hostData):
    """Export the timer data to two workbooks: one with charts, one styled.

    ``timer_audit_data_styled.xlsx`` receives the styled ``timerdf`` and a
    styled pivot (count of NODE per HOST and timer) on the 'Parameters' sheet.
    ``timer_audit_data.xlsx`` receives one sheet per entry of ``hostlist``
    containing ``hostData`` plus a chart built by ``createchart``.

    Args:
        regionname: Passed through to ``createchart``.
        hostlist: Iterable of host identifiers; each becomes a sheet name.
        timerdf: DataFrame with at least the NODE, HOST and timer columns.
        hostData: DataFrame written to every host sheet.
    """
    outxl = pd.ExcelWriter("timer_audit_data.xlsx", engine='xlsxwriter')
    stylexl = StyleFrame.ExcelWriter("timer_audit_data_styled.xlsx")
    outxl.sheets.clear()
    workbook = outxl.book

    style_timerdf = applystyle(timerdf)
    style_timerdf.to_excel(stylexl, sheet_name='Parameters', index=False)

    # Pivot
    timer_summary = timerdf.pivot_table(values='NODE', index='HOST', columns='timer', aggfunc="count")
    timer_summary = applystyle(timer_summary)
    timer_summary.to_excel(stylexl, sheet_name='Parameters', startrow=0, startcol=15)

    # `tr`/`tc` were never defined in the original. With the data written at
    # startrow=1/startcol=1, the row and column counts of hostData give the
    # chart bounds passed below.
    # NOTE(review): confirm they were not meant to come from module globals.
    tr, tc = hostData.shape

    for hostname in hostlist:
        hostname = genfunc.toString(hostname)
        hostData.to_excel(outxl, sheet_name=hostname, startrow=1, startcol=1, index=False)
        createchart(regionname, workbook, outxl, hostname, 'G2',
                    "Timer Performance"+hostname, 1, 1, tr+1, tc+1)
        #In this Section I am pasting the dataframe to excel workbook & I am preparing a chart in the same sheet
        #But here i would like to apply some colors and borders to hostData pasted in excel and then I want to prepare chart in the same sheet

    outxl.save()
    stylexl.save()
It would be much better to draw your chart with the openpyxl package, since StyleFrame uses the openpyxl engine and, as far as I know, the xlsxwriter package does not work with existing files.
This example works for me with openpyxl version 2.2.5 and StyleFrame version 1.2
import pandas as pd
from openpyxl.charts import BarChart, Reference, Series
from StyleFrame import StyleFrame, Styler, utils
def apply_style(sf):
    """Style the headers and columns of ``sf`` and return the same object.

    Headers get bold white text on a black background; all columns get a
    blue font.
    """
    header_styler = Styler(
        bg_color=utils.colors.black,
        font_color=utils.colors.white,
        bold=True,
    )
    sf.apply_headers_style(styler_obj=header_styler)

    column_styler = Styler(font_color=utils.colors.blue)
    sf.apply_column_style(cols_to_style=sf.columns, styler_obj=column_styler)
    return sf
def draw_chart_openpyxl(worksheet):
    """Add a two-series bar chart to ``worksheet`` and return the worksheet.

    Labels come from rows 2-4 of column 1; series 'A' and 'B' come from
    rows 2-4 of columns 2 and 3 respectively.
    """
    # pos: (row, column)
    chart = BarChart()
    labels = Reference(worksheet, pos1=(2, 1), pos2=(4, 1))

    for title, column in (('A', 2), ('B', 3)):
        values = Reference(worksheet, pos1=(2, column), pos2=(4, column))
        chart.append(Series(values, title=title, labels=labels))

    # Placement and size of the chart drawing.
    chart.drawing.top = 100
    chart.drawing.left = 200
    chart.drawing.width = 300
    chart.drawing.height = 200

    worksheet.add_chart(chart)
    return worksheet
if __name__ == '__main__':
    # Demo: style a small frame, write it to output.xlsx and chart it.
    df = pd.DataFrame(
        {'A': [1, 2, 3], 'B': [4, 5, 6]},
        columns=['A', 'B'],
        index=['Row 1', 'Row 2', 'Row 3'],
    )
    sf = apply_style(StyleFrame(df))

    ew = StyleFrame.ExcelWriter('output.xlsx')
    sf.to_excel(excel_writer=ew, sheet_name='Sheet1')
    # The chart is drawn on the sheet that was just written.
    sheet = ew.book.get_sheet_by_name('Sheet1')
    draw_chart_openpyxl(sheet)
    ew.save()
You can find here many usage examples.
Related
I am new to pandas.
My problem: I am trying to open an existing Excel sheet,
traverse the values in it,
and change the font colour of the text whenever a condition is true.
This is the sample excel where I am trying to change the color:
Below is my code which I have tried:
def highlight_cells(val):
    """Return the CSS colour rule for a cell: red for 'DATA', '#C6E2E9' otherwise."""
    color = '#C6E2E9'
    if val == 'DATA':
        color = 'red'
    return 'color: %s' % color
# Load the existing workbook into a DataFrame.
ddf = pd.read_excel(PathToTheExcelFile)
# Builds a Styler from highlight_cells, but the result is neither stored nor exported.
ddf.style.applymap(highlight_cells)
# Writes the plain DataFrame (not the Styler) back over the same file.
ddf.to_excel(PathToTheExcelFile,index=False)
What I am currently getting is this:
What I want is this:
.style.applymap produces a Styler object, which has a to_excel method to conveniently export it:
def highlight_cells(val):
    """Return the CSS colour rule for a cell: red for 'DATA', '#C6E2E9' otherwise."""
    color = '#C6E2E9'
    if val == 'DATA':
        color = 'red'
    return 'color: %s' % color
# Style every cell, then export the Styler itself rather than the DataFrame.
styled_all = ddf.style.applymap(highlight_cells)
styled_all.to_excel("data.xlsx", index=False)
# If you want to stylize only the Comments column
styled_comments = ddf.style.applymap(highlight_cells, subset="Comments")
styled_comments.to_excel("data.xlsx", index=False)
Result:
The style.applymap is for showing output of dataframes in HTML, not updating excel sheets. You can change the code thus to update the font in excel. I am reading the excel input.xlsx, updating the contents using openpyxl and writing it to output.xlsx. You can change other things like size, bold, fontname, etc. as well. Note: Color used is HEX color, but without the # symbol in front
import openpyxl
# Open the source workbook and work on its active sheet.
wb = openpyxl.load_workbook(filename="input.xlsx")
ws = wb.active
# Skipping first row as I assume it is header
for row in range(2, ws.max_row + 1):
    cell = ws.cell(row=row, column=3)
    # Hex colour without the leading '#'.
    font_color = 'FF0000' if cell.value == 'DATA' else 'C6E2E9'
    # Other Font options are available too, e.g. size=16, bold=True, name='Calibri'.
    cell.font = openpyxl.styles.Font(color=font_color)
wb.save("output.xlsx")
USING pandas.ExcelWriter instead of openpyxl
You can use pandas.ExcelWriter, as in the code below, to change the font to red for DATA and green for everything else. Note: to use a different font color, specify it as # followed by the 6-character hex code.
import pandas as pd
import numpy as np
# Read the sheet and replace missing values so every cell has content.
df = pd.read_excel('output.xlsx')
df.fillna('NA', inplace = True)

# add_format() and conditional_format() below are XlsxWriter APIs, so the
# engine is requested explicitly instead of relying on the default choice.
writer = pd.ExcelWriter('output1.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name= 'sheet1', index=False)
worksheet = writer.sheets['sheet1']
workbook = writer.book

cell_format_red = workbook.add_format({'font_color': 'red'})
cell_format_green = workbook.add_format({'font_color': 'green'})

# Zero-based range covering the data rows of the third column
# (row 0 holds the header written by to_excel).
start_row = 1
start_col = 2
end_row = len(df)
end_col = start_col

worksheet.conditional_format(start_row, start_col, end_row, end_col, {'type': 'cell', 'criteria': '==', 'value': '"DATA"', 'format': cell_format_red})
worksheet.conditional_format(start_row, start_col, end_row, end_col, {'type': 'cell', 'criteria': '!=', 'value': '"DATA"', 'format': cell_format_green})
writer.save()
UPDATE: Fixed. The sheet is protected, and the selected column can still be unhidden.
Is it possible to lock all visible cells (protection from editing), but still allow the users to unhide certain columns?
I would like to export a pandas dataframe via pd.excel_writer().
worksheet.protect() doesn't allow for any arguments?
I apply column-wise formatting and tried 'lock': False on the hidden columns, but that didn't work.
# init
import pandas as pd
import random as rd
import string
# Seed the generator so the test data is reproducible. The module is
# imported as `rd`, so `random.seed` raised NameError.
rd.seed(10)
S = 10
string_col = []
number_col1 = []  # exported as the 'Hide' column
number_col2 = []  # exported as the 'Protect' column
# create testdata
for _ in range(0, 20):
    # One random S-character string and two single-digit strings per row.
    # The draw order is kept so the seeded sequence is unchanged.
    ran_str = ''.join(rd.choices(string.ascii_uppercase + string.digits, k = S))
    ran_num1 = str(rd.randrange(S))
    ran_num2 = str(rd.randrange(S))
    string_col.append(ran_str)
    number_col1.append(ran_num1)
    number_col2.append(ran_num2)
testframe = pd.DataFrame(
    {'String_col': string_col,
     'Hide': number_col1,
     'Protect': number_col2
    })
# helperfunction for selecting columns
def getColnameByPosition(pos):
    """Return the Excel column letters for a zero-based column position.

    0 -> 'A', 25 -> 'Z', 26 -> 'AA', 701 -> 'ZZ', 702 -> 'AAA', ...

    The name is computed directly rather than looked up in a prebuilt
    702-entry table, so positions beyond 'ZZ' are supported as well.

    Raises:
        IndexError: if ``pos`` is negative.
    """
    from string import ascii_uppercase

    if pos < 0:
        raise IndexError("column position must be non-negative")

    letters = []
    remaining = pos + 1
    # Column names are bijective base-26: there is no zero digit, hence the
    # "- 1" before each division.
    while remaining > 0:
        remaining, digit = divmod(remaining - 1, 26)
        letters.append(ascii_uppercase[digit])
    return ''.join(reversed(letters))
# export
writer = pd.ExcelWriter("./FormatTest.xlsx", engine='xlsxwriter')
testframe.to_excel(writer, sheet_name="Sheet0", startrow=0, header=True, index=False)
workbook = writer.book
worksheet = writer.sheets["Sheet0"]

# protect and hide format
base_format = {'font_name': 'Calibri', 'font_size': 9, 'num_format': '#,##0'}
format_protect = workbook.add_format(dict(base_format, bg_color='#ADD8E6', locked=True))
format_hide = workbook.add_format(dict(base_format, bg_color='#FFFFFF', locked=False))

# Translate the column positions into 'X:X' style ranges.
prot_col = getColnameByPosition(testframe.columns.get_loc("Protect"))
hide_col = getColnameByPosition(testframe.columns.get_loc("Hide"))
prot_range = '{0}:{0}'.format(prot_col)
hide_range = '{0}:{0}'.format(hide_col)
worksheet.set_column(prot_range, 10, format_protect)
worksheet.set_column(hide_range, 10, format_hide, {'hidden': True})

# FIX: allow for unhiding.
worksheet.protect('', {'format_columns': True})
writer.save()
You just need to call protect() to turn on worksheet protection and enable the exceptions that you want. In this case, enabling "Format Columns" should give you what you want:
# ...
worksheet.set_column(prot_col+':'+prot_col, 10, format_protect)
worksheet.set_column(hide_col+':'+hide_col, 10, format_hide, {'hidden': True})
worksheet.protect('', {'format_columns': True})
writer.save()
Also, set_column() can take (col, col) notation so you could drop the getColnameByPosition() code and just do this:
# ...
prot_col = testframe.columns.get_loc("Protect")
hide_col = testframe.columns.get_loc("Hide")
worksheet.set_column(prot_col, prot_col, 10, format_protect)
worksheet.set_column(hide_col, hide_col, 10, format_hide, {'hidden': True})
worksheet.protect('', {'format_columns': True})
writer.save()
import os
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import xlrd
import openpyxl
from openpyxl import load_workbook
import datetime
from dataclasses import dataclass
from openpyxl.styles import Font,Color,Alignment,Border,Side,colors
import numpy as np
from xlsxwriter.utility import xl_rowcol_to_cell
import xlwt
from xlwt import Workbook
import functools
import numpy as np
from itertools import repeat, chain
import glob
# Create the Tk root window and pack a 300x300 canvas into it.
root= tk.Tk()
canvas1 = tk.Canvas(root, width = 300, height = 300, bg = 'lightsteelblue')
canvas1.pack()
def getExcel():
    """Ask for an Excel file and reshape it into the global ``df``.

    The selected workbook is read, the unused source columns are dropped,
    the export columns are inserted or renamed, a header row of price-list
    labels is prepended, and placeholder values are filled in.  The result
    is stored in the module-level ``df``; nothing is returned.
    """
    global df
    import_file_path = filedialog.askopenfilename()
    df = pd.read_excel(import_file_path)

    # Source columns that are not part of the export.
    df = df.drop(columns=[
        'PART CODE',
        'SUPPLIER CODE',
        'COMPANY OR SUB-CONT',
        'SUB-CONT UNIT COST',
        'HLR',
        'HOURLY MULTIPLIER',
        'MATERIAL MULTIPLIER',
        'ROUGH-IN HOURS',
        'FINISH HOURS',
        'PRELIMINARY TEXT',
        'FORMAL TEXT',
        'SUBCONT TEXT',
        'Part Image Path',
    ])

    # Insert and rename columns. The order matters because insert() is positional.
    df.insert(0, 'Id', '')
    df.insert(1, 'M', 'M')
    df.insert(3, 'SubCategory', '')
    df.insert(4, 'DrillDowns', '')
    df.insert(6, 'Name', '')
    df = df.rename(columns={'PART UNIT TYPE': 'MeasurementType'})
    df.insert(8, 'OnOffSwitch', 'No')
    df['SubCategory'] = df['CATEGORY']
    df = df.rename(columns={'PART DESCRIPTION': 'Note'})
    df['Name'] = df['Note']
    df = df.rename(columns={'COMPANY UNIT COST': 'PRICE'})

    # Label row placed above the data.
    new_row = pd.DataFrame({'Id':'(BLANK = NEW)', 'M':'P', 'CATEGORY':'Brand',
                            'SubCategory':'Name', 'DrillDowns':'Price - Bathroom {nwPFGtikvZ}', 'Name':'Price - HVAC {cp7lAPx4IO}', 'Note': 'Price - XPS1 {qX8FFEVmqP}',
                            'MeasurementType':'Price - PRIME {atGoZ7zLsE}', 'OnOffSwitch':'Price - ARCHIVE {NtbEEROpa9}', 'NeedToReplace':'Price - FLOORING {AskrHJL9ab}', 'NeedToReplace1':'Price - TEST {jOn0TaUDmU}', 'NeedToReplace2':'Price - Kitchen Refacing {9iFFUgrQBr}', 'NeedToReplace3':'Price - EAGLE EYES {X8ExSUDoFH}', 'NeedToReplace4':'Price - Basement {ajuemFbXaL}', 'NeedToReplace5': 'Price - Egress Windows {69790nzjKb}'},
                           index =[0])
    # simply concatenate both dataframes
    df = pd.concat([new_row, df]).reset_index(drop = True)
    df = df.fillna('')
    #new_row1 = pd.DataFrame({'Id':' ', 'M':'P', 'CATEGORY': 'SITE-PREP'}, index= [2])

    # Add empty rows at fractional index positions, then sort them into place.
    switches = df['M'].ne(df['M'].shift(16000))
    idx = switches[switches].index
    df_new = pd.DataFrame(index=idx + 1.5)
    df = pd.concat([df, df_new]).sort_index()
    #df = pd.concat([new_row1, df]).reset_index(drop = True)

    # Mark the cells of the inserted rows, then give each column its placeholder.
    df = df.fillna('NO VALUE')
    df.M = df.M.replace({'NO VALUE': "P"})
    df.Id = df.Id.replace({'NO VALUE': ""})
    for column in ('DrillDowns', 'Name', 'Note', 'MeasurementType', 'OnOffSwitch',
                   'NeedToReplace', 'NeedToReplace1', 'NeedToReplace2',
                   'NeedToReplace3', 'NeedToReplace5'):
        # NeedToReplace5 is now derived from itself. The original assigned it
        # from NeedToReplace3, which overwrote its own label.
        df[column] = df[column].replace({'NO VALUE': "xxxx"})

    df['NeedToReplace4'] = df['PRICE'].shift(1)
    df = df.fillna("Price - Basement {ajuemFbXaL}")
    df.NeedToReplace4 = df.NeedToReplace4.replace({'NO VALUE': ""})
    del df['PRICE']

    # Replace the temporary column names with the blank headers used in the export.
    df = df.rename(columns={
        'NeedToReplace': '',
        'NeedToReplace1': ' ',
        'NeedToReplace2': ' ',
        'NeedToReplace3': ' ',
        'NeedToReplace4': ' ',
        'NeedToReplace5': ' ',
    })
    df.CATEGORY.replace('NO VALUE',df.Note.shift(1),inplace=True)
    df.SubCategory.replace('NO VALUE',df.Name.shift(1),inplace=True)

    pd.set_option('display.max_rows', df.shape[0]+1)
    pd.set_option('display.max_colwidth', None)
# Button that runs getExcel when clicked, placed on the canvas at (150, 150).
browseButton_Excel = tk.Button(text='Select Excel File', command=getExcel, bg='green', fg='white', font=('helvetica', 12, 'bold'))
canvas1.create_window(150, 150, window=browseButton_Excel)
root.mainloop()
# NOTE(review): the return value is discarded, so this statement leaves df unchanged.
df.reset_index(drop=True)
# The export is written to an absolute path under the Desktop folder...
df.to_excel(r'C:\Users\Larso\Desktop\ClearEstimatesEstimate\LeapPriceGuideExport.xlsx', sheet_name='Price Guide', index = False)
# ...but read back by bare file name, which is resolved against the current
# working directory rather than the folder written to above.
file1 = pd.read_excel("LeapPriceGuideExport.xlsx")
file2 = pd.read_excel("test.xlsx")
# NOTE(review): the merge key is "ID", while getExcel creates a column named 'Id' - confirm.
file3 = file1.merge(file2, on="ID", how="outer")
file3.to_excel("merged.xlsx")
This is my current code above. I'm editing an Excel file so that it can go into an existing .xlsx file, but every time I try to merge the files I get this error:
FileNotFoundError: [Errno 2] No such file or directory: 'LeapPriceGuideExport.xlsx'
I can clearly see the file in the folder, so I'm not sure what I am missing. Please let me know if anything didn't come through clearly; I am still getting used to posting and asking questions. Thank you.
I am creating an excel file with multiple sheets using xlsxwriter as engine.
In each sheet the row color is based on value of column named colour
But the color is not visible in my excel file.
import pandas as pd
def row_colour(row):
    """Return one background-colour CSS rule per cell, taken from the row's ``colour`` value."""
    return ['background-color:' + row.colour.lower() for _cell in row]
# One workbook, three sheets; each frame carries its row colour in the 'colour' column.
writer = pd.ExcelWriter('try.xlsx', engine='xlsxwriter')
cols = ['subject','colour']
df1 = pd.DataFrame([['Math','DarkRed'],['Science','Yellow']],columns=cols)
df2 = pd.DataFrame([['English','Orange'],['History','Green']],columns=cols)
df3 = pd.DataFrame([['Geography','DarkRed'],['Civic','Yellow']],columns=cols)
# The results of these style.apply calls are not stored anywhere...
df1.style.apply(row_colour,axis=1)
df2.style.apply(row_colour,axis=1)
df3.style.apply(row_colour,axis=1)
# ...so the frames written here are the plain DataFrames.
df1.to_excel(writer, sheet_name='Sheet 1')
df2.to_excel(writer, sheet_name='Sheet 2')
df3.to_excel(writer, sheet_name='Sheet 3')
writer.save()
In output no color is visible:
The accepted answer is right for the above question.
I have improved the task by deleting the color column since it's only use was to color the rows.
Code for it:
import pandas as pd
def row_colour(table, color):
    """Build the CSS frame for ``Styler.apply(row_colour, axis=None, color=...)``.

    Args:
        table: DataFrame being styled.
        color: DataFrame with a 'colour' column, one entry per row of
            ``table`` in the same order.

    Returns:
        A DataFrame shaped and labelled like ``table`` in which every cell of
        row *n* holds ``'background-color:<colour n>'``.  'DarkRed' is mapped
        to plain 'red'.
    """
    n_columns = table.shape[1]
    colour_values = color['colour']
    css_rows = []
    # Walk rows by position. The original used iterrows() labels as iloc
    # positions, which only works for a default 0..n-1 index.
    for position in range(len(table)):
        colour_name = str(colour_values.iloc[position])
        if colour_name == "DarkRed":
            css = 'background-color:red'
        else:
            css = 'background-color:' + colour_name
        css_rows.append([css] * n_columns)
    return pd.DataFrame(css_rows, index=table.index, columns=table.columns)
writer = pd.ExcelWriter('try.xlsx', engine='xlsxwriter')
cols = ['subject','colour']
df1 = pd.DataFrame([['Math','DarkRed'],['Science','Yellow']],columns=cols)
df2 = pd.DataFrame([['English','Orange'],['History','Green']],columns=cols)
df3 = pd.DataFrame([['Geography','DarkRed'],['Civic','Yellow']],columns=cols)
# For each frame: copy the 'colour' column aside, drop it from the frame, then
# rebind the name to a Styler coloured from the saved column.
color = pd.DataFrame(columns=['colour'])
color['colour']=df1['colour']
df1 = df1.drop(['colour'],axis=1)
df1=df1.style.apply(row_colour,axis=None,color=color)
color = pd.DataFrame(columns=['colour'])
color['colour']=df2['colour']
df2=df2.drop(['colour'],axis=1)
df2=df2.style.apply(row_colour,axis=None,color=color)
color = pd.DataFrame(columns=['colour'])
color['colour']=df3['colour']
df3=df3.drop(['colour'],axis=1)
df3=df3.style.apply(row_colour,axis=None,color=color)
# df1..df3 are Stylers at this point, so to_excel is called on the Styler objects.
df1.to_excel(writer, sheet_name='Sheet 1')
df2.to_excel(writer, sheet_name='Sheet 2')
df3.to_excel(writer, sheet_name='Sheet 3')
writer.save()
The function is ok, you just have to reassign df1, df2, df3. This should work:
import pandas as pd
def row_colour(row):
    """Return one background-colour CSS rule per cell, taken from the row's ``colour`` value."""
    return ['background-color:' + row.colour.lower() for _cell in row]
writer = pd.ExcelWriter('try.xlsx', engine='xlsxwriter')
cols = ['subject','colour']
df1 = pd.DataFrame([['Math','DarkRed'],['Science','Yellow']],columns=cols)
df2 = pd.DataFrame([['English','Orange'],['History','Green']],columns=cols)
df3 = pd.DataFrame([['Geography','DarkRed'],['Civic','Yellow']],columns=cols)
# Rebind each name to the result of style.apply so the styled object is what gets exported.
df1 = df1.style.apply(row_colour,axis=1)
df2 = df2.style.apply(row_colour,axis=1)
df3 = df3.style.apply(row_colour,axis=1)
df1.to_excel(writer, sheet_name='Sheet 1')
df2.to_excel(writer, sheet_name='Sheet 2')
df3.to_excel(writer, sheet_name='Sheet 3')
writer.save()
to_excel here is a method of pandas.io.formats.style.Styler rather than the original dataframe.
As an answer to your comment, I came up with a more complex solution.
The colours are now read from the DataFrame before being dropped. Then passed as an argument to a row-colouring function.
The key points are my use of zip and pd.IndexSlice for subsetting df.style.apply. I hope this suits your colouring needs.
import pandas as pd
def colour_row(row, colour):
    """Return a ``'background-color:<colour>'`` rule (lower-cased colour) for every cell in ``row``."""
    return ['background-color:' + colour.lower() for _cell in row]
def colour_df(df, colour_col):
    """Return a Styler for ``df`` whose rows are coloured by ``colour_col``.

    The colour column is read and then dropped, so it does not appear in the
    styled output.  Each row is coloured with its own value via ``colour_row``.

    Args:
        df: DataFrame containing the data and the colour column.
        colour_col: Name of the column holding each row's colour name.
    """
    # Honour the parameter. The original ignored it and hard-coded 'colour'.
    colours = list(df[colour_col])
    df = df.drop(colour_col, axis=1)
    coloured_df = df.style
    # IndexSlice selects by label, so pair each colour with the row's actual
    # index label rather than its 0..n-1 position.
    for label, colour in zip(df.index, colours):
        coloured_df = coloured_df.apply(
            colour_row, axis=1, subset=pd.IndexSlice[label, :], colour=colour)
    return coloured_df
writer = pd.ExcelWriter('try.xlsx', engine='xlsxwriter')
cols = ['subject', 'colour']
df1 = pd.DataFrame([['Math', 'DarkRed'], ['Science', 'Yellow']], columns=cols)
df2 = pd.DataFrame([['English', 'Orange'], ['History', 'Green']], columns=cols)
df3 = pd.DataFrame([['Geography', 'DarkRed'], ['Civic', 'Yellow']], columns=cols)

# Colour each frame row by row and write it to its own numbered sheet.
for sheet_num, df in enumerate([df1, df2, df3], start=1):
    sheet_name = 'Sheet ' + str(sheet_num)
    df = colour_df(df, 'colour')
    df.to_excel(writer, sheet_name=sheet_name)
writer.save()
I made a Bokeh graph using a 'for loop'. But this approach prevents me from adding tooltips, because the # method for the hover tuple needs a column name, which I cannot supply when the lines are drawn in a loop. Is there any way to add the value and name of each country to my tooltip in a 'for loop'? The # hover line below does not work.
import pandas as pd
# Download the workbook sheet, skipping 2 leading rows and 15 footer rows.
url = 'https://www.bp.com/content/dam/bp/business-sites/en/global/corporate/xlsx/energy-economics/statistical-review/bp-stats-review-2018-all-data.xlsx'
df = pd.read_excel(url, sheet_name = 'Gas Consumption - Bcf', skiprows = 2, skipfooter = 15)
# Drop all-empty rows and transpose the table.
df = df.dropna(how='all').transpose()
# Use the first row as the column names, then remove that row.
df = df.rename(columns=df.iloc[0]).drop(df.index[0])
# Move the old index into a column and call it 'Year'.
df = df.reset_index()
df.rename(columns = {'index': 'Year'}, inplace=True)
# Remove the rows at positions 53-55.
df = df.drop(df.index[[53, 54, 55]])
df['Year'] = pd.to_datetime(df['Year'], format = '%Y')
# Turn the last row into a (country, value) table, skipping its first entry.
top_ten = df.tail(1).T.reset_index().iloc[1:,:]
top_ten.columns = ['country', 'value']
top_ten = top_ten.sort_values(by = 'value', ascending= False)
top_ten_list = top_ten['country'].tolist()
# Keep the first ten names that do not contain 'Total'.
top_ten_list = [x for x in top_ten_list if not 'Total' in x][0:10]
from bokeh.plotting import figure, output_notebook, show, reset_output
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10
from bokeh.models import HoverTool
import itertools
from bokeh.models import Legend
mypalette = Category10[10]
output_notebook()
q = figure(plot_width=700, plot_height=500, x_axis_type='datetime')

# One line glyph per country, each paired with its own palette colour.
for c, color in zip(top_ten_list, mypalette):
    q.line(df['Year'], df[c], legend=c, color=color, line_width=3)
    #hover = HoverTool(tooltips = [('Date', '#Year{%Y}'), ('Country', '#c billion cubic feet per day')], formatters = {'Year' : 'datetime'})

# `hover` is only created in the commented-out line above.
q.add_tools(hover)

q.legend.location = "top_left"
q.xaxis.axis_label = "Date"
q.yaxis.axis_label = "billion cubic feet per day"
q.legend.click_policy = "hide"
show(q)
I replaced the for loop with a ColumnDataSource and multiline which makes it easy to add a hovertool. I also had to add some CustomJS because calling #x/#y from multiline shows all the x/y values. The CustomJS makes sure that it only shows the right x/y position.
import pandas as pd
from bokeh.plotting import figure, show, reset_output, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.models.glyphs import MultiLine
import itertools
from bokeh.models import Legend
from bokeh.models.tools import CustomJSHover
# Download the sheet and reshape it into one row per year, one column per country.
url = 'https://www.bp.com/content/dam/bp/business-sites/en/global/corporate/xlsx/energy-economics/statistical-review/bp-stats-review-2018-all-data.xlsx'
df = pd.read_excel(url, sheet_name = 'Gas Consumption - Bcf', skiprows = 2, skipfooter = 15)
df = df.dropna(how='all').transpose()
df = df.rename(columns=df.iloc[0]).drop(df.index[0])
df = df.reset_index()
df.rename(columns = {'index': 'Year'}, inplace=True)
df = df.drop(df.index[[53, 54, 55]])

# Take the ten largest entries of the last row, ignoring the 'Total' aggregates.
top_ten = df.tail(1).T.reset_index().iloc[1:,:]
top_ten.columns = ['country', 'value']
top_ten = top_ten[~top_ten.country.str.contains("Total")]
top_ten = top_ten.sort_values(by = 'value', ascending= False)
top_ten_list = top_ten['country'].tolist()[:10]
top_ten = df[top_ten_list]

# multi_line expects one list of x values and one list of y values per line.
y = [df[country].tolist() for country in top_ten.columns.tolist()]
xLst = df['Year'].tolist()
# Every line shares the same x values. The list is sized from the data
# rather than from a hard-coded 10.
x = [xLst] * len(y)

# Custom formatters so the tooltip shows only the hovered point instead of
# the whole x/y list of the line.
x_custom = CustomJSHover(code="""
return '' + special_vars.data_x
""")
y_custom = CustomJSHover(code="""
return '' + special_vars.data_y
""")

data = {'x': x, 'y': y, 'color': Category10[10][:len(y)], 'name': top_ten_list}
source = ColumnDataSource(data)

output_notebook()
q = figure(plot_width=700, plot_height=500)
q.multi_line(xs='x', ys='y', line_color='color', legend='name', line_width = 3, source=source)

# Bokeh tooltips reference data-source columns with '@'. A '#' prefix is not
# a field reference and would be displayed literally.
# NOTE(review): newer Bokeh releases expect the formatter keys to carry the
# '@' prefix as well ('@x', '@y') - confirm against the installed version.
q.add_tools(HoverTool(
    tooltips=[
        ('Year', '@x{custom}'),
        ('Value', '@y{custom}'),
        ('Country', '@name')],
    formatters=dict(x=x_custom, y=y_custom)
))

q.legend.location = "top_left"
q.xaxis.axis_label = "Date"
q.yaxis.axis_label = "billion cubic feet per day"
q.legend.click_policy="hide"
show(q)