Pandas `to_excel` set font name - python

Is it possible to set the font name with pandas' excel writer?
Here is my attempt:
import pandas as pd
from pandas.io.excel import ExcelWriter
df = pd.DataFrame([[1,2,3],[4,5,6]])
writer = ExcelWriter("test.xlsx")
writer.set_font_name("Arial") # this would be ideal, but no such method exists
format = writer.book.add_format({"font_name": "Arial"})
df.to_excel(writer) # format is not applied. Need to pass `format` object somewhere
writer.close()
Where can I pass the xlsxwriter.format.Format object once it has been created?

UPDATE: using a solution similar to #miradulo's solution we can make it working for different versions of Pandas:
try:
import pandas.io.formats.excel as fmt_xl # version >= 0.20
except ImportError:
try:
import pandas.formats.format as fmt_xl # 0.18 <= version < 0.20
except ImportError:
import pandas.core.format as fmt_xl # version < 0.18
df = pd.DataFrame([[1,2,3],[4,5,6]], columns=list('abc'))
fmt_xl.header_style = None
writer = pd.ExcelWriter("d:/temp/test.xlsx")
df.to_excel(writer, index= False)
workbook = writer.book
worksheet = writer.sheets['Sheet1']
fmt = writer.book.add_format({"font_name": "Arial"})
worksheet.set_column('A:Z', None, fmt)
worksheet.set_row(0, None, fmt)
writer.save()
XlsxWriter Docs: Working with Python Pandas and XlsxWriter

Related

I am trying to retrieve the content from an Excel using Python. I am getting Investment undefined error

import xlrd
import os
class datafromexcel:
def __init__(self) -> None:
rootPath = os.getcwd()
loc = (rootPath+"Investment.xlsx");
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)
list = [];
for i in range(sheet.nrows):
#print(sheet.cell_value(i, 0),sheet.cell_value(i, 1))
list.append(Investment(sheet.cell_value(i, 0),sheet.cell_value(i, 1)));
print("Successfully retrieved all excel data");
I recommend you to use Pandas library and read your excel file with it
import pandas as pd
pd.read_excel('EXCEL_FILE.xlsx', index_col=0)
for more details: https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html

how to format excel file using python?

I have a script that scrapes data from list of websites using beautifulSoup package and save in an excel file using pandas and xlsxwriter packages.
What i want is to be able to format the excel file as i need like the width of the columns
but when i run the script it crash and display the below error.
AttributeError: 'NoneType' object has no attribute 'write'
code:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import xlsxwriter
def scrap_website():
url_list = ["https://www.bayt.com/en/international/jobs/executive-chef-jobs/",
"https://www.bayt.com/en/international/jobs/head-chef-jobs/",
"https://www.bayt.com/en/international/jobs/executive-sous-chef-jobs/"]
joineddd = []
for url in url_list:
soup = BeautifulSoup(requests.get(url).content,"lxml")
links = []
for a in soup.select("h2.m0.t-regular a"):
if a['href'] not in links:
links.append("https://www.bayt.com"+ a['href'])
for link in links:
s = BeautifulSoup(requests.get(link).content, "lxml")
### update Start ###
alldd = dict()
alldd['link'] = link
dd_div = [i for i in s.select("div[class='card-content is-spaced'] div")
if ('<dd>' in str(i) ) and ( "<dt>" in str(i))]
for div in dd_div:
k = div.select_one('dt').get_text(';', True)
v = div.select_one('dd').get_text(';', True)
alldd[k] = v
### update End ###
joineddd.append(alldd)
# result
df = pd.DataFrame(joineddd)
df_to_excel = df.to_excel(r"F:\\AIenv\web_scrapping\\jobDesc.xlsx", index = False, header=True)
workbook = xlsxwriter.Workbook(df_to_excel)
worksheet = workbook.add_worksheet()
worksheet.set_column(0, 0,50)
workbook.close()
where is the error and how to fix it ?
To access and format the Excel workbook or worksheet created by to_excel() you need to create an ExcelWriter object first. Something like this:
import pandas as pd
# Create a Pandas dataframe from some data.
df = pd.DataFrame({'Data': [10, 20, 30, 20, 15, 30, 45]})
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Sheet1', index=False, header=True)
# Get the xlsxwriter objects from the dataframe writer object.
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Set the column width.
worksheet.set_column(0, 0, 50)
# Close the Pandas Excel writer and output the Excel file.
writer.save()
Output:
See Working with Python Pandas and XlsxWriter for more details.
to_excel function returns nothing. It's why you got the error message.
# save excel file
excel_file_name = r"jobDesc.xlsx"
df.to_excel(excel_file_name, index = False, header=True)
# open excel file for change col width or something
workbook = xlsxwriter.Workbook(excel_file_name)
Basically, you can't change existing file with xlsxwriter. There is a way to do so, but it is not recommended. I recommend openpyxl package instead of this. FYI, xlsxwriter: is there a way to open an existing worksheet in my workbook?

Error 'Workbook' object has no attribute 'add_vba_project'

I've successfully Extracted vbaProject.bin but I'm still getting an error. My goal is to be able to use VBA after the information is passed to the xlsm file. This is a workaround provided by xlswriter.
import pandas as pd
def export(self):
sql = "SELECT WellID,AFEno,AFEsuppno,AccountCode,AFEAmount FROM WellAFEDetails WHERE WellID = ?"
df = pd.read_sql_query(sql,self.con, params=([self.wellid_bx.get()]))
writer = pd.ExcelWriter (r'C:\Users\bjenks\Desktop\Macro.xlsm')
df.to_excel(writer, sheet_name='Raw')
workbook = writer.book
workbook.filename = (r'C:\Users\bjenks\Desktop\Macro.xlsm')
workbook.add_vba_project(r'C:\Users\bjenks\Desktop/vbaProject.bin')
writer.save()

ExcelWriter overwriting sheets when writing

This program should take the contents of individual sheets and put them into one excel workbook. It almost does that, but it is overwriting instead of appending new sheets into the final workbook. I read that pandas excel writer is the way to go with this, any ideas as to why its having this behavior?
import xlwt, csv, os
from openpyxl import load_workbook
import pandas as pd
from pandas import ExcelWriter
csv_folder = r'C:\Users\Me\Desktop\Test_Folder\\'
for fil in os.listdir(csv_folder):
if '.xlsx' not in fil:
continue
else:
pass
df = pd.read_excel(csv_folder+fil, encoding = 'utf8')
file_name = fil.replace('.xlsx','')
writer = pd.ExcelWriter('condensed_output.xlsx', engine = 'xlsxwriter')
df.to_excel(writer, sheet_name = file_name)
writer.save()
#writer.close()
Make sure the writer.save() is outside of the loop. Also be aware of the character limit on sheetnames, so if the file names are the same up to a certain point, you run the risk of writing over a sheetname that way as well.
import xlwt, csv, os
from openpyxl import load_workbook
import pandas as pd
from pandas import ExcelWriter
csv_folder = r'C:\Users\Me\Desktop\Test_Folder\\'
writer = pd.ExcelWriter('condensed_output.xlsx', engine = 'xlsxwriter')
for fil in os.listdir(csv_folder):
if '.xlsx' not in fil:
continue
else:
pass
df = pd.read_excel(csv_folder+fil, encoding = 'utf8')
file_name = fil.replace('.xlsx','')
df.to_excel(writer, sheet_name = file_name)
writer.save() #make sure this is outside of the loop.
ETA: establish the writer outside of the loop as well

Issues with saving an Excel.Writer file to new path

I am trying to create a timed backup system for my excel document with Python as multiple users will be accessing it.
I want to change the path of the file not to my local directory.
Here's the code;
import pandas as pd
import datetime
import numpy
now = datetime.datetime.now()
ct = now.strftime("%Y-%m-%d %H.%M")
table = pd.read_excel(r'Z:\new\Planner_New.xlsx',
sheet_name = 'Jan18',
header = 0,
index_col = 0,
usecols = "A:AY",
convert_float = True)
writer = pd.ExcelWriter('Planner' + ct + '.xlsx', engine='xlsxwriter')
table.to_excel(writer, sheet_name = "Jan18")
workbook = writer.book
worksheet = writer.sheets['Jan18']
format1 = workbook.add_format({'num_format': '0%'})
worksheet.set_column('H:AY', None, format1)
writer.save()
writer.close()
I have tried
outpath = (r'Z:\backup')
writer.save(outpath)
writer.close()
But get back
TypeError: save() takes 1 positional argument but 2 were given
You need to specify the save location when you create the ExcelWriter object:
writer = pd.ExcelWriter(r'Z:\backup\Planner' + ct + '.xlsx', engine='xlsxwriter')
...
writer.save()
writer.close()

Categories

Resources