I am trying to style and write excel files dynamically. Here is my code
import pandas as pd
import copy
class OutputWriter(object):
    """Pair an Excel writer with a formatting callback and a target sheet name."""

    def __init__(self, fmt_func, sheet_name='data'):
        """Create the writer and pass it through ``fmt_func``.

        Args:
            fmt_func: callable invoked as ``fmt_func(writer, sheet_name)``;
                its return value is stored and later used by ``save``.
            sheet_name: name of the sheet the data frame is written to.
        """
        # NOTICE: Initialising with path set None since I do not know path yet
        wrt = pd.ExcelWriter(None, engine='xlsxwriter')
        self._writer = fmt_func(wrt, sheet_name)
        self._sheet_name = sheet_name

    def save(self, df, o_path):
        """Write ``df`` to the configured sheet and save the file to ``o_path``.

        Args:
            df: object exposing ``to_excel(writer, sheet_name=...)``.
            o_path: destination path assigned to the writer before writing.
        """
        # setting path in writer before saving
        self._writer.path = o_path
        df.to_excel(self._writer, sheet_name=self._sheet_name)
        # close() writes the file out; ExcelWriter.save() was deprecated in
        # pandas 1.5 and removed in 2.0.
        self._writer.close()
# Change first row color to blue
def fmt_func_blue(wrt, sheet_name):
    """Return a deep copy of ``wrt`` with row 0 of ``sheet_name`` styled blue.

    Args:
        wrt: writer exposing ``sheets`` (indexable by sheet name) and ``book``
            (with ``add_format``).
        sheet_name: name of the sheet whose first row is formatted.

    Returns:
        The copied writer; ``wrt`` itself is not formatted.

    Note:
        The ``sheets[sheet_name]`` lookup fails when the sheet has not been
        created yet, and the deep copy fails for a writer without a path;
        both are the problems described in the question.
    """
    # ERROR Cannot clone `wrt` path is not set
    writer = copy.deepcopy(wrt)
    sheet = writer.sheets[sheet_name]
    workbook = writer.book
    # Proceed to color first row blue
    header_fmt = workbook.add_format({
        'text_wrap': True,
        'bg_color': '#191970',
        'font_color': '#FFFFFF',
    })
    header_fmt.set_align('center')
    header_fmt.set_align('vcenter')
    # Row 0, height left unchanged (None), header format applied.
    sheet.set_row(0, None, header_fmt)
    return writer
# Change first row color to red
def fmt_func_red(wrt, sheet_name):
    """Return a deep copy of ``wrt`` with row 0 of ``sheet_name`` styled red.

    Args:
        wrt: writer exposing ``sheets`` (indexable by sheet name) and ``book``
            (with ``add_format``).
        sheet_name: name of the sheet whose first row is formatted.

    Returns:
        The copied writer; ``wrt`` itself is not formatted.
    """
    writer = copy.deepcopy(wrt)
    # I haven't saved the excel file so there are no sheets
    sheet = writer.sheets[sheet_name]
    workbook = writer.book
    # Proceed to color first row red
    header_fmt = workbook.add_format({
        'text_wrap': True,
        'bg_color': '#FF2200',
        'font_color': '#FFFFFF',
    })
    header_fmt.set_align('center')
    header_fmt.set_align('vcenter')
    # Row 0, height left unchanged (None), header format applied.
    sheet.set_row(0, None, header_fmt)
    return writer
writer_red = OutputWriter(fmt_func_red, sheet_name='red')
writer_blue = OutputWriter(fmt_func_blue, sheet_name='blue')
I have two issues:
1) I can't clone the xlwriter object in my styling function
2) There are no sheets in my workbook at the time I try to style the excel files.
Is there any way I can make this work?
1) I can't clone the xlwriter object in my styling function
You can't clone a workbook that doesn't exist yet (or one that was just created and has nothing worthwhile in it; here "worthwhile" is checked by testing whether a path exists). Let's act on the error that is being thrown. Change:
def fmt_func_blue(wrt, sheet_name):
# ERROR Cannot clone `wrt` path is not set
writer = copy.deepcopy(wrt)
to
def fmt_func_blue(wrt, sheet_name):
# ERROR Cannot clone `wrt` path is not set
writer=wrt
if writer.book.filename:
writer = copy.deepcopy(wrt)
else:
# Any changes to the new workbook will be reflected on the new
# workbook of the writer(created in init) not on a copy.
# If you want your class init writer untouched,
# and want to format a completely new instance of workbook, recreate
# a new writer and assign it to the local writer here.
print('Warning: Working with new workbook')#or use log
This should take care of it. Remember, it is OK not to clone a perfectly empty workbook. But if your design requires it, create the clone yourself, that is, create a new, perfectly empty workbook the same way you did in `__init__`.
2) There are no sheets in my workbook at the time I try to style the
excel files.
Well, one cannot format sheets that do not exist yet. So just create the sheet first, and populate the already formatted sheet with the dataframe (or any other) data later. I suggest a change as below:
# I haven't saved the excel file so there are no sheets
sheet = writer.sheets[sheet_name]
to
sheet=None
if sheet_name in writer.sheets:
sheet = writer.sheets[sheet_name]
else:
print('Warning: Creating new sheet for formatting <'+sheet_name+'>') # or use log
sheet= workbook.add_worksheet(sheet_name)
Working code is here. It will take some time to install the libraries when you try to run it. I made a few changes for fail-safety and altered the methods a bit in my example code.
Output from my example code is as below:-
Warning: Working with new workbook
Warning: Creating new sheet for formatting <red>
Warning: Working with new workbook
Warning: Creating new sheet for formatting <blue>
Related
I have 8 excel files in directory. Out of those 8 excel files, I want to create an excel workbook using few worksheet (only few) and then create another workbook using rest worksheet. I have written the code as follows -
from pathlib import Path # Standard Python Module
import time # Standard Python Module
import xlwings as xw # pip install xlwings
# Adjust Paths
BASE_DIR = Path(__file__).parent
SOURCE_DIR = BASE_DIR / 'Region'
OUTPUT_DIR = BASE_DIR / 'Output'
# Create output directory
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Using 5 excel files from the 8 excel file list as mentioned above
include_ws = ['US-CAN ABS','US-CAN CMBS RMBS','US-CAN CDO','US-CAN Cov Bonds','Reason Codes Description']
excel_files = Path(SOURCE_DIR).glob('*.xlsx')
# Create timestamp
t = time.localtime()
timestamp = time.strftime('%Y-%m-%d_%H%M', t)
# Copy every sheet named in include_ws from each source workbook into one
# new workbook, then save it under a timestamped name.
with xw.App(visible=False) as app:
    combined_wb = app.books.add()
    for excel_file in excel_files:
        wb = app.books.open(excel_file)
        for sheet in wb.sheets:
            if sheet.name in include_ws:
                sheet.copy(after=combined_wb.sheets[0])
        wb.close()
    # Drop the first sheet, i.e. the one the new workbook started with.
    # NOTE: this is the call that fails with the com_error quoted below when
    # no sheet matched include_ws, because it would delete the only sheet.
    combined_wb.sheets[0].delete()
    combined_wb.save(OUTPUT_DIR / f'all_worksheets_{timestamp}.xlsx')
    combined_wb.close()
Getting following error
com_error: (-2147352567, 'Exception occurred.', (0, 'Microsoft Excel', 'A workbook must contain at least one visible worksheet. \n\nTo hide, delete, or move the selected sheet(s), you must first insert a new sheet or unhide a sheet that is already hidden.', 'xlmain11.chm', 0, -2146827284), None)
Any suggestion ?
Basically the error means you are trying to delete the one and only sheet in a workbook and that is not possible. It doesn't look like you are making any checks before trying to do the deletion.
I.e
You create the workbook 'combined_wb' and then if a sheet from 'wb' is in your list 'include_ws' copy across. However if none are, then no sheets are copied across to 'combined_wb' so it remains with just 'Sheet1'. Then you try to delete 'Sheet1', not possible because as the error says 'A workbook must contain at least one visible worksheet.'
You should confirm there were sheets copied before invoking the delete line.
Currently what I want to do is take data I have from a data frame list and add them to an existing excel file as their own tabs.
To test this out, I have tried it with one data frame. There are no errors, but when I go to open the Excel file it says it is corrupt. I can proceed to recover the information, but I would rather not have to do that every time. I believe it would also fail if I looped through my list to make this happen.
import os,glob
import pandas as pd
from openpyxl import load_workbook
master_file='combined_csv.xlsx'
#set the directory
os.chdir(r'C:\Users\test')
#set the type of file
extension = 'csv'
#take all files with the csv extension into an array
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
col_to_keep=["Name",
"Area (ft)",
"Length (ft)",
"Center (ft)",
"ID",
"SyncID"]
combine_csv = pd.concat([pd.read_csv(f, delimiter=';', usecols=col_to_keep) for f in all_filenames])
combine_csv.to_excel(master_file, index=False,sheet_name='All')
# Defining the path which excel needs to be created
# There must be a pre-existing excel sheet which can be updated
FilePath = r'C:\Users\test'
# Generating workbook
ExcelWorkbook = load_workbook(FilePath)
# Generating the writer engine
writer = pd.ExcelWriter(FilePath, engine = 'openpyxl')
# Assigning the workbook to the writer engine
writer.book = ExcelWorkbook
# Creating first dataframe
drip_file = pd.read_csv(all_filenames[0], delimiter = ';', usecols=col_to_keep)
SimpleDataFrame1=pd.DataFrame(data=drip_file)
print(SimpleDataFrame1)
# Adding the DataFrames to the excel as a new sheet
SimpleDataFrame1.to_excel(writer, sheet_name = 'Drip')
writer.save()
writer.close()
It seems like it runs fine with no errors but when I open the excel file I get the error shown below.
Does anyone see something wrong with the code that would cause excel to give me this error?
Thank you in advance
Your code knows it's writing data to the same workbook, but to use the writer you will also need to tell pandas what the existing sheet names are:
book = load_workbook(your_destination_file)
writer = pd.ExcelWriter(your_destination_file, engine='openpyxl')
writer.book = book
# tells pandas/python what the sheet names are
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
Your_dataframe.to_excel(writer, sheet_name=DesiredSheetname)
writer.save()
Also, if you have pivots, pictures, external connections in the document they will be deleted and could be what is causing the corruption.
I have an existing excel file which I have to update every week with new data, appending it to the last line of an existing sheet. I was accomplishing this in this manner, following the solution provided in this post How to write to an existing excel file without overwriting data (using pandas)?
import pandas as pd
import openpyxl
from openpyxl import load_workbook
book = load_workbook(excel_path)
writer = pd.ExcelWriter(excel_path, engine = 'openpyxl', mode = 'a')
writer.book = book
## ExcelWriter for some reason uses writer.sheets to access the sheet.
## If you leave it empty it will not know that sheet Main is already there
## and will create a new sheet.
ws = book.worksheets[1]
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, 'Preço_por_quilo', startrow = len(ws["C"]), header = False, index = False)
writer.save()
writer.close()
This code was running ok until today, when it returned the following error:
ValueError: Sheet 'Preço_por_quilo' already exists and if_sheet_exists is set to 'error'.
which apparently results from the latest update of the openpyxl package, which added the "if_sheet_exists" argument to the ExcelWriter function.
How can I correct this code, in order to append my data to the last line of the sheet?
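`if_sheet_exists` is an argument of `pd.ExcelWriter`, not of `df.to_excel` (passing it to `to_excel` raises a `TypeError`). Also note that `'replace'` discards the sheet's existing contents; to append below the existing rows, create the writer with `if_sheet_exists='overlay'` (available from pandas 1.4) and leave the `to_excel` call as it was, like below:
writer = pd.ExcelWriter(excel_path, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay')
df.to_excel(writer, 'Preço_por_quilo', startrow = len(ws["C"]), header = False, index = False)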
More information on its use can be found here:
https://pandas.pydata.org/docs/reference/api/pandas.ExcelWriter.html
After I run the code, I cannot seem to open the generated excel file, because my computer says it is "open in another program". If I close python, then I can open it.
When I try to add "workbook.close()" to the code, I get the error "Calling close() on already closed file." So it seems python already closed it.
What can I do to be able to see my outputs without having to close python?
import pandas as pd
# Import data
df = pd.read_excel (r'C:\Users\Desktop\work\charts\constraints.xlsx' , sheet_name='biggestobstacle')
print (df)
# File 2 is the new excel table with the chart. Hence, we keep the originals untouched. We create a new excel + chart.
writer = pd.ExcelWriter("file2.xlsx", engine="xlsxwriter")
df.to_excel(writer, sheet_name='biggestobstacle')
# Get xlsxwriter objects
workbook = writer.book
worksheet = writer.sheets['biggestobstacle']
# Create a 'column' chart
chart = workbook.add_chart({'type': 'column'})
# select the values of the series and set a name for the series
chart.add_series({
'values': '=\'biggestobstacle\'!$B$2:$B$8',
"name": "My Series's Name"
})
# Insert the chart into the worksheet in the D2 cell
worksheet.insert_chart('D2', chart)
writer.save()
# This last line generates an error.
workbook.close()
From:
https://github.com/pandas-dev/pandas/blob/v1.2.3/pandas/io/excel/_base.py#L584-L900
It looks like you can just call `writer.close()` and not worry about calling `save()` yourself.
From the excelWriter class
def close(self):
"""synonym for save, to make it more file-like"""
content = self.save()
self.handles.close()
return content
Try using a context manager; it handles closing files automatically.
It goes like this:
with open("StockQuotes.csv") as csvfile:
dialect = csv.Sniffer().sniff(csvfile.read(1024))
csvfile.seek(0)
hasHeader = csv.Sniffer().has_header(csvfile.read(1024))
csvfile.seek(0)
print("Headers found: " + str(hasHeader))
print(dialect.delimiter)
print(dialect.escapechar)
print(dialect.quotechar)
I tried searching in many places but did not see any example snippet showing how to delete an existing worksheet in an Excel file using xlutils or xlwt with Python. Can anyone help me, please?
I just dealt with this and, although this is not generally a good coding choice, you can use the internal (name-mangled) attribute `_Workbook__worksheets` to access and set the worksheets of a workbook object.
write_book._Workbook__worksheets = [write_book._Workbook__worksheets[0]]
this would strip everything but the first worksheet associated with a Workbook
I just wanted to confirm that I got this to work using the answer David gave. Here is an example of where I had a spreadsheet (workbook) with 40+ sheets that needed to be split into their own workbooks. I copied the master workbook removed all but the one sheet and saved to a new spreadsheet:
from xlrd import open_workbook
from xlutils import copy
workbook = open_workbook(filepath)
# Process each sheet: every sheet ends up alone in its own output workbook.
for sheet in workbook.sheets():
    # Make a fresh copy of the master workbook
    new_workbook = copy.copy(workbook)
    # In this copy, remove all sheets except the current one (matched by
    # sheet.name). _Workbook__worksheets is the name-mangled private list
    # holding the copy's worksheets.
    new_workbook._Workbook__worksheets = [
        worksheet
        for worksheet in new_workbook._Workbook__worksheets
        if worksheet.name == sheet.name
    ]
    # Save the new_workbook under a name based on sheet.name
    new_workbook.save('{}_workbook.xls'.format(sheet.name))
The following method does what you need:
def deleteAllSheetBut(workingFolder, xlsxFILE, sheetNumberNotToDelete=1):
    """Delete every worksheet of a workbook except one, then save it in place.

    Excel is driven through COM (pywin32).

    Args:
        workingFolder: directory that contains the workbook.
        xlsxFILE: file name of the workbook inside ``workingFolder``.
        sheetNumberNotToDelete: 1-based position of the worksheet to keep.

    Raises:
        ValueError: if ``sheetNumberNotToDelete`` is not a valid 1-based
            worksheet position in the workbook. Nothing is deleted or saved
            in that case.
    """
    import win32com.client as win32
    import os

    excel = win32.gencache.EnsureDispatch('Excel.Application')
    excel.Visible = False
    excel.DisplayAlerts = False  # suppress the per-sheet delete confirmation
    try:
        wb = excel.Workbooks.Open(os.path.join(workingFolder, xlsxFILE))
        try:
            sheet_count = wb.Worksheets.Count
            if not 1 <= sheetNumberNotToDelete <= sheet_count:
                raise ValueError(
                    'sheetNumberNotToDelete must be between 1 and {}, got {!r}'
                    .format(sheet_count, sheetNumberNotToDelete)
                )
            # Worksheets are 1-indexed and shift down after each deletion, so
            # walk from the last sheet to the first: indices not yet visited
            # stay valid, and the last sheet is included.
            for i in range(sheet_count, 0, -1):
                if i != sheetNumberNotToDelete:
                    wb.Worksheets(i).Delete()
            wb.Save()
        finally:
            # Close without saving again, so a failure part-way through never
            # leaves a hidden "save changes?" prompt blocking Quit().
            wb.Close(False)
    finally:
        excel.DisplayAlerts = True
        excel.Application.Quit()
not sure about those modules but u can try win32
from win32com import client
def delete(self, number = 1):
"""
(if the sheet is the first use 1. -1 to use real number)
example: r.delete(1)
"""
sheetnumber = int(number) - 1