I am trying to write 3 different dataframes (df, df_OK and df_KO) to 3 separate worksheets of the same Excel file. However, the code below only outputs df; the other 2 dataframes, df_OK and df_KO, are not written to their own worksheets in that file.
Any suggestions? Thanks
# NOTE: the indentation of this snippet was lost when it was posted; the lines
# are kept exactly as they appear, with explanatory comments added.
# Small helper that reads one Excel file and writes a multi-sheet Excel file.
class blah:
# Stores the three strings used later by process_file. `path` is concatenated
# directly with the file names, so it presumably ends with a path separator
# (TODO confirm against the caller).
def __init__(self, path, file_in, file_out):
self.path = path
self.file_in = file_in
self.file_out = file_out
# Reads path + file_in, splits the rows on the 'Status' column and writes the
# frames to sheets named 'All', 'OK' and 'KO' in path + file_out.
def process_file(self):
df = pd.read_excel(self.path + self.file_in)
# Row subsets whose 'Status' is exactly 'OK' / 'KO'.
df_OK = df.loc[df['Status'] == 'OK']
df_KO = df.loc[df['Status'] == 'KO']
# Adds a row labelled 'Total' holding the sum of the 'Price' column.
# NOTE(review): df_OK is a selection taken from df; pandas may warn about
# assigning into it (SettingWithCopyWarning) - confirm.
df_OK.loc['Total'] = df_OK[['Price']].sum(axis=0)
writer = pd.ExcelWriter(self.path + self.file_out, engine='xlsxwriter')
# Maps target sheet name -> frame to write.
dfs = {
'All': df,
'OK': df_OK,
'KO': df_KO
}
for sheet_name in dfs.keys():
dfs[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)
# NOTE(review): with the indentation gone it cannot be told from this line
# alone whether save() runs inside the loop or once after it.
writer.save()
# NOTE(review): four positional arguments are passed here while __init__ above
# declares three (path, file_in, file_out); the values are placeholders as posted.
b = blah('C:/Users/......./',
'path...',
'file_in....',
'file_out...')
b.process_file()
This happens because you save the same Excel file in every iteration of your for sheet_name in dfs.keys() loop. Each time, you write an Excel file containing only a single sheet to the same filename, thus overwriting the previous document.
You should move the writer.save() outside your loop like so:
# Write every frame to its own sheet of the same (still open) writer.
for sheet_name, frame in dfs.items():
    frame.to_excel(writer, sheet_name=sheet_name, index=False)

# Save exactly once, after the loop, so that all sheets end up in one workbook.
# (ExcelWriter.save() was removed in pandas 2.0; call writer.close() there.)
writer.save()
Related
I am attempting to write some code so that every time I run a Python script, a data frame (that has already been built) is automatically written as an Excel table in a defined folder path. However, I want it to work in such a way that re-running the code appends the data frame to the end of the existing Excel table, creating a new Excel table. Currently I am using this code to append (overlay) the data:
# NOTE: the indentation of this snippet was lost when it was posted; the lines
# are kept exactly as they appear, with explanatory comments added.
def append_df_to_excel(filename, df, sheet_name='Sheet2', startrow=None, startcol=None,
truncate_sheet=False, resizeColumns=True, na_rep = 'NA', **to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
Returns: None
"""
# Function-local imports; ascii_uppercase is imported but not referenced
# anywhere in the visible body.
from openpyxl import load_workbook
from string import ascii_uppercase
from openpyxl.utils import get_column_letter
from openpyxl import Workbook
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
# Probe for the file: when it cannot be opened, a new workbook whose active
# sheet is titled [sheet_name] is created and saved under [filename].
# NOTE(review): the handle `f` is never closed in the visible code.
try:
f = open(filename)
# Do something with the file
except IOError:
# print("File not accessible")
wb = Workbook()
ws = wb.active
ws.title = sheet_name
wb.save(filename)
# Writer on the file in append mode with the openpyxl engine and
# if_sheet_exists='overlay'.
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a', if_sheet_exists = 'overlay')
# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
except FileNotFoundError:
# file does not exist yet, we will create it
pass
# Fall back to the top-left cell when no start position was given or derived.
if startrow is None:
# startrow = -1
startrow = 0
if startcol is None:
startcol = 0
# write out the new sheet
# The frame is written without header and without index.
# NOTE(review): header/index are passed explicitly next to **to_excel_kwargs,
# so a caller supplying either keyword gets a duplicate-keyword TypeError.
df.to_excel(writer, sheet_name, startrow=startrow, startcol=startcol, na_rep=na_rep, **to_excel_kwargs,header = False, index = False)
ws = writer.book[sheet_name]
if resizeColumns:
# Sets each column's width to the longest str(cell.value) in it, plus 2.
def auto_format_cell_width(ws):
# NOTE(review): range() excludes its upper bound, so column number
# ws.max_column itself is not resized - confirm this is intended.
for letter in range(1,ws.max_column):
maximum_value = 0
for cell in ws[get_column_letter(letter)]:
val_to_check = len(str(cell.value))
if val_to_check > maximum_value:
maximum_value = val_to_check
ws.column_dimensions[get_column_letter(letter)].width = maximum_value + 2
auto_format_cell_width(ws)
# Persist the workbook through the pandas writer.
writer.save()
This code lets me run the script as many times as I want, each time appending the data to the end of what the previous run wrote. However, the resulting Excel sheets are not in table format.
Currently my attempt to make a table is as follows:
# NOTE: the indentation of this snippet was lost when it was posted; the lines
# are kept exactly as they appear, with explanatory comments added.
# `writer`, `sheet_name`, `Table` and `get_column_letter` are not defined or
# imported in this snippet; they must come from the surrounding code.
ws = writer.book[sheet_name]
# Builds a table spanning A1 to the last used column/row of `worksheet`, names
# its columns after df.columns and adds it to the worksheet.
def make_table(worksheet, df):
# Plain list of the DataFrame's column labels.
column_settings = []
for header in df.columns:
column_settings.append( header)
# The display name is the fixed string "Contacts" on every call.
table = Table(displayName="Contacts", ref="A1:" + get_column_letter(worksheet.max_column) + str(worksheet.max_row))
# Calls an underscore-prefixed (non-public) Table method before renaming.
table._initialise_columns()
for column, value in zip(table.tableColumns, column_settings):
column.name = value
# The result of add_table() only rebinds the local name `worksheet`.
worksheet = worksheet.add_table(table)
However, the column names are not updated accordingly in the Excel sheet; when the file is opened, Excel reports an error along the lines of 'had to recover/delete unworkable parts'.
In addition, running the script a second time raises the following Python error:
'Table with name Contacts already exists'
Any help would be greatly appreciated!
Here is a toy data frame for testing:
# Toy 3x3 frame for testing: values 1..9 filled row by row, columns a, b, c.
df = pd.DataFrame(
    np.arange(1, 10).reshape(3, 3),
    columns=['a', 'b', 'c'],
)
Here a multi-sheet Excel file is opened, one sheet is read into a dataframe, modified, and then written back. However, a new sheet (sheet1) is created when doing this, whereas the objective is to overwrite the old target sheet. When I try deleting the sheet before writing the data from the dataframe, it says 'sheet' does not exist.
Here is the code:
import openpyxl as op
import pandas as pd
# `filePath` is not defined in this snippet; it is concatenated directly with
# "file.xlsx" below, so it presumably ends with a path separator (TODO confirm).
basePath = filePath
# The workbook is loaded once with openpyxl ...
wbk = op.load_workbook(basePath + "file.xlsx")
# ... and a pandas writer is opened on the same file in append mode with
# if_sheet_exists="replace".
writer = pd.ExcelWriter(basePath + "file.xlsx", engine = 'openpyxl', mode="a", if_sheet_exists="replace")
# NOTE(review): the writer's workbook is replaced by the separately loaded one;
# confirm this reassignment is needed with mode="a".
writer.book = wbk
# Read the sheet named "sheet" and insert an empty-string column "newCol2" at position 0.
df = pd.read_excel(basePath + "file.xlsx", sheet_name="sheet")
df.insert(0,"newCol2","")
#wbk.remove_sheet(wbk.get_sheet_by_name('sheet'))
# Write the modified frame to the sheet name 'sheet', without the index.
df.to_excel(writer, sheet_name = 'sheet', index=False)
# save() is followed directly by close().
writer.save()
writer.close()
What am I doing wrong?
How can I append a row at the top of an excel sheet? Goal as follows:
The file itself is written by using pandas.df.to_excel as follows:
import pandas
# Write one worksheet per CSV file into a single workbook; the context manager
# saves and closes the workbook when the block exits.
with pandas.ExcelWriter(output_filename) as writer:
    for file in files:
        df = pandas.read_csv(file)
        # Sheet name derived from the file name: "my_data.csv" -> "My Data".
        df.to_excel(writer, sheet_name=file.replace(".csv", "").replace("_", " ").title(), index=False)
Here is one way to do it using XlsxWriter as the Excel engine:
# Write one worksheet per CSV file and leave the first row free for extra text.
with pandas.ExcelWriter(output_filename, engine='xlsxwriter') as writer:
    for file in files:
        df = pandas.read_csv(file)
        # Sheet name derived from the file name: "my_data.csv" -> "My Data".
        sheet_name = file.replace(".csv", "").replace("_", " ").title()
        # startrow=1 shifts the table down by one row, keeping row 1 empty.
        df.to_excel(writer, sheet_name=sheet_name, index=False, startrow=1)
        # Fill the reserved first row through the underlying XlsxWriter worksheet.
        worksheet = writer.sheets[sheet_name]
        worksheet.write('A1', 'Here is some additional text')
You can use openpyxl to edit your Excel file afterwards:
import contextlib
import openpyxl
import pandas as pd
new_row = "THIS ROW IS APPENDED AFTER THE FILE IS WRITTEN BY PANDAS"

# Re-open the workbook pandas produced, push every sheet down by one row and
# put the extra text into the freed first cell.
with contextlib.closing(openpyxl.open(output_filename)) as wb:
    for file in files:
        # Same file-name -> sheet-name mapping that was used when writing.
        sheet_name = file.replace(".csv", "").replace("_", " ").title()
        sheet = wb[sheet_name]
        # openpyxl rows are 1-based: insert one empty row before row 1.
        sheet.insert_rows(1)
        sheet["A1"] = new_row
    # Save once, after all sheets have been modified.
    wb.save(output_filename)
I am using Ubuntu 16.0.4. After reading from an Excel file, I am trying to add multiple Excel sheets to a PDF file.
# Load 'Sheet1', discard columns and then rows that are entirely empty,
# dump the result as HTML and convert that HTML file to a PDF.
df = (
    pd.read_excel(excel_name, sheet_name='Sheet1')
    .dropna(axis=1, how='all')
    .dropna(how='all')
)
df.to_html("file.html")
pdf_name = name_of_file + '.pdf'
pdfkit.from_file("file.html", pdf_name)
How can I add another excel sheet from the same excel file to the same pdf file without overwriting the previous sheet that is in the pdf?
Thanks!
If the two sheets have the same data structure (columns and etc.):
df1 = pd.read_excel(excel_name, sheet_name = 'Sheet1')
df2 = pd.read_excel(excel_name, sheet_name = 'Sheet2')
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows of
# both frames in the same way (original index labels are kept).
df = pd.concat([df1, df2])
If not:
df1 = pd.read_excel(excel_name, sheet_name = 'Sheet1')
df2 = pd.read_excel(excel_name, sheet_name = 'Sheet2')
# Do whatever you need to transform the dfs
# Render each frame as its own HTML table, separated by a line break.
html_str = '<br />'.join([df1.to_html(), df2.to_html()])
# Write both tables into one HTML file; the file is closed before conversion.
with open("file.html", "w") as text_file:
    text_file.write(html_str)
pdf_name = name_of_file + '.pdf'
pdfkit.from_file("file.html", pdf_name)
The code below creates a file and then fills in the Excel sheet. I would like to create one Excel file per value of a single column (i.e. filtered on that column) and save each file with a prefix taken from that column.
So return only the rows where columnX = i and create and save an Excel file for each, e.g. i1_CCBHC_MONTHLY_CLAIMS.XLSX,
i2_CCBHC_MONTHLY_CLAIMS.XLSX
I have the build of the large "parent" excel file.
# Build (or refresh) the large "parent" workbook holding all of df_ora.
filename = 'CCBHC_Monthly_Claims.xlsx'

if os.path.isfile(filename):
    # Workbook already exists: overwrite the data sheet starting at A1.
    wb = xw.Book(filename)
    ws = wb.sheets['CCBHC_DATA']
    ws.range('A1').options(index=False).value = df_ora
    wb = xw.Book(filename)
    xw.apps[0].quit()
else:
    # Create the workbook first. The context manager closes the writer, which
    # is what actually writes the file to disk before xlwings opens it.
    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
        df_ora.to_excel(writer, sheet_name='CCBHC_DATA', index=False)
    wb = xw.Book(filename)
    ws = wb.sheets['CCBHC_DATA']
    ws.range('A1').options(index=False).value = df_ora
    wb = xw.Book(filename)
    xw.apps[0].quit()