The following is a modified append script I obtained from stack overflow,
import pandas as pd
import openpyxl
import glob
import os
import xlsxwriter
from openpyxl import load_workbook
from tkinter import Tk, filedialog
from pathlib import Path
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
# >>> append_df_to_excel('d:/temp/test.xlsx', df)
#
# >>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
#
# >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
# index=False)
#
# >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
# index=False, startrow=25)
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
# Excel file doesn't exist - saving and exiting
if not os.path.isfile(filename):
df.to_excel(
filename,
sheet_name=sheet_name,
startrow=startrow if startrow is not None else 0,
**to_excel_kwargs)
return
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
# save the workbook
writer.save()
root = Tk()
root.withdraw()
root.attributes('-topmost', True)
source_dir = filedialog.askdirectory()
file_names = glob.glob(os.path.join(source_dir, '*.xlsx'))
service_name = os.path.basename(os.path.dirname(source_dir))
year = os.path.basename(source_dir)
target_file = service_name + " " + year + ".xlsx"
workbook = xlsxwriter.Workbook(target_file)
worksheet = workbook.add_worksheet()
workbook.close()
srcfile = openpyxl.load_workbook(target_file, read_only=False,
keep_vba=False)
sheetname = srcfile['Sheet1']
r = 2
srcfile.save(target_file)
for file in file_names:
df=pd.read_excel(file)
append_df_to_excel(target_file, df, header=False, index=False, startrow=r, startcol=0)
index = df.index
Num_of_Rigs = len(index)
r += Num_of_Rigs
print (r)
I added some additional code to use the append function.
However, this code works in Python 3.9, but I get the following error in Python 3.10 :
/usr/local/bin/python3 "/Users/ahmedhamadto/PycharmProjects/DataFrame Concatenator/main.py"
Traceback (most recent call last):
File "/Users/ahmedhamadto/PycharmProjects/DataFrame Concatenator/main.py", line 136, in <module>
append_df_to_excel(target_file, df, header=False, index=False, startrow=r, startcol=0)
File "/Users/ahmedhamadto/PycharmProjects/DataFrame Concatenator/main.py", line 87, in append_df_to_excel
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/core/generic.py", line 2284, in to_excel
formatter.write(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/io/formats/excel.py", line 840, in write
writer.write_cells(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/io/excel/_openpyxl.py", line 436, in write_cells
raise ValueError(
ValueError: Sheet 'Sheet1' already exists and if_sheet_exists is set to 'error'.
Process finished with exit code 1
I can't seem to understand what changed to make it not work in Python 3.10.
This is the only difference in both environments.
Related
I am attempting to write some code where for every time I run a python script a data frame (that has been made) automatically becomes a excel table in a defined folder path. However I want it to work in such a way that by re running the code the data frame would append to the end of the existing excel table, creating a new excel table. Currently I am using this code to do the data overlap:
def append_df_to_excel(filename, df, sheet_name='Sheet2', startrow=None, startcol=None,
truncate_sheet=False, resizeColumns=True, na_rep = 'NA', **to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
Returns: None
"""
from openpyxl import load_workbook
from string import ascii_uppercase
from openpyxl.utils import get_column_letter
from openpyxl import Workbook
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
try:
f = open(filename)
# Do something with the file
except IOError:
# print("File not accessible")
wb = Workbook()
ws = wb.active
ws.title = sheet_name
wb.save(filename)
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a', if_sheet_exists = 'overlay')
# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
except FileNotFoundError:
# file does not exist yet, we will create it
pass
if startrow is None:
# startrow = -1
startrow = 0
if startcol is None:
startcol = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, startcol=startcol, na_rep=na_rep, **to_excel_kwargs,header = False, index = False)
ws = writer.book[sheet_name]
if resizeColumns:
def auto_format_cell_width(ws):
for letter in range(1,ws.max_column):
maximum_value = 0
for cell in ws[get_column_letter(letter)]:
val_to_check = len(str(cell.value))
if val_to_check > maximum_value:
maximum_value = val_to_check
ws.column_dimensions[get_column_letter(letter)].width = maximum_value + 2
auto_format_cell_width(ws)
writer.save()
This code successfully allows me to run the code as many times as i want and append the data onto the end of the previously ran python script. However those outputted excel sheets are not in table format.
Currently my attempt to make a table is as follows:
ws = writer.book[sheet_name]
def make_table(worksheet, df):
column_settings = []
for header in df.columns:
column_settings.append( header)
table = Table(displayName="Contacts", ref="A1:" + get_column_letter(worksheet.max_column) + str(worksheet.max_row))
table._initialise_columns()
for column, value in zip(table.tableColumns, column_settings):
column.name = value
worksheet = worksheet.add_table(table)
However the column names do not update accordingly in the excel sheet, excel cites an error for this along the lines of 'had to recover/delete unworkable parts'
But also upon trying to run the script a second time the following python error:
'Table with name Contacts already exists'
Any help would be greatly appreciated!
Here is a toy data frame for testing:
df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
columns=['a', 'b', 'c'])
Here is a multi sheet excel file opened and operated on one sheet taken in a dataframe and then copied back. Now, a new sheet (sheet1) is being created while doing this. Objective however is to overwrite the old target sheet. When I am trying deleting the sheet before pasting data from dataframe, it says 'sheet' does not exist.
Here is the code:
import openpyxl as op
import pandas as pd
basePath = filePath
wbk = op.load_workbook(basePath + "file.xlsx")
writer = pd.ExcelWriter(basePath + "file.xlsx", engine = 'openpyxl', mode="a", if_sheet_exists="replace")
writer.book = wbk
df = pd.read_excel(basePath + "file.xlsx", sheet_name="sheet")
df.insert(0,"newCol2","")
#wbk.remove_sheet(wbk.get_sheet_by_name('sheet'))
df.to_excel(writer, sheet_name = 'sheet', index=False)
writer.save()
writer.close()
What am I doing wrong?
How can I append a row at the top of an excel sheet? Goal as follows:
The file itself is written by using pandas.df.to_excel as follows:
import pandas
with pandas.ExcelWriter(output_filename) as writer:
for file in files:
df = pandas.read_csv(file)
df.to_excel(writer, sheet_name=file.replace(".csv", "").replace("_", " ").title(), index=False)
Here is one way to do it using XlsxWriter as the Excel engine:
with pandas.ExcelWriter(output_filename, engine='xlsxwriter') as writer:
for file in files:
df = pandas.read_csv(file)
sheet_name = file.replace(".csv", "").replace("_", " ").title()
df.to_excel(writer, sheet_name=sheet_name, index=False, startrow=1)
worksheet = writer.sheets[sheet_name]
worksheet.write('A1', 'Here is some additional text')
You can use openpyxl to edit your Excel file afterwards:
import contextlib
import openpyxl
import pandas as pd
new_row = "THIS ROW IS APPENDED AFTER THE FILE IS WRITTEN BY PANDAS"
with contextlib.closing(openpyxl.open(output_filename)) as wb:
for file in files:
sheet_name = file.replace(".csv", "").replace("_", " ").title()
sheet = wb[sheet_name]
sheet.insert_rows(0)
sheet["A1"] = new_row
wb.save(output_filename)
my propose:
if excel file not exist, create it and copy data table to it;
if excel file exist, copy to data table to new sheet.
but following code running, only copy to data to new sheet, original sheet in excel file was removed.
import os
import pandas as pd
import openpyxl
f_name = "123.xlsx" #target excel file
if os.path.exists(f_name):
"""if excel file exist, added table to another sheet"""
wb = openpyxl.load_workbook(f_name) #load excel file
writer = pd.ExcelWriter(f_name, engine="openpyxl")
writer.wb = wb
df = pd.DataFrame(pd.read_excel("table_2.xlsx")) #get table to be added excel file
df.to_excel(writer, sheet_name="sheet2",index=False) #write to another sheet
writer.save()
writer.close()
else:
"""if excel file not exit, create it"""
df_1 = pd.DataFrame() # create excel file
df_1.to_excel(f_name)
writer = pd.ExcelWriter(f_name)
df_2 = pd.DataFrame(pd.read_excel("table_1.xlsx")) # get table_1
df_2.to_excel(writer, sheet_name="sheet1",index=False) # write table_1 into excel file
writer.save()
writer.close()
import os
import pandas as pd
import openpyxl
f_name = "123.xlsx" #target excel file
if os.path.exists(f_name):
"""if excel file exist, added table to another sheet"""
wb = openpyxl.load_workbook(f_name) #load excel file
writer = pd.ExcelWriter(f_name, engine="openpyxl") #assign engine
writer.book = wb #overwrite if no this
df = pd.DataFrame(pd.read_excel("table_2.xlsx")) #get table to be added excel file
df.to_excel(writer, sheet_name="table_2",index=False) #write to another sheet
writer.save()
writer.close()
else:
"""if excel file not exit, create it"""
df_1 = pd.DataFrame() # create excel file
df_1.to_excel(f_name)
writer = pd.ExcelWriter(f_name)
df_2 = pd.DataFrame(pd.read_excel("table_1.xlsx")) # get table_1
df_2.to_excel(writer, sheet_name="table_1",index=False) # write table_1 into excel file
writer.save()
writer.close()
I'm having 10 CSV files each of the CSV file is having same number of column from which I'm reading data one by one in the form of pandas data frame. I want those data to be displayed on console/Terminal in some table form. And it should be like if every time the data gets into new row. Any suggestions on this ?
Below is my sample CSV file :
Like this, there are 10 or more CSV file and I will be reading data from those file one by one and want to display on Console/Terminal.
Brief Introduction to my Application
I have a machine that is generating CSV files after a certain interval of time into a folder. I am using Watchdog library to put a watch on the folder where the CSV files are being generated. When I receive a CSV file I Read it into a pandas data frame. Sample CSV file is given above.
As far as the machine is running it will keep generating the CSV files. So if I want to see the data I need to open each and every CSV files, Instead, I want a View in which the Data gets updated when there is a new CSV file generated.
So Technically One CSV file is getting read gets converted into a data frame and then printed on Console/Terminal. And this process happens again when a new CSV file is generated, But when new data frame arrives, It should not overwrite the whole console, Instead it appends to the existing data on console.
Here is my main file :
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
import pandas as pd
from Append_Function import append_df_to_excel
import os.path
import sys
class Watcher:
def __init__(self, args):
self.watch_dir = os.getcwd()
print(args[0])
self.directory_to_watch = os.path.join(self.watch_dir, args[1])
self.observer = Observer()
self.event_handler = Handler(patterns=["*.CSV"], ignore_patterns=["*.tmp"], ignore_directories=True)
def run(self):
self.observer.schedule(self.event_handler, self.directory_to_watch, recursive=False)
self.observer.start()
try:
while True:
time.sleep(1)
except:
self.observer.stop()
print("Error")
self.observer.join()
class Handler(PatternMatchingEventHandler):
#staticmethod
def on_any_event(event):
if event.is_directory:
return None
elif event.event_type == 'created':
# Take any action here when a file is first created.
print("Received created event - %s." % event.src_path)
df = pd.read_csv(event.src_path, header=1, index_col=0)
append_df_to_excel(os.path.join(os.getcwd(), "myfile.xlsx"), df)
elif event.event_type == 'modified':
# Taken any actionc here when a file is modified.
df = pd.read_csv(event.src_path, header=0, index_col=0)
append_df_to_excel(os.path.join(os.getcwd(), "myfile.xlsx"), df)
print("Received modified event - %s." % event.src_path)
if __name__ == '__main__':
print(sys.argv)
w = Watcher(sys.argv)
w.run()
Here is my Append Function:
import pandas as pd
import openpyxl as ox
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl')
# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
# try to open an existing workbook
writer.book = ox.load_workbook(filename,keep_vba=True)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
except FileNotFoundError:
# file does not exist yet, we will create it
pass
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs, header=True)
# save the workbook
writer.save()