Issues with saving an Excel.Writer file to new path - python

I am trying to create a timed backup system for my excel document with Python as multiple users will be accessing it.
I want to change the path of the file not to my local directory.
Here's the code;
import pandas as pd
import datetime
import numpy
now = datetime.datetime.now()
ct = now.strftime("%Y-%m-%d %H.%M")
table = pd.read_excel(r'Z:\new\Planner_New.xlsx',
sheet_name = 'Jan18',
header = 0,
index_col = 0,
usecols = "A:AY",
convert_float = True)
writer = pd.ExcelWriter('Planner' + ct + '.xlsx', engine='xlsxwriter')
table.to_excel(writer, sheet_name = "Jan18")
workbook = writer.book
worksheet = writer.sheets['Jan18']
format1 = workbook.add_format({'num_format': '0%'})
worksheet.set_column('H:AY', None, format1)
writer.save()
writer.close()
I have tried
outpath = (r'Z:\backup')
writer.save(outpath)
writer.close()
But get back
TypeError: save() takes 1 positional argument but 2 were given

You need to specify the save location when you create the ExcelWriter object:
writer = pd.ExcelWriter(r'Z:\backup\Planner' + ct + '.xlsx', engine='xlsxwriter')
...
writer.save()
writer.close()

Related

Dataframe appender not working in Python 3.10

The following is a modified append script I obtained from stack overflow,
import pandas as pd
import openpyxl
import glob
import os
import xlsxwriter
from openpyxl import load_workbook
from tkinter import Tk, filedialog
from pathlib import Path
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
# >>> append_df_to_excel('d:/temp/test.xlsx', df)
#
# >>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
#
# >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
# index=False)
#
# >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
# index=False, startrow=25)
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
# Excel file doesn't exist - saving and exiting
if not os.path.isfile(filename):
df.to_excel(
filename,
sheet_name=sheet_name,
startrow=startrow if startrow is not None else 0,
**to_excel_kwargs)
return
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
# save the workbook
writer.save()
root = Tk()
root.withdraw()
root.attributes('-topmost', True)
source_dir = filedialog.askdirectory()
file_names = glob.glob(os.path.join(source_dir, '*.xlsx'))
service_name = os.path.basename(os.path.dirname(source_dir))
year = os.path.basename(source_dir)
target_file = service_name + " " + year + ".xlsx"
workbook = xlsxwriter.Workbook(target_file)
worksheet = workbook.add_worksheet()
workbook.close()
srcfile = openpyxl.load_workbook(target_file, read_only=False,
keep_vba=False)
sheetname = srcfile['Sheet1']
r = 2
srcfile.save(target_file)
for file in file_names:
df=pd.read_excel(file)
append_df_to_excel(target_file, df, header=False, index=False, startrow=r, startcol=0)
index = df.index
Num_of_Rigs = len(index)
r += Num_of_Rigs
print (r)
I added some additional code to use the append function.
However, this code works in Python 3.9, but I get the following error in Python 3.10 :
/usr/local/bin/python3 "/Users/ahmedhamadto/PycharmProjects/DataFrame Concatenator/main.py"
Traceback (most recent call last):
File "/Users/ahmedhamadto/PycharmProjects/DataFrame Concatenator/main.py", line 136, in <module>
append_df_to_excel(target_file, df, header=False, index=False, startrow=r, startcol=0)
File "/Users/ahmedhamadto/PycharmProjects/DataFrame Concatenator/main.py", line 87, in append_df_to_excel
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/core/generic.py", line 2284, in to_excel
formatter.write(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/io/formats/excel.py", line 840, in write
writer.write_cells(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/io/excel/_openpyxl.py", line 436, in write_cells
raise ValueError(
ValueError: Sheet 'Sheet1' already exists and if_sheet_exists is set to 'error'.
Process finished with exit code 1
I can't seem to understand what changed to make it not work in Python 3.10.
This is the only difference in both environments.

pandas ExcelWriter 'if_sheet_exists' is not working

Here is a multi sheet excel file opened and operated on one sheet taken in a dataframe and then copied back. Now, a new sheet (sheet1) is being created while doing this. Objective however is to overwrite the old target sheet. When I am trying deleting the sheet before pasting data from dataframe, it says 'sheet' does not exist.
Here is the code:
import openpyxl as op
import pandas as pd
basePath = filePath
wbk = op.load_workbook(basePath + "file.xlsx")
writer = pd.ExcelWriter(basePath + "file.xlsx", engine = 'openpyxl', mode="a", if_sheet_exists="replace")
writer.book = wbk
df = pd.read_excel(basePath + "file.xlsx", sheet_name="sheet")
df.insert(0,"newCol2","")
#wbk.remove_sheet(wbk.get_sheet_by_name('sheet'))
df.to_excel(writer, sheet_name = 'sheet', index=False)
writer.save()
writer.close()
What am I doing wrong?

AttributeError: 'function' object has no attribute 'to_excel'

I am trying to use the first function into the other 2 functions. I'm getting an error. Here is the code I'm using.I'm quite new to Python, any help will be appreciated.
import pandas as pd
import numpy as np
inputFile = "File.xlsx"
def add():
cols = ["A", "B","C"]
df = pd.read_excel(inputFile, usecols=cols)
df.insert(4, "D", "0")
df.dropna(subset = ["B"], inplace=True)
df = df.replace(np.nan, '--', regex=True)
df.columns = df.columns.str.replace('_',' ')
return add
def out1(df):
df.to_excel("output1.xlsx", index=False)
writer = pd.ExcelWriter("output1.xlsx", engine="xlsxwriter")
workbook = writer.book
df.to_excel(writer, sheet_name="Sheet1", index=False)
ws = writer.sheets["Sheet1"]
ws.autofilter(0, 0, df.shape[0], df.shape[1])
writer.save()
def out2(df):
df.to_excel("output2.xlsx", index=False)
writer = pd.ExcelWriter("output2.xlsx", engine="xlsxwriter")
workbook = writer.book
df.to_excel(writer, sheet_name="Sheet1", index=False)
ws = writer.sheets["Sheet1"]
ws.autofilter(0, 0, df.shape[0], df.shape[1])
writer.save()
out1(add)
out2(add)
Your add() function should return df instead of return add - you're returning the function name, not the variable you're creating, and you also need to call the add() function at the end - out1(add()) instead of out1(add)

Append row to top of excel sheet python

How can I append a row at the top of an excel sheet? Goal as follows:
The file itself is written by using pandas.df.to_excel as follows:
import pandas
with pandas.ExcelWriter(output_filename) as writer:
for file in files:
df = pandas.read_csv(file)
df.to_excel(writer, sheet_name=file.replace(".csv", "").replace("_", " ").title(), index=False)
Here is one way to do it using XlsxWriter as the Excel engine:
with pandas.ExcelWriter(output_filename, engine='xlsxwriter') as writer:
for file in files:
df = pandas.read_csv(file)
sheet_name = file.replace(".csv", "").replace("_", " ").title()
df.to_excel(writer, sheet_name=sheet_name, index=False, startrow=1)
worksheet = writer.sheets[sheet_name]
worksheet.write('A1', 'Here is some additional text')
You can use openpyxl to edit your Excel file afterwards:
import contextlib
import openpyxl
import pandas as pd
new_row = "THIS ROW IS APPENDED AFTER THE FILE IS WRITTEN BY PANDAS"
with contextlib.closing(openpyxl.open(output_filename)) as wb:
for file in files:
sheet_name = file.replace(".csv", "").replace("_", " ").title()
sheet = wb[sheet_name]
sheet.insert_rows(0)
sheet["A1"] = new_row
wb.save(output_filename)

Python create dataframes from a function using ExcelWriter

I am trying to create 3 different dataframes to output in my excel file in 3 separate worksheet called df, df_OK, df_KO. However the code below only outputs df and is not creating the other 2 dataframes df_OK, df_KO to have in the same Excel file but in 2 separate worksheets.
Any suggestions? Thanks
class blah:
def __init__(self, path, file_in, file_out):
self.path = path
self.file_in = file_in
self.file_out = file_out
def process_file(self):
df = pd.read_excel(self.path + self.file_in)
df_OK = df.loc[df['Status'] == 'OK']
df_KO = df.loc[df['Status'] == 'KO']
df_OK.loc['Total'] = df_OK[['Price']].sum(axis=0)
writer = pd.ExcelWriter(self.path + self.file_out, engine='xlsxwriter')
dfs = {
'All': df,
'OK': df_OK,
'KO': df_KO
}
for sheet_name in dfs.keys():
dfs[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)
writer.save()
b = blah('C:/Users/......./',
'path...',
'file_in....',
'file_out...')
b.process_file()
It is because you overwrite the same Excel file in every iteration of your for sheet_name in dfs.keys() loop. So every time you write an Excel file with only a single sheet to the same filename, thus overwriting the previous document.
You should move the writer.save() outside your loop like so:
for sheet_name in dfs.keys():
dfs[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)
writer.save()

Categories

Resources