Writing Pandas DataFrame to Excel: How to auto-adjust column widths - python

I am trying to write a series of pandas DataFrames to an Excel worksheet such that:
The existing contents of the worksheet are not overwritten or erased, and
the Excel column widths are adjusted to fit the lengths of the column entries (so that I don't have to manually do this in Excel).
For 1), I have found an excellent solution in the form of a helper function written by #MaxU: How to write to an existing excel file without overwriting data (using pandas)?. For 2) I found what looked like a good solution here. But when I try to put these solutions together, the column widths don't change at all. Here's my full code:
import pandas as pd
import os
from openpyxl import load_workbook
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
>>> append_df_to_excel('d:/temp/test.xlsx', df)
>>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
# Excel file doesn't exist - saving and exiting
if not os.path.isfile(filename):
df.to_excel(
filename,
sheet_name=sheet_name,
startrow=startrow if startrow is not None else 0,
**to_excel_kwargs)
return
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
"""
Now attempt to adjust the column widths as necessary so that all the cell contents are visible
in Excel. The code below is taken from https://towardsdatascience.com/how-to-auto-adjust-the-width-of-excel-columns-with-pandas-excelwriter-60cee36e175e.
"""
for column in df:
column_width = max(df[column].astype(str).map(len).max(), len(column))
col_idx = df.columns.get_loc(column)
writer.sheets[sheet_name].set_column(col_idx, col_idx, column_width)
writer.save()
Now I tried testing the function:
df = pd.DataFrame({'A_Very_Long_Column_Name': [10, 20, 30, 20, 15, 30, 45]})
append_df_to_excel("C:/Users/Leonidas/Documents/test.xlsx", df, "Sheet1")
A new Excel workbook named test.xlsx is created along with a sheet named Sheet1, and the contents of df are written to Sheet1, but the column widths are completely unaffected:
And strangely, when I try to execute the function a second time (without changing the arguments), I get an error:
runcell(2, 'C:/Users/Leonidas/Documents/write_to_excel2.py')
Traceback (most recent call last):
File "C:\Users\Leonidas\Documents\write_to_excel2.py", line 125, in <module>
append_df_to_excel("C:/Users/Leonidas/Documents/test.xlsx", df,
File "C:\Users\Leonidas\Documents\write_to_excel2.py", line 100, in append_df_to_excel
writer.sheets[sheet_name].set_column(col_idx, col_idx, column_width)
AttributeError: 'Worksheet' object has no attribute 'set_column'
I'm pretty confused at this point...Any suggestions for how to fix the code would be greatly appreciated.

Try to use this helper function (updated version):
Old version, which is no longer compatible with Pandas 1.3.0+:
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Union, Optional, List, Tuple
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
def append_df_to_excel(
filename: Union[str, Path],
df: pd.DataFrame,
sheet_name: str = 'Sheet1',
startrow: int = None,
max_col_width: int = 40,
autofilter: bool = False,
fmt_int: str = "#,##0",
fmt_float: str = "#,##0.00",
fmt_date: str = "yyyy-mm-dd",
fmt_datetime: str = "yyyy-mm-dd hh:mm",
truncate_sheet: bool = False,
**to_excel_kwargs
) -> None:
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param max_col_width: maximum column width in Excel. Default: 30
#param autofilter: boolean - whether add Excel autofilter or not. Default: True
#param fmt_int: Excel format for integer numbers
#param fmt_float: Excel format for float numbers
#param fmt_date: Excel format for dates
#param fmt_datetime: Excel format for datetime's
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
>>> append_df_to_excel('d:/temp/test.xlsx', df, autofilter=True,
freeze_panes=(1,0))
>>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
>>> append_df_to_excel('d:/temp/test.xlsx', df, index=False,
fmt_datetime="dd.mm.yyyy hh:mm")
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
def set_column_format(ws, column_letter, fmt):
for cell in ws[column_letter]:
cell.number_format = fmt
filename = Path(filename)
file_exists = filename.is_file()
# process parameters
first_col = int(to_excel_kwargs.get("index", True)) + 1
sheet_name = to_excel_kwargs.get("sheet_name", "Sheet1")
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
with pd.ExcelWriter(
filename.with_suffix(".xlsx"),
engine="openpyxl",
mode="a" if file_exists else "w",
date_format=fmt_date,
datetime_format=fmt_datetime,
**to_excel_kwargs
) as writer:
if file_exists:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
else:
# file doesn't exist, we are creating a new one
startrow = 0
# write out the DataFrame to an ExcelWriter
df.to_excel(writer, sheet_name=sheet_name, startrow=startrow,
**to_excel_kwargs)
# automatically set columns' width
worksheet = writer.sheets[sheet_name]
for xl_col_no, dtyp in enumerate(df.dtypes, first_col):
col_no = xl_col_no - first_col
width = max(df.iloc[:, col_no].astype(str).str.len().max(),
len(df.columns[col_no]) + 6)
width = min(max_col_width, width)
# print(f"column: [{df.columns[col_no]} ({dtyp.name})]\twidth:\t[{width}]")
column_letter = get_column_letter(xl_col_no)
worksheet.column_dimensions[column_letter].width = width
if np.issubdtype(dtyp, np.integer):
set_column_format(worksheet, column_letter, fmt_int)
if np.issubdtype(dtyp, np.floating):
set_column_format(worksheet, column_letter, fmt_float)
if autofilter:
worksheet.auto_filter.ref = worksheet.dimensions

You can also try using the openpyxl bestFit attribute, which sets the column width to the same width that double clicking on the border of the column does. It should do the trick. Try doing something like this:
for column in df:
ws.column_dimensions[column].bestFit = True
Depending on why you're exporting to Excel, you could also look into a number of different Python based spreadsheets. I'm the author of one called Mito. It lets you display your pandas dataframe as an interactive spreadsheet.

This code works when using 'openpyxl' as your engine.
Edit any part as you wish.
def text_length(text):
"""
Get the effective text length in characters, taking into account newlines
"""
if not text:
return 0
lines = text.split("\n")
return max(len(line) for line in lines)
def _to_str_for_length(v, decimals=3):
"""
Like str() but rounds decimals to predefined length
"""
if isinstance(v, float):
# Round to [decimal] places
return str(Decimal(v).quantize(Decimal('1.' + '0' * decimals)).normalize())
else:
return str(v)
def auto_adjust_xlsx_column_width(df, writer, sheet_name, margin=3, length_factor=1.0, decimals=3, index=False):
sheet = writer.sheets[sheet_name]
_to_str = functools.partial(_to_str_for_length, decimals=decimals)
# Compute & set column width for each column
for column_name in df.columns:
# Convert the value of the columns to string and select the
column_length = max(df[column_name].apply(_to_str).map(text_length).max(), text_length(column_name)) + 5
# Get index of column in XLSX
# Column index is +1 if we also export the index column
col_idx = df.columns.get_loc(column_name)
if index:
col_idx += 1
# Set width of column to (column_length + margin)
sheet.column_dimensions[openpyxl.utils.cell.get_column_letter(col_idx + 1)].width = column_length * length_factor + margin
# Compute column width of index column (if enabled)
if index: # If the index column is being exported
index_length = max(df.index.map(_to_str).map(text_length).max(), text_length(df.index.name))
sheet.column_dimensions["A"].width = index_length * length_factor + margin
Example of using it
with pd.ExcelWriter('report.xlsx', sheet_name="results", engine='openpyxl') as writer:
report_df.to_excel(writer,index=False, sheet_name="results")
auto_adjust_xlsx_column_width(report_df, writer, "results")

Related

Python Pandas: Copy CSV to Excel without overwriting [duplicate]

I currently have this code. It works perfectly.
It loops through excel files in a folder,
removes the first 2 rows, then saves them as individual excel files,
and it also saves the files in the loop as an appended file.
Currently the appended file overwrites the existing file each time I run the code.
I need to append the new data to the bottom of the already existing excel sheet ('master_data.xlsx)
dfList = []
path = 'C:\\Test\\TestRawFile'
newpath = 'C:\\Path\\To\\New\\Folder'
for fn in os.listdir(path):
# Absolute file path
file = os.path.join(path, fn)
if os.path.isfile(file):
# Import the excel file and call it xlsx_file
xlsx_file = pd.ExcelFile(file)
# View the excel files sheet names
xlsx_file.sheet_names
# Load the xlsx files Data sheet as a dataframe
df = xlsx_file.parse('Sheet1',header= None)
df_NoHeader = df[2:]
data = df_NoHeader
# Save individual dataframe
data.to_excel(os.path.join(newpath, fn))
dfList.append(data)
appended_data = pd.concat(dfList)
appended_data.to_excel(os.path.join(newpath, 'master_data.xlsx'))
I thought this would be a simple task, but I guess not.
I think I need to bring in the master_data.xlsx file as a dataframe, then match the index up with the new appended data, and save it back out. Or maybe there is an easier way. Any Help is appreciated.
UPDATE [2022-01-08]: starting from version 1.4.0 Pandas supports appending to existing Excel sheet, preserving the old contents, "out of the box"!
Good job Pandas Team!
Excerpt from the ExcelWriter documentation:
if_sheet_exists : {'error', 'new', 'replace', 'overlay'}, default 'error'
How to behave when trying to write to a sheet that already
exists (append mode only).
...
* overlay: Write contents to the existing sheet without removing the old contents.
.. versionadded:: 1.3.0
.. versionchanged:: 1.4.0
Added ``overlay`` option
For Pandas versions < 1.4.0 please find below a helper function for appending a Pandas DataFrame to an existing Excel file.
If an Excel file doesn't exist then it will be created.
UPDATE [2021-09-12]: fixed for Pandas 1.3.0+
The following functions have been tested with:
Pandas 1.3.2
OpenPyxl 3.0.7
from pathlib import Path
from copy import copy
from typing import Union, Optional
import numpy as np
import pandas as pd
import openpyxl
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
def copy_excel_cell_range(
src_ws: openpyxl.worksheet.worksheet.Worksheet,
min_row: int = None,
max_row: int = None,
min_col: int = None,
max_col: int = None,
tgt_ws: openpyxl.worksheet.worksheet.Worksheet = None,
tgt_min_row: int = 1,
tgt_min_col: int = 1,
with_style: bool = True
) -> openpyxl.worksheet.worksheet.Worksheet:
"""
copies all cells from the source worksheet [src_ws] starting from [min_row] row
and [min_col] column up to [max_row] row and [max_col] column
to target worksheet [tgt_ws] starting from [tgt_min_row] row
and [tgt_min_col] column.
#param src_ws: source worksheet
#param min_row: smallest row index in the source worksheet (1-based index)
#param max_row: largest row index in the source worksheet (1-based index)
#param min_col: smallest column index in the source worksheet (1-based index)
#param max_col: largest column index in the source worksheet (1-based index)
#param tgt_ws: target worksheet.
If None, then the copy will be done to the same (source) worksheet.
#param tgt_min_row: target row index (1-based index)
#param tgt_min_col: target column index (1-based index)
#param with_style: whether to copy cell style. Default: True
#return: target worksheet object
"""
if tgt_ws is None:
tgt_ws = src_ws
# https://stackoverflow.com/a/34838233/5741205
for row in src_ws.iter_rows(min_row=min_row, max_row=max_row,
min_col=min_col, max_col=max_col):
for cell in row:
tgt_cell = tgt_ws.cell(
row=cell.row + tgt_min_row - 1,
column=cell.col_idx + tgt_min_col - 1,
value=cell.value
)
if with_style and cell.has_style:
# tgt_cell._style = copy(cell._style)
tgt_cell.font = copy(cell.font)
tgt_cell.border = copy(cell.border)
tgt_cell.fill = copy(cell.fill)
tgt_cell.number_format = copy(cell.number_format)
tgt_cell.protection = copy(cell.protection)
tgt_cell.alignment = copy(cell.alignment)
return tgt_ws
def append_df_to_excel(
filename: Union[str, Path],
df: pd.DataFrame,
sheet_name: str = 'Sheet1',
startrow: Optional[int] = None,
max_col_width: int = 30,
autofilter: bool = False,
fmt_int: str = "#,##0",
fmt_float: str = "#,##0.00",
fmt_date: str = "yyyy-mm-dd",
fmt_datetime: str = "yyyy-mm-dd hh:mm",
truncate_sheet: bool = False,
storage_options: Optional[dict] = None,
**to_excel_kwargs
) -> None:
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param max_col_width: maximum column width in Excel. Default: 40
#param autofilter: boolean - whether add Excel autofilter or not. Default: False
#param fmt_int: Excel format for integer numbers
#param fmt_float: Excel format for float numbers
#param fmt_date: Excel format for dates
#param fmt_datetime: Excel format for datetime's
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param storage_options: dict, optional
Extra options that make sense for a particular storage connection, e.g. host, port,
username, password, etc., if using a URL that will be parsed by fsspec, e.g.,
starting “s3://”, “gcs://”.
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
>>> append_df_to_excel('/tmp/test.xlsx', df, autofilter=True,
freeze_panes=(1,0))
>>> append_df_to_excel('/tmp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('/tmp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('/tmp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
>>> append_df_to_excel('/tmp/test.xlsx', df, index=False,
fmt_datetime="dd.mm.yyyy hh:mm")
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
def set_column_format(ws, column_letter, fmt):
for cell in ws[column_letter]:
cell.number_format = fmt
filename = Path(filename)
file_exists = filename.is_file()
# process parameters
# calculate first column number
# if the DF will be written using `index=True`, then `first_col = 2`, else `first_col = 1`
first_col = int(to_excel_kwargs.get("index", True)) + 1
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
# save content of existing sheets
if file_exists:
wb = load_workbook(filename)
sheet_names = wb.sheetnames
sheet_exists = sheet_name in sheet_names
sheets = {ws.title: ws for ws in wb.worksheets}
with pd.ExcelWriter(
filename.with_suffix(".xlsx"),
engine="openpyxl",
mode="a" if file_exists else "w",
if_sheet_exists="new" if file_exists else None,
date_format=fmt_date,
datetime_format=fmt_datetime,
storage_options=storage_options
) as writer:
if file_exists:
# try to open an existing workbook
writer.book = wb
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = sheets
else:
# file doesn't exist, we are creating a new one
startrow = 0
# write out the DataFrame to an ExcelWriter
df.to_excel(writer, sheet_name=sheet_name, **to_excel_kwargs)
worksheet = writer.sheets[sheet_name]
if autofilter:
worksheet.auto_filter.ref = worksheet.dimensions
for xl_col_no, dtyp in enumerate(df.dtypes, first_col):
col_no = xl_col_no - first_col
width = max(df.iloc[:, col_no].astype(str).str.len().max(),
len(df.columns[col_no]) + 6)
width = min(max_col_width, width)
column_letter = get_column_letter(xl_col_no)
worksheet.column_dimensions[column_letter].width = width
if np.issubdtype(dtyp, np.integer):
set_column_format(worksheet, column_letter, fmt_int)
if np.issubdtype(dtyp, np.floating):
set_column_format(worksheet, column_letter, fmt_float)
if file_exists and sheet_exists:
# move (append) rows from new worksheet to the `sheet_name` worksheet
wb = load_workbook(filename)
# retrieve generated worksheet name
new_sheet_name = set(wb.sheetnames) - set(sheet_names)
if new_sheet_name:
new_sheet_name = list(new_sheet_name)[0]
# copy rows written by `df.to_excel(...)` to
copy_excel_cell_range(
src_ws=wb[new_sheet_name],
tgt_ws=wb[sheet_name],
tgt_min_row=startrow + 1,
with_style=True
)
# remove new (generated by Pandas) worksheet
del wb[new_sheet_name]
wb.save(filename)
wb.close()
Old version (tested with Pandas 1.2.3 and Openpyxl 3.0.5):
import os
from openpyxl import load_workbook
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
>>> append_df_to_excel('d:/temp/test.xlsx', df)
>>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
# Excel file doesn't exist - saving and exiting
if not os.path.isfile(filename):
df.to_excel(
filename,
sheet_name=sheet_name,
startrow=startrow if startrow is not None else 0,
**to_excel_kwargs)
return
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
# save the workbook
writer.save()
Usage examples:
filename = r'C:\OCC.xlsx'
append_df_to_excel(filename, df)
append_df_to_excel(filename, df, header=None, index=False)
append_df_to_excel(filename, df, sheet_name='Sheet2', index=False)
append_df_to_excel(filename, df, sheet_name='Sheet2', index=False, startrow=25)
c:/temp/test.xlsx:
PS you may also want to specify header=None if you don't want to duplicate column names...
UPDATE: you may also want to check this old solution
If you aren't strictly looking for an excel file, then get the output as csv file and just copy the csv to a new excel file.
Note: this only works when you have less than 1000 columns since csv has a limit on the number of columns you can write.
df.to_csv('filepath', mode='a', index = False, header=None)
mode='a' means append.
This is a roundabout way it but works neat!
Building on MaxU and others' code and comments but simplifying to only fix the bug with pandas ExcelWriter that causes to_excel to create a new sheet rather than append to an existing sheet in append mode.
As others have noted, to_excel uses the ExcelWriter.sheets property and this is not populated when by ExcelWriter.
Fix is a one liner, otherwise code is standard pandas approach as documented in to_excel.
# xl_path is destination xlsx spreadsheet
with pd.ExcelWriter(xl_path, 'openpyxl', mode='a') as writer:
# fix line
writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets)
df.to_excel(writer, sheet_name)
import pandas as pd
import openpyxl
workbook = openpyxl.load_workbook("test.xlsx")
writer = pd.ExcelWriter('test.xlsx', engine='openpyxl')
writer.book = workbook
writer.sheets = dict((ws.title, ws) for ws in workbook.worksheets)
data_df.to_excel(writer, 'Existing_sheetname')
writer.save()
writer.close()
If you use ExcelWriter on the sheet every time it is going to override the previous sheet and all that will be visible is the last data sheet you appended to the workbook.
Instead you can maintain a counter that is 1 initially for which you need to initialize the excel sheet and add initial data using the existing approach of
writer = pd.ExcelWriter(output_file, engine='openpyxl')
df = pd.read_excel(output_file, sheet_name='TestSheet1')
or you can use the following approach i used. to load the workbook next time you want to use it or else file not find exception if you try to load it in the first case.
USage:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from openpyxl import load_workbook
urls = ["http://millenniumcricketleague.com/Home/ShowTeam.aspx?tid=22",
"http://millenniumcricketleague.com/Home/ShowTeam.aspx?tid=40"]
path = "F:\meta_1.xlsx"
writer = pd.ExcelWriter(path,engine='openpyxl')
counter = 1
for url in urls:
table_data = []
final = []
html_content = requests.get(url).text
soup = BeautifulSoup(html_content, "lxml")
x = soup.find_all('table')
for table in x[1:]:
for tr in table.find_all("tr"):
newrow = []
for td in tr.find_all("td"):
newrow.append(td.text.replace('\n', ' ').strip())
table_data.append(newrow)
df = pd.DataFrame(table_data)
sheetname = 'Sheet%s' % counter
if(counter!=1):
writer.book = load_workbook(path)
df.to_excel(writer, sheet_name=sheetname)
counter = counter + 1
writer.save()
NO need to close the excelwriter. its an automatic function. Will show you a warning if you define it explicitly
This worked for me
import os
import openpyxl
import pandas as pd
from openpyxl.utils.dataframe import dataframe_to_rows
file = r"myfile.xlsx"
df = pd.DataFrame({'A': 1, 'B': 2})
# create excel file
if os.path.isfile(file): # if file already exists append to existing file
workbook = openpyxl.load_workbook(file) # load workbook if already exists
sheet = workbook['my_sheet_name'] # declare the active sheet
# append the dataframe results to the current excel file
for row in dataframe_to_rows(df, header = False, index = False):
sheet.append(row)
workbook.save(file) # save workbook
workbook.close() # close workbook
else: # create the excel file if doesn't already exist
with pd.ExcelWriter(path = file, engine = 'openpyxl') as writer:
df.to_excel(writer, index = False, sheet_name = 'my_sheet_name')
This question has been out here a while. The answer is ok, but I believe this will solve most peoples question.
simply use glob to access the files in a specific directory, loop through them, create a dataframe of each file, append it to the last one, then export to a folder. I also included commented out code to run through this with csvs.
import os
import pandas as pd
import glob
# put in path to folder with files you want to append
# *.xlsx or *.csv will get all files of that type
path = "C:/Users/Name/Folder/*.xlsx"
#path = "C:/Users/Name/Folder/*.csv"
# initialize a empty df
appended_data = pd.DataFrame()
#loop through each file in the path
for file in glob.glob(path):
print(file)
# create a df of that file path
df = pd.read_excel(file, sheet_name = 0)
#df = pd.read_csv(file, sep=',')
# appened it
appended_data = appended_data.append(df)
appended_data
# export the appeneded data to a folder of your choice
exportPath = 'C:/My/EXPORT/PATH/appended_dataExport.csv'
appended_data.to_csv(os.path.join(exportPath),index=False)
Complementing to #david, if you dont care the index and you can use .csv, this function helps to append any df to an existing csv
def append_df(self, path_file, df):
with open(path_file, 'a+') as f:
df.to_csv(f, header=f.tell() == 0, encoding='utf-8', index=False)
Notes:
a+ create the file if it doesnot exist
f.tell() == 0 add header if the first row
from openpyxl import load_workbook
wb = load_workbook(filepath)
ws = wb["Sheet1"]
df = dataframe.values.tolist()
for i in range(len(df)):
ws.append(df[i])
wb.save(filepath)
Append DataFrame to existing excel file
Use ExcelWriter to append DataFrame to an existing excel file. This is a simple approach and uses the existing library features.
with pd.ExcelWriter('existing_excel_file.xlsx',mode='a') as writer:
df.to_excel(writer, sheet_name='existing_sheet_name')
For detailed examples refer to pandas read Excel File with Examples

Appending already existing excel file with pandas [duplicate]

I currently have this code. It works perfectly.
It loops through excel files in a folder,
removes the first 2 rows, then saves them as individual excel files,
and it also saves the files in the loop as an appended file.
Currently the appended file overwrites the existing file each time I run the code.
I need to append the new data to the bottom of the already existing excel sheet ('master_data.xlsx)
dfList = []
path = 'C:\\Test\\TestRawFile'
newpath = 'C:\\Path\\To\\New\\Folder'
for fn in os.listdir(path):
# Absolute file path
file = os.path.join(path, fn)
if os.path.isfile(file):
# Import the excel file and call it xlsx_file
xlsx_file = pd.ExcelFile(file)
# View the excel files sheet names
xlsx_file.sheet_names
# Load the xlsx files Data sheet as a dataframe
df = xlsx_file.parse('Sheet1',header= None)
df_NoHeader = df[2:]
data = df_NoHeader
# Save individual dataframe
data.to_excel(os.path.join(newpath, fn))
dfList.append(data)
appended_data = pd.concat(dfList)
appended_data.to_excel(os.path.join(newpath, 'master_data.xlsx'))
I thought this would be a simple task, but I guess not.
I think I need to bring in the master_data.xlsx file as a dataframe, then match the index up with the new appended data, and save it back out. Or maybe there is an easier way. Any Help is appreciated.
UPDATE [2022-01-08]: starting from version 1.4.0 Pandas supports appending to existing Excel sheet, preserving the old contents, "out of the box"!
Good job Pandas Team!
Excerpt from the ExcelWriter documentation:
if_sheet_exists : {'error', 'new', 'replace', 'overlay'}, default 'error'
How to behave when trying to write to a sheet that already
exists (append mode only).
...
* overlay: Write contents to the existing sheet without removing the old contents.
.. versionadded:: 1.3.0
.. versionchanged:: 1.4.0
Added ``overlay`` option
For Pandas versions < 1.4.0 please find below a helper function for appending a Pandas DataFrame to an existing Excel file.
If an Excel file doesn't exist then it will be created.
UPDATE [2021-09-12]: fixed for Pandas 1.3.0+
The following functions have been tested with:
Pandas 1.3.2
OpenPyxl 3.0.7
from pathlib import Path
from copy import copy
from typing import Union, Optional
import numpy as np
import pandas as pd
import openpyxl
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
def copy_excel_cell_range(
src_ws: openpyxl.worksheet.worksheet.Worksheet,
min_row: int = None,
max_row: int = None,
min_col: int = None,
max_col: int = None,
tgt_ws: openpyxl.worksheet.worksheet.Worksheet = None,
tgt_min_row: int = 1,
tgt_min_col: int = 1,
with_style: bool = True
) -> openpyxl.worksheet.worksheet.Worksheet:
"""
copies all cells from the source worksheet [src_ws] starting from [min_row] row
and [min_col] column up to [max_row] row and [max_col] column
to target worksheet [tgt_ws] starting from [tgt_min_row] row
and [tgt_min_col] column.
#param src_ws: source worksheet
#param min_row: smallest row index in the source worksheet (1-based index)
#param max_row: largest row index in the source worksheet (1-based index)
#param min_col: smallest column index in the source worksheet (1-based index)
#param max_col: largest column index in the source worksheet (1-based index)
#param tgt_ws: target worksheet.
If None, then the copy will be done to the same (source) worksheet.
#param tgt_min_row: target row index (1-based index)
#param tgt_min_col: target column index (1-based index)
#param with_style: whether to copy cell style. Default: True
#return: target worksheet object
"""
if tgt_ws is None:
tgt_ws = src_ws
# https://stackoverflow.com/a/34838233/5741205
for row in src_ws.iter_rows(min_row=min_row, max_row=max_row,
min_col=min_col, max_col=max_col):
for cell in row:
tgt_cell = tgt_ws.cell(
row=cell.row + tgt_min_row - 1,
column=cell.col_idx + tgt_min_col - 1,
value=cell.value
)
if with_style and cell.has_style:
# tgt_cell._style = copy(cell._style)
tgt_cell.font = copy(cell.font)
tgt_cell.border = copy(cell.border)
tgt_cell.fill = copy(cell.fill)
tgt_cell.number_format = copy(cell.number_format)
tgt_cell.protection = copy(cell.protection)
tgt_cell.alignment = copy(cell.alignment)
return tgt_ws
def append_df_to_excel(
filename: Union[str, Path],
df: pd.DataFrame,
sheet_name: str = 'Sheet1',
startrow: Optional[int] = None,
max_col_width: int = 30,
autofilter: bool = False,
fmt_int: str = "#,##0",
fmt_float: str = "#,##0.00",
fmt_date: str = "yyyy-mm-dd",
fmt_datetime: str = "yyyy-mm-dd hh:mm",
truncate_sheet: bool = False,
storage_options: Optional[dict] = None,
**to_excel_kwargs
) -> None:
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param max_col_width: maximum column width in Excel. Default: 40
#param autofilter: boolean - whether add Excel autofilter or not. Default: False
#param fmt_int: Excel format for integer numbers
#param fmt_float: Excel format for float numbers
#param fmt_date: Excel format for dates
#param fmt_datetime: Excel format for datetime's
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param storage_options: dict, optional
Extra options that make sense for a particular storage connection, e.g. host, port,
username, password, etc., if using a URL that will be parsed by fsspec, e.g.,
starting “s3://”, “gcs://”.
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
>>> append_df_to_excel('/tmp/test.xlsx', df, autofilter=True,
freeze_panes=(1,0))
>>> append_df_to_excel('/tmp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('/tmp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('/tmp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
>>> append_df_to_excel('/tmp/test.xlsx', df, index=False,
fmt_datetime="dd.mm.yyyy hh:mm")
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
def set_column_format(ws, column_letter, fmt):
for cell in ws[column_letter]:
cell.number_format = fmt
filename = Path(filename)
file_exists = filename.is_file()
# process parameters
# calculate first column number
# if the DF will be written using `index=True`, then `first_col = 2`, else `first_col = 1`
first_col = int(to_excel_kwargs.get("index", True)) + 1
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
# save content of existing sheets
if file_exists:
wb = load_workbook(filename)
sheet_names = wb.sheetnames
sheet_exists = sheet_name in sheet_names
sheets = {ws.title: ws for ws in wb.worksheets}
with pd.ExcelWriter(
filename.with_suffix(".xlsx"),
engine="openpyxl",
mode="a" if file_exists else "w",
if_sheet_exists="new" if file_exists else None,
date_format=fmt_date,
datetime_format=fmt_datetime,
storage_options=storage_options
) as writer:
if file_exists:
# try to open an existing workbook
writer.book = wb
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = sheets
else:
# file doesn't exist, we are creating a new one
startrow = 0
# write out the DataFrame to an ExcelWriter
df.to_excel(writer, sheet_name=sheet_name, **to_excel_kwargs)
worksheet = writer.sheets[sheet_name]
if autofilter:
worksheet.auto_filter.ref = worksheet.dimensions
for xl_col_no, dtyp in enumerate(df.dtypes, first_col):
col_no = xl_col_no - first_col
width = max(df.iloc[:, col_no].astype(str).str.len().max(),
len(df.columns[col_no]) + 6)
width = min(max_col_width, width)
column_letter = get_column_letter(xl_col_no)
worksheet.column_dimensions[column_letter].width = width
if np.issubdtype(dtyp, np.integer):
set_column_format(worksheet, column_letter, fmt_int)
if np.issubdtype(dtyp, np.floating):
set_column_format(worksheet, column_letter, fmt_float)
if file_exists and sheet_exists:
# move (append) rows from new worksheet to the `sheet_name` worksheet
wb = load_workbook(filename)
# retrieve generated worksheet name
new_sheet_name = set(wb.sheetnames) - set(sheet_names)
if new_sheet_name:
new_sheet_name = list(new_sheet_name)[0]
# copy rows written by `df.to_excel(...)` to
copy_excel_cell_range(
src_ws=wb[new_sheet_name],
tgt_ws=wb[sheet_name],
tgt_min_row=startrow + 1,
with_style=True
)
# remove new (generated by Pandas) worksheet
del wb[new_sheet_name]
wb.save(filename)
wb.close()
Old version (tested with Pandas 1.2.3 and Openpyxl 3.0.5):
import os
from openpyxl import load_workbook
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
>>> append_df_to_excel('d:/temp/test.xlsx', df)
>>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
# Excel file doesn't exist - saving and exiting
if not os.path.isfile(filename):
df.to_excel(
filename,
sheet_name=sheet_name,
startrow=startrow if startrow is not None else 0,
**to_excel_kwargs)
return
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
# save the workbook
writer.save()
Usage examples:
filename = r'C:\OCC.xlsx'
append_df_to_excel(filename, df)
append_df_to_excel(filename, df, header=None, index=False)
append_df_to_excel(filename, df, sheet_name='Sheet2', index=False)
append_df_to_excel(filename, df, sheet_name='Sheet2', index=False, startrow=25)
c:/temp/test.xlsx:
PS you may also want to specify header=None if you don't want to duplicate column names...
UPDATE: you may also want to check this old solution
If you aren't strictly looking for an excel file, then get the output as csv file and just copy the csv to a new excel file.
Note: this only works when you have less than 1000 columns since csv has a limit on the number of columns you can write.
df.to_csv('filepath', mode='a', index = False, header=None)
mode='a' means append.
This is a roundabout way it but works neat!
Building on MaxU and others' code and comments but simplifying to only fix the bug with pandas ExcelWriter that causes to_excel to create a new sheet rather than append to an existing sheet in append mode.
As others have noted, to_excel uses the ExcelWriter.sheets property and this is not populated when by ExcelWriter.
Fix is a one liner, otherwise code is standard pandas approach as documented in to_excel.
# xl_path is destination xlsx spreadsheet
with pd.ExcelWriter(xl_path, 'openpyxl', mode='a') as writer:
# fix line
writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets)
df.to_excel(writer, sheet_name)
import pandas as pd
import openpyxl
workbook = openpyxl.load_workbook("test.xlsx")
writer = pd.ExcelWriter('test.xlsx', engine='openpyxl')
writer.book = workbook
writer.sheets = dict((ws.title, ws) for ws in workbook.worksheets)
data_df.to_excel(writer, 'Existing_sheetname')
writer.save()
writer.close()
If you use ExcelWriter on the sheet every time it is going to override the previous sheet and all that will be visible is the last data sheet you appended to the workbook.
Instead you can maintain a counter that is 1 initially for which you need to initialize the excel sheet and add initial data using the existing approach of
writer = pd.ExcelWriter(output_file, engine='openpyxl')
df = pd.read_excel(output_file, sheet_name='TestSheet1')
or you can use the following approach i used. to load the workbook next time you want to use it or else file not find exception if you try to load it in the first case.
USage:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from openpyxl import load_workbook
urls = ["http://millenniumcricketleague.com/Home/ShowTeam.aspx?tid=22",
"http://millenniumcricketleague.com/Home/ShowTeam.aspx?tid=40"]
path = "F:\meta_1.xlsx"
writer = pd.ExcelWriter(path,engine='openpyxl')
counter = 1
for url in urls:
table_data = []
final = []
html_content = requests.get(url).text
soup = BeautifulSoup(html_content, "lxml")
x = soup.find_all('table')
for table in x[1:]:
for tr in table.find_all("tr"):
newrow = []
for td in tr.find_all("td"):
newrow.append(td.text.replace('\n', ' ').strip())
table_data.append(newrow)
df = pd.DataFrame(table_data)
sheetname = 'Sheet%s' % counter
if(counter!=1):
writer.book = load_workbook(path)
df.to_excel(writer, sheet_name=sheetname)
counter = counter + 1
writer.save()
NO need to close the excelwriter. its an automatic function. Will show you a warning if you define it explicitly
This worked for me
import os
import openpyxl
import pandas as pd
from openpyxl.utils.dataframe import dataframe_to_rows
file = r"myfile.xlsx"
df = pd.DataFrame({'A': 1, 'B': 2})
# create excel file
if os.path.isfile(file): # if file already exists append to existing file
workbook = openpyxl.load_workbook(file) # load workbook if already exists
sheet = workbook['my_sheet_name'] # declare the active sheet
# append the dataframe results to the current excel file
for row in dataframe_to_rows(df, header = False, index = False):
sheet.append(row)
workbook.save(file) # save workbook
workbook.close() # close workbook
else: # create the excel file if doesn't already exist
with pd.ExcelWriter(path = file, engine = 'openpyxl') as writer:
df.to_excel(writer, index = False, sheet_name = 'my_sheet_name')
This question has been out here a while. The answer is ok, but I believe this will solve most peoples question.
simply use glob to access the files in a specific directory, loop through them, create a dataframe of each file, append it to the last one, then export to a folder. I also included commented out code to run through this with csvs.
import os
import pandas as pd
import glob
# put in path to folder with files you want to append
# *.xlsx or *.csv will get all files of that type
path = "C:/Users/Name/Folder/*.xlsx"
#path = "C:/Users/Name/Folder/*.csv"
# initialize a empty df
appended_data = pd.DataFrame()
#loop through each file in the path
for file in glob.glob(path):
print(file)
# create a df of that file path
df = pd.read_excel(file, sheet_name = 0)
#df = pd.read_csv(file, sep=',')
# appened it
appended_data = appended_data.append(df)
appended_data
# export the appeneded data to a folder of your choice
exportPath = 'C:/My/EXPORT/PATH/appended_dataExport.csv'
appended_data.to_csv(os.path.join(exportPath),index=False)
Complementing to #david, if you dont care the index and you can use .csv, this function helps to append any df to an existing csv
def append_df(self, path_file, df):
with open(path_file, 'a+') as f:
df.to_csv(f, header=f.tell() == 0, encoding='utf-8', index=False)
Notes:
a+ create the file if it doesnot exist
f.tell() == 0 add header if the first row
from openpyxl import load_workbook
wb = load_workbook(filepath)
ws = wb["Sheet1"]
df = dataframe.values.tolist()
for i in range(len(df)):
ws.append(df[i])
wb.save(filepath)
Append DataFrame to existing excel file
Use ExcelWriter to append DataFrame to an existing excel file. This is a simple approach and uses the existing library features.
with pd.ExcelWriter('existing_excel_file.xlsx',mode='a') as writer:
df.to_excel(writer, sheet_name='existing_sheet_name')
For detailed examples refer to pandas read Excel File with Examples

Add new row to existing excel file without overwriting the data (Python)

So I copied this code from How to write to an existing excel file without overwriting data (using pandas)?
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
Parameters:
filename : File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
df : dataframe to save to workbook
sheet_name : Name of sheet which will contain DataFrame.
(default: 'Sheet1')
startrow : upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
truncate_sheet : truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
to_excel_kwargs : arguments which will be passed to `DataFrame.to_excel()`
[can be dictionary]
Returns: None
"""
from openpyxl import load_workbook
import pandas as pd
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl')
# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
except FileNotFoundError:
# file does not exist yet, we will create it
pass
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
# save the workbook
writer.save()
What the function does is: it adds a dataframe(row) to an existing excel file.
However this function is replacing the row of the original excel file, for the row of the dataframe.
How can I change/add to this function for it first to insert the row rather than repalce the row, thus maintaining the original data and adding the new one?
After # try to open an existing workbook
writer.book = load_workbook(filename)
Add ws= writer.book[sheet_name]
ws.insert_rows("number of the row you want to insert")

Insert a panda's dataframe into existant excel [duplicate]

I use pandas to write to excel file in the following fashion:
import pandas
writer = pandas.ExcelWriter('Masterfile.xlsx')
data_filtered.to_excel(writer, "Main", cols=['Diff1', 'Diff2'])
writer.save()
Masterfile.xlsx already consists of number of different tabs. However, it does not yet contain "Main".
Pandas correctly writes to "Main" sheet, unfortunately it also deletes all other tabs.
Pandas docs says it uses openpyxl for xlsx files. Quick look through the code in ExcelWriter gives a clue that something like this might work out:
import pandas
from openpyxl import load_workbook
book = load_workbook('Masterfile.xlsx')
writer = pandas.ExcelWriter('Masterfile.xlsx', engine='openpyxl')
writer.book = book
## ExcelWriter for some reason uses writer.sheets to access the sheet.
## If you leave it empty it will not know that sheet Main is already there
## and will create a new sheet.
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
data_filtered.to_excel(writer, "Main", cols=['Diff1', 'Diff2'])
writer.save()
UPDATE: Starting from Pandas 1.3.0 the following function will not work properly, because functions DataFrame.to_excel() and pd.ExcelWriter() have been changed - a new if_sheet_exists parameter has been introduced, which has invalidated the function below.
Here you can find an updated version of the append_df_to_excel(), which is working for Pandas 1.3.0+.
Here is a helper function:
import os
from openpyxl import load_workbook
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
>>> append_df_to_excel('d:/temp/test.xlsx', df)
>>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
# Excel file doesn't exist - saving and exiting
if not os.path.isfile(filename):
df.to_excel(
filename,
sheet_name=sheet_name,
startrow=startrow if startrow is not None else 0,
**to_excel_kwargs)
return
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
# save the workbook
writer.save()
Tested with the following versions:
Pandas 1.2.3
Openpyxl 3.0.5
With openpyxlversion 2.4.0 and pandasversion 0.19.2, the process #ski came up with gets a bit simpler:
import pandas
from openpyxl import load_workbook
with pandas.ExcelWriter('Masterfile.xlsx', engine='openpyxl') as writer:
writer.book = load_workbook('Masterfile.xlsx')
data_filtered.to_excel(writer, "Main", cols=['Diff1', 'Diff2'])
#That's it!
Starting in pandas 0.24 you can simplify this with the mode keyword argument of ExcelWriter:
import pandas as pd
with pd.ExcelWriter('the_file.xlsx', engine='openpyxl', mode='a') as writer:
data_filtered.to_excel(writer)
I know this is an older thread, but this is the first item you find when searching, and the above solutions don't work if you need to retain charts in a workbook that you already have created. In that case, xlwings is a better option - it allows you to write to the excel book and keeps the charts/chart data.
simple example:
import xlwings as xw
import pandas as pd
#create DF
months = ['2017-01','2017-02','2017-03','2017-04','2017-05','2017-06','2017-07','2017-08','2017-09','2017-10','2017-11','2017-12']
value1 = [x * 5+5 for x in range(len(months))]
df = pd.DataFrame(value1, index = months, columns = ['value1'])
df['value2'] = df['value1']+5
df['value3'] = df['value2']+5
#load workbook that has a chart in it
wb = xw.Book('C:\\data\\bookwithChart.xlsx')
ws = wb.sheets['chartData']
ws.range('A1').options(index=False).value = df
wb = xw.Book('C:\\data\\bookwithChart_updated.xlsx')
xw.apps[0].quit()
Old question, but I am guessing some people still search for this - so...
I find this method nice because all worksheets are loaded into a dictionary of sheet name and dataframe pairs, created by pandas with the sheetname=None option. It is simple to add, delete or modify worksheets between reading the spreadsheet into the dict format and writing it back from the dict. For me the xlsxwriter works better than openpyxl for this particular task in terms of speed and format.
Note: future versions of pandas (0.21.0+) will change the "sheetname" parameter to "sheet_name".
# read a single or multi-sheet excel file
# (returns dict of sheetname(s), dataframe(s))
ws_dict = pd.read_excel(excel_file_path,
sheetname=None)
# all worksheets are accessible as dataframes.
# easy to change a worksheet as a dataframe:
mod_df = ws_dict['existing_worksheet']
# do work on mod_df...then reassign
ws_dict['existing_worksheet'] = mod_df
# add a dataframe to the workbook as a new worksheet with
# ws name, df as dict key, value:
ws_dict['new_worksheet'] = some_other_dataframe
# when done, write dictionary back to excel...
# xlsxwriter honors datetime and date formats
# (only included as example)...
with pd.ExcelWriter(excel_file_path,
engine='xlsxwriter',
datetime_format='yyyy-mm-dd',
date_format='yyyy-mm-dd') as writer:
for ws_name, df_sheet in ws_dict.items():
df_sheet.to_excel(writer, sheet_name=ws_name)
For the example in the 2013 question:
ws_dict = pd.read_excel('Masterfile.xlsx',
sheetname=None)
ws_dict['Main'] = data_filtered[['Diff1', 'Diff2']]
with pd.ExcelWriter('Masterfile.xlsx',
engine='xlsxwriter') as writer:
for ws_name, df_sheet in ws_dict.items():
df_sheet.to_excel(writer, sheet_name=ws_name)
There is a better solution in pandas 0.24:
with pd.ExcelWriter(path, mode='a') as writer:
s.to_excel(writer, sheet_name='another sheet', index=False)
before:
after:
so upgrade your pandas now:
pip install --upgrade pandas
The solution of #MaxU is not working for the updated version of python and related packages. It raises the error:
"zipfile.BadZipFile: File is not a zip file"
I generated a new version of the function that works fine with the updated version of python and related packages and tested with python: 3.9 | openpyxl: 3.0.6 | pandas: 1.2.3
In addition I added more features to the helper function:
Now It resize all columns based on cell content width AND all variables will be visible (SEE "resizeColumns")
You can handle NaN, if you want that NaN are displayed as NaN or as empty cells (SEE "na_rep")
Added "startcol", you can decide to start to write from specific column, oterwise will start from col = 0
Here the function:
import pandas as pd
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None, startcol=None,
truncate_sheet=False, resizeColumns=True, na_rep = 'NA', **to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
Parameters:
filename : File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
df : dataframe to save to workbook
sheet_name : Name of sheet which will contain DataFrame.
(default: 'Sheet1')
startrow : upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
truncate_sheet : truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
resizeColumns: default = True . It resize all columns based on cell content width
to_excel_kwargs : arguments which will be passed to `DataFrame.to_excel()`
[can be dictionary]
na_rep: default = 'NA'. If, instead of NaN, you want blank cells, just edit as follows: na_rep=''
Returns: None
*******************
CONTRIBUTION:
Current helper function generated by [Baggio]: https://stackoverflow.com/users/14302009/baggio?tab=profile
Contributions to the current helper function: https://stackoverflow.com/users/4046632/buran?tab=profile
Original helper function: (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
Features of the new helper function:
1) Now it works with python 3.9 and latest versions of pandas and openpxl
---> Fixed the error: "zipfile.BadZipFile: File is not a zip file".
2) Now It resize all columns based on cell content width AND all variables will be visible (SEE "resizeColumns")
3) You can handle NaN, if you want that NaN are displayed as NaN or as empty cells (SEE "na_rep")
4) Added "startcol", you can decide to start to write from specific column, oterwise will start from col = 0
*******************
"""
from openpyxl import load_workbook
from string import ascii_uppercase
from openpyxl.utils import get_column_letter
from openpyxl import Workbook
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
try:
f = open(filename)
# Do something with the file
except IOError:
# print("File not accessible")
wb = Workbook()
ws = wb.active
ws.title = sheet_name
wb.save(filename)
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
except FileNotFoundError:
# file does not exist yet, we will create it
pass
if startrow is None:
# startrow = -1
startrow = 0
if startcol is None:
startcol = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, startcol=startcol, na_rep=na_rep, **to_excel_kwargs)
if resizeColumns:
ws = writer.book[sheet_name]
def auto_format_cell_width(ws):
for letter in range(1,ws.max_column):
maximum_value = 0
for cell in ws[get_column_letter(letter)]:
val_to_check = len(str(cell.value))
if val_to_check > maximum_value:
maximum_value = val_to_check
ws.column_dimensions[get_column_letter(letter)].width = maximum_value + 2
auto_format_cell_width(ws)
# save the workbook
writer.save()
Example Usage:
# Create a sample dataframe
df = pd.DataFrame({'numbers': [1, 2, 3],
'colors': ['red', 'white', 'blue'],
'colorsTwo': ['yellow', 'white', 'blue'],
'NaNcheck': [float('NaN'), 1, float('NaN')],
})
# EDIT YOUR PATH FOR THE EXPORT
filename = r"C:\DataScience\df.xlsx"
# RUN ONE BY ONE IN ROW THE FOLLOWING LINES, TO SEE THE DIFFERENT UPDATES TO THE EXCELFILE
append_df_to_excel(filename, df, index=False, startrow=0) # Basic Export of df in default sheet (Sheet1)
append_df_to_excel(filename, df, sheet_name="Cool", index=False, startrow=0) # Append the sheet "Cool" where "df" is written
append_df_to_excel(filename, df, sheet_name="Cool", index=False) # Append another "df" to the sheet "Cool", just below the other "df" instance
append_df_to_excel(filename, df, sheet_name="Cool", index=False, startrow=0, startcol=5) # Append another "df" to the sheet "Cool" starting from col 5
append_df_to_excel(filename, df, index=False, truncate_sheet=True, startrow=10, na_rep = '') # Override (truncate) the "Sheet1", writing the df from row 10, and showing blank cells instead of NaN
def append_sheet_to_master(self, master_file_path, current_file_path, sheet_name):
try:
master_book = load_workbook(master_file_path)
master_writer = pandas.ExcelWriter(master_file_path, engine='openpyxl')
master_writer.book = master_book
master_writer.sheets = dict((ws.title, ws) for ws in master_book.worksheets)
current_frames = pandas.ExcelFile(current_file_path).parse(pandas.ExcelFile(current_file_path).sheet_names[0],
header=None,
index_col=None)
current_frames.to_excel(master_writer, sheet_name, index=None, header=False)
master_writer.save()
except Exception as e:
raise e
This works perfectly fine only thing is that formatting of the master file(file to which we add new sheet) is lost.
writer = pd.ExcelWriter('prueba1.xlsx'engine='openpyxl',keep_date_col=True)
The "keep_date_col" hope help you
I used the answer described here
from openpyxl import load_workbook
writer = pd.ExcelWriter(p_file_name, engine='openpyxl', mode='a')
writer.book = load_workbook(p_file_name)
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
df.to_excel(writer, 'Data', startrow=10, startcol=20)
writer.save()
book = load_workbook(xlsFilename)
writer = pd.ExcelWriter(self.xlsFilename)
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, sheet_name=sheetName, index=False)
writer.save()
Solution by #MaxU worked very well. I have just one suggestion:
If truncate_sheet=True is specified than "startrow" should NOT be retained from existing sheet. I suggest:
if startrow is None and sheet_name in writer.book.sheetnames:
if not truncate_sheet: # truncate_sheet would use startrow if provided (or zero below)
startrow = writer.book[sheet_name].max_row
I'd reccommend using xlwings (https://docs.xlwings.org/en/stable/api.html), it is really powerful for this application... This is how I use it:
import xlwings as xw
import pandas as pd
import xlsxwriter
# function to get the active workbook
def getActiveWorkbook():
try:
# logic from xlwings to grab the current excel file
activeWb = xw.books.active
except:
# print error message if unable to get the current workbook
print('Unable to grab the current Workbook')
pause()
exitProgram()
else:
return activeWb
# function that returns the last row number and last cell of a sheet
def getLastRow(myBook, sheetName):
lastRow = myBook.sheets[sheetName].range("A1").current_region.last_cell.row
lastCol = str(xlsxwriter.utility.xl_col_to_name(myBook.sheets[sheetName].range("A1").current_region.last_cell.column))
return str(lastRow), lastCol + str(lastRow)
activeWb = getActiveWorkbook()
df = pd.DataFrame(data=[1,2,3])
# look at worksheet = Part Number Status
sheetName = "Sheet1"
ws = activeWb.sheets[sheetName]
lastRow, lastCell = getLastRow(activeWb, sheetName)
if int(lastRow) > 1:
ws.range("A1:" + lastCell).clear()
ws.range("A1").options(index=False, header=False).value = df.fillna('')
This seems to work very well for my applications because .xlsm workbooks can be very tricky. You can execute this as a python script or turn it into and executable with pyinstaller and then run the .exe through an excel macro. You can also call VBA macros from Python using xlwings which is very useful.
You can write to an existing Excel file without overwriting data using pandas by using the pandas.DataFrame.to_excel() method and specifying the mode parameter as 'a' (append mode).
Here's an example:
import pandas as pd
# Create a sample DataFrame
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
# Write the DataFrame to an existing Excel file in append mode
df.to_excel('existing_file.xlsx', engine='openpyxl', mode='a', index=False, sheet_name='Sheet1')
Method:
Can create file if not present
Append to existing excel as per sheet name
import pandas as pd
from openpyxl import load_workbook
def write_to_excel(df, file):
try:
book = load_workbook(file)
writer = pd.ExcelWriter(file, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, **kwds)
writer.save()
except FileNotFoundError as e:
df.to_excel(file, **kwds)
Usage:
df_a = pd.DataFrame(range(10), columns=["a"])
df_b = pd.DataFrame(range(10, 20), columns=["b"])
write_to_excel(df_a, "test.xlsx", sheet_name="Sheet a", columns=['a'], index=False)
write_to_excel(df_b, "test.xlsx", sheet_name="Sheet b", columns=['b'])

How to write to an existing excel file without overwriting data (using pandas)?

I use pandas to write to excel file in the following fashion:
import pandas
writer = pandas.ExcelWriter('Masterfile.xlsx')
data_filtered.to_excel(writer, "Main", cols=['Diff1', 'Diff2'])
writer.save()
Masterfile.xlsx already consists of number of different tabs. However, it does not yet contain "Main".
Pandas correctly writes to "Main" sheet, unfortunately it also deletes all other tabs.
Pandas docs says it uses openpyxl for xlsx files. Quick look through the code in ExcelWriter gives a clue that something like this might work out:
import pandas
from openpyxl import load_workbook
book = load_workbook('Masterfile.xlsx')
writer = pandas.ExcelWriter('Masterfile.xlsx', engine='openpyxl')
writer.book = book
## ExcelWriter for some reason uses writer.sheets to access the sheet.
## If you leave it empty it will not know that sheet Main is already there
## and will create a new sheet.
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
data_filtered.to_excel(writer, "Main", cols=['Diff1', 'Diff2'])
writer.save()
UPDATE: Starting from Pandas 1.3.0 the following function will not work properly, because functions DataFrame.to_excel() and pd.ExcelWriter() have been changed - a new if_sheet_exists parameter has been introduced, which has invalidated the function below.
Here you can find an updated version of the append_df_to_excel(), which is working for Pandas 1.3.0+.
Here is a helper function:
import os
from openpyxl import load_workbook
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
>>> append_df_to_excel('d:/temp/test.xlsx', df)
>>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
# Excel file doesn't exist - saving and exiting
if not os.path.isfile(filename):
df.to_excel(
filename,
sheet_name=sheet_name,
startrow=startrow if startrow is not None else 0,
**to_excel_kwargs)
return
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
# save the workbook
writer.save()
Tested with the following versions:
Pandas 1.2.3
Openpyxl 3.0.5
With openpyxlversion 2.4.0 and pandasversion 0.19.2, the process #ski came up with gets a bit simpler:
import pandas
from openpyxl import load_workbook
with pandas.ExcelWriter('Masterfile.xlsx', engine='openpyxl') as writer:
writer.book = load_workbook('Masterfile.xlsx')
data_filtered.to_excel(writer, "Main", cols=['Diff1', 'Diff2'])
#That's it!
Starting in pandas 0.24 you can simplify this with the mode keyword argument of ExcelWriter:
import pandas as pd
with pd.ExcelWriter('the_file.xlsx', engine='openpyxl', mode='a') as writer:
data_filtered.to_excel(writer)
I know this is an older thread, but this is the first item you find when searching, and the above solutions don't work if you need to retain charts in a workbook that you already have created. In that case, xlwings is a better option - it allows you to write to the excel book and keeps the charts/chart data.
simple example:
import xlwings as xw
import pandas as pd
#create DF
months = ['2017-01','2017-02','2017-03','2017-04','2017-05','2017-06','2017-07','2017-08','2017-09','2017-10','2017-11','2017-12']
value1 = [x * 5+5 for x in range(len(months))]
df = pd.DataFrame(value1, index = months, columns = ['value1'])
df['value2'] = df['value1']+5
df['value3'] = df['value2']+5
#load workbook that has a chart in it
wb = xw.Book('C:\\data\\bookwithChart.xlsx')
ws = wb.sheets['chartData']
ws.range('A1').options(index=False).value = df
wb = xw.Book('C:\\data\\bookwithChart_updated.xlsx')
xw.apps[0].quit()
Old question, but I am guessing some people still search for this - so...
I find this method nice because all worksheets are loaded into a dictionary of sheet name and dataframe pairs, created by pandas with the sheetname=None option. It is simple to add, delete or modify worksheets between reading the spreadsheet into the dict format and writing it back from the dict. For me the xlsxwriter works better than openpyxl for this particular task in terms of speed and format.
Note: future versions of pandas (0.21.0+) will change the "sheetname" parameter to "sheet_name".
# read a single or multi-sheet excel file
# (returns dict of sheetname(s), dataframe(s))
ws_dict = pd.read_excel(excel_file_path,
sheetname=None)
# all worksheets are accessible as dataframes.
# easy to change a worksheet as a dataframe:
mod_df = ws_dict['existing_worksheet']
# do work on mod_df...then reassign
ws_dict['existing_worksheet'] = mod_df
# add a dataframe to the workbook as a new worksheet with
# ws name, df as dict key, value:
ws_dict['new_worksheet'] = some_other_dataframe
# when done, write dictionary back to excel...
# xlsxwriter honors datetime and date formats
# (only included as example)...
with pd.ExcelWriter(excel_file_path,
engine='xlsxwriter',
datetime_format='yyyy-mm-dd',
date_format='yyyy-mm-dd') as writer:
for ws_name, df_sheet in ws_dict.items():
df_sheet.to_excel(writer, sheet_name=ws_name)
For the example in the 2013 question:
ws_dict = pd.read_excel('Masterfile.xlsx',
sheetname=None)
ws_dict['Main'] = data_filtered[['Diff1', 'Diff2']]
with pd.ExcelWriter('Masterfile.xlsx',
engine='xlsxwriter') as writer:
for ws_name, df_sheet in ws_dict.items():
df_sheet.to_excel(writer, sheet_name=ws_name)
There is a better solution in pandas 0.24:
with pd.ExcelWriter(path, mode='a') as writer:
s.to_excel(writer, sheet_name='another sheet', index=False)
before:
after:
so upgrade your pandas now:
pip install --upgrade pandas
The solution of #MaxU is not working for the updated version of python and related packages. It raises the error:
"zipfile.BadZipFile: File is not a zip file"
I generated a new version of the function that works fine with the updated version of python and related packages and tested with python: 3.9 | openpyxl: 3.0.6 | pandas: 1.2.3
In addition I added more features to the helper function:
Now It resize all columns based on cell content width AND all variables will be visible (SEE "resizeColumns")
You can handle NaN, if you want that NaN are displayed as NaN or as empty cells (SEE "na_rep")
Added "startcol", you can decide to start to write from specific column, oterwise will start from col = 0
Here the function:
import pandas as pd
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None, startcol=None,
truncate_sheet=False, resizeColumns=True, na_rep = 'NA', **to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
Parameters:
filename : File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
df : dataframe to save to workbook
sheet_name : Name of sheet which will contain DataFrame.
(default: 'Sheet1')
startrow : upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
truncate_sheet : truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
resizeColumns: default = True . It resize all columns based on cell content width
to_excel_kwargs : arguments which will be passed to `DataFrame.to_excel()`
[can be dictionary]
na_rep: default = 'NA'. If, instead of NaN, you want blank cells, just edit as follows: na_rep=''
Returns: None
*******************
CONTRIBUTION:
Current helper function generated by [Baggio]: https://stackoverflow.com/users/14302009/baggio?tab=profile
Contributions to the current helper function: https://stackoverflow.com/users/4046632/buran?tab=profile
Original helper function: (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
Features of the new helper function:
1) Now it works with python 3.9 and latest versions of pandas and openpxl
---> Fixed the error: "zipfile.BadZipFile: File is not a zip file".
2) Now It resize all columns based on cell content width AND all variables will be visible (SEE "resizeColumns")
3) You can handle NaN, if you want that NaN are displayed as NaN or as empty cells (SEE "na_rep")
4) Added "startcol", you can decide to start to write from specific column, oterwise will start from col = 0
*******************
"""
from openpyxl import load_workbook
from string import ascii_uppercase
from openpyxl.utils import get_column_letter
from openpyxl import Workbook
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
try:
f = open(filename)
# Do something with the file
except IOError:
# print("File not accessible")
wb = Workbook()
ws = wb.active
ws.title = sheet_name
wb.save(filename)
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
except FileNotFoundError:
# file does not exist yet, we will create it
pass
if startrow is None:
# startrow = -1
startrow = 0
if startcol is None:
startcol = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, startcol=startcol, na_rep=na_rep, **to_excel_kwargs)
if resizeColumns:
ws = writer.book[sheet_name]
def auto_format_cell_width(ws):
for letter in range(1,ws.max_column):
maximum_value = 0
for cell in ws[get_column_letter(letter)]:
val_to_check = len(str(cell.value))
if val_to_check > maximum_value:
maximum_value = val_to_check
ws.column_dimensions[get_column_letter(letter)].width = maximum_value + 2
auto_format_cell_width(ws)
# save the workbook
writer.save()
Example Usage:
# Create a sample dataframe
df = pd.DataFrame({'numbers': [1, 2, 3],
'colors': ['red', 'white', 'blue'],
'colorsTwo': ['yellow', 'white', 'blue'],
'NaNcheck': [float('NaN'), 1, float('NaN')],
})
# EDIT YOUR PATH FOR THE EXPORT
filename = r"C:\DataScience\df.xlsx"
# RUN ONE BY ONE IN ROW THE FOLLOWING LINES, TO SEE THE DIFFERENT UPDATES TO THE EXCELFILE
append_df_to_excel(filename, df, index=False, startrow=0) # Basic Export of df in default sheet (Sheet1)
append_df_to_excel(filename, df, sheet_name="Cool", index=False, startrow=0) # Append the sheet "Cool" where "df" is written
append_df_to_excel(filename, df, sheet_name="Cool", index=False) # Append another "df" to the sheet "Cool", just below the other "df" instance
append_df_to_excel(filename, df, sheet_name="Cool", index=False, startrow=0, startcol=5) # Append another "df" to the sheet "Cool" starting from col 5
append_df_to_excel(filename, df, index=False, truncate_sheet=True, startrow=10, na_rep = '') # Override (truncate) the "Sheet1", writing the df from row 10, and showing blank cells instead of NaN
def append_sheet_to_master(self, master_file_path, current_file_path, sheet_name):
try:
master_book = load_workbook(master_file_path)
master_writer = pandas.ExcelWriter(master_file_path, engine='openpyxl')
master_writer.book = master_book
master_writer.sheets = dict((ws.title, ws) for ws in master_book.worksheets)
current_frames = pandas.ExcelFile(current_file_path).parse(pandas.ExcelFile(current_file_path).sheet_names[0],
header=None,
index_col=None)
current_frames.to_excel(master_writer, sheet_name, index=None, header=False)
master_writer.save()
except Exception as e:
raise e
This works perfectly fine only thing is that formatting of the master file(file to which we add new sheet) is lost.
writer = pd.ExcelWriter('prueba1.xlsx'engine='openpyxl',keep_date_col=True)
The "keep_date_col" hope help you
I used the answer described here
from openpyxl import load_workbook
writer = pd.ExcelWriter(p_file_name, engine='openpyxl', mode='a')
writer.book = load_workbook(p_file_name)
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
df.to_excel(writer, 'Data', startrow=10, startcol=20)
writer.save()
book = load_workbook(xlsFilename)
writer = pd.ExcelWriter(self.xlsFilename)
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, sheet_name=sheetName, index=False)
writer.save()
Solution by #MaxU worked very well. I have just one suggestion:
If truncate_sheet=True is specified than "startrow" should NOT be retained from existing sheet. I suggest:
if startrow is None and sheet_name in writer.book.sheetnames:
if not truncate_sheet: # truncate_sheet would use startrow if provided (or zero below)
startrow = writer.book[sheet_name].max_row
I'd reccommend using xlwings (https://docs.xlwings.org/en/stable/api.html), it is really powerful for this application... This is how I use it:
import xlwings as xw
import pandas as pd
import xlsxwriter
# function to get the active workbook
def getActiveWorkbook():
try:
# logic from xlwings to grab the current excel file
activeWb = xw.books.active
except:
# print error message if unable to get the current workbook
print('Unable to grab the current Workbook')
pause()
exitProgram()
else:
return activeWb
# function that returns the last row number and last cell of a sheet
def getLastRow(myBook, sheetName):
lastRow = myBook.sheets[sheetName].range("A1").current_region.last_cell.row
lastCol = str(xlsxwriter.utility.xl_col_to_name(myBook.sheets[sheetName].range("A1").current_region.last_cell.column))
return str(lastRow), lastCol + str(lastRow)
activeWb = getActiveWorkbook()
df = pd.DataFrame(data=[1,2,3])
# look at worksheet = Part Number Status
sheetName = "Sheet1"
ws = activeWb.sheets[sheetName]
lastRow, lastCell = getLastRow(activeWb, sheetName)
if int(lastRow) > 1:
ws.range("A1:" + lastCell).clear()
ws.range("A1").options(index=False, header=False).value = df.fillna('')
This seems to work very well for my applications because .xlsm workbooks can be very tricky. You can execute this as a python script or turn it into and executable with pyinstaller and then run the .exe through an excel macro. You can also call VBA macros from Python using xlwings which is very useful.
You can write to an existing Excel file without overwriting data using pandas by using the pandas.DataFrame.to_excel() method and specifying the mode parameter as 'a' (append mode).
Here's an example:
import pandas as pd
# Create a sample DataFrame
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
# Write the DataFrame to an existing Excel file in append mode
df.to_excel('existing_file.xlsx', engine='openpyxl', mode='a', index=False, sheet_name='Sheet1')
Method:
Can create file if not present
Append to existing excel as per sheet name
import pandas as pd
from openpyxl import load_workbook
def write_to_excel(df, file):
try:
book = load_workbook(file)
writer = pd.ExcelWriter(file, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, **kwds)
writer.save()
except FileNotFoundError as e:
df.to_excel(file, **kwds)
Usage:
df_a = pd.DataFrame(range(10), columns=["a"])
df_b = pd.DataFrame(range(10, 20), columns=["b"])
write_to_excel(df_a, "test.xlsx", sheet_name="Sheet a", columns=['a'], index=False)
write_to_excel(df_b, "test.xlsx", sheet_name="Sheet b", columns=['b'])

Categories

Resources