Create Excel from Parent excel - python

Below creates a file and then fills in the excel. I would like to create an excel file with a condition based on a single column and redeposit the excel with a prefix of the column name.
So return only where columnX = i and create and save excel file i1_CCBHC_MONTHLY_CLAIMS.XLSX
i2_CCBHC_MONTHLY_CLAIMS.XLSX
I have the build of the large "parent" excel file.
filename = 'CCBHC_Monthly_Claims.xlsx'
if os.path.isfile(filename):
wb = xw.Book(filename)
ws = wb.sheets['CCBHC_DATA']
ws.range('A1').options(index=False).value = df_ora
wb = xw.Book(filename)
xw.apps[0].quit()
else:
writer = pd.ExcelWriter(filename, engine='xlsxwriter')
*df_ora.to_excel(writer, sheet_name='CCBHC_DATA',index=False)
wb = xw.Book(filename)
ws = wb.sheets['CCBHC_DATA']
ws.range('A1').options(in*dex=False).value = df_ora
wb = xw.Book(filename)
xw.apps[0].quit()

Related

Automatically appending Excel tables to themselves

I am attempting to write some code where for every time I run a python script a data frame (that has been made) automatically becomes a excel table in a defined folder path. However I want it to work in such a way that by re running the code the data frame would append to the end of the existing excel table, creating a new excel table. Currently I am using this code to do the data overlap:
def append_df_to_excel(filename, df, sheet_name='Sheet2', startrow=None, startcol=None,
truncate_sheet=False, resizeColumns=True, na_rep = 'NA', **to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
Returns: None
"""
from openpyxl import load_workbook
from string import ascii_uppercase
from openpyxl.utils import get_column_letter
from openpyxl import Workbook
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
try:
f = open(filename)
# Do something with the file
except IOError:
# print("File not accessible")
wb = Workbook()
ws = wb.active
ws.title = sheet_name
wb.save(filename)
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a', if_sheet_exists = 'overlay')
# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
except FileNotFoundError:
# file does not exist yet, we will create it
pass
if startrow is None:
# startrow = -1
startrow = 0
if startcol is None:
startcol = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, startcol=startcol, na_rep=na_rep, **to_excel_kwargs,header = False, index = False)
ws = writer.book[sheet_name]
if resizeColumns:
def auto_format_cell_width(ws):
for letter in range(1,ws.max_column):
maximum_value = 0
for cell in ws[get_column_letter(letter)]:
val_to_check = len(str(cell.value))
if val_to_check > maximum_value:
maximum_value = val_to_check
ws.column_dimensions[get_column_letter(letter)].width = maximum_value + 2
auto_format_cell_width(ws)
writer.save()
This code successfully allows me to run the code as many times as i want and append the data onto the end of the previously ran python script. However those outputted excel sheets are not in table format.
Currently my attempt to make a table is as follows:
ws = writer.book[sheet_name]
def make_table(worksheet, df):
column_settings = []
for header in df.columns:
column_settings.append( header)
table = Table(displayName="Contacts", ref="A1:" + get_column_letter(worksheet.max_column) + str(worksheet.max_row))
table._initialise_columns()
for column, value in zip(table.tableColumns, column_settings):
column.name = value
worksheet = worksheet.add_table(table)
However the column names do not update accordingly in the excel sheet, excel cites an error for this along the lines of 'had to recover/delete unworkable parts'
But also upon trying to run the script a second time the following python error:
'Table with name Contacts already exists'
Any help would be greatly appreciated!
Here is a toy data frame for testing:
df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
columns=['a', 'b', 'c'])

Python openpyxl error: BadZipFile: File is not a zip file

Facing the error "BadZipFile: File is not a zip file" when loading excel workbook using openpyxl load_workbook function. How do I solve this error?
workbook = r'C:\Desktop\Test.xlsx'
worksheet = 'Data'
# create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter(workbook, engine='openpyxl')
wb = load_workbook(workbook)
writer.book = workbook
writer.sheets = {x.title: x for x in wb.worksheets}
ws = writer.sheets[worksheet]
for i in range(len(vehicle_sales)):
row = list(vehicle_sales.iloc[i])
for j in range(len(vehicle_sales.columns)):
value = row[j]
ws.cell(i+2, j+1, value)
xl.writer.excel.save_workbook(wb, workbook)
The excel files were in read-only mode. I saved the file as a new file and load_workbook worked.

original sheet was removed when new data table writed to excel file

my propose:
if excel file not exist, create it and copy data table to it;
if excel file exist, copy to data table to new sheet.
but following code running, only copy to data to new sheet, original sheet in excel file was removed.
import os
import pandas as pd
import openpyxl
f_name = "123.xlsx" #target excel file
if os.path.exists(f_name):
"""if excel file exist, added table to another sheet"""
wb = openpyxl.load_workbook(f_name) #load excel file
writer = pd.ExcelWriter(f_name, engine="openpyxl")
writer.wb = wb
df = pd.DataFrame(pd.read_excel("table_2.xlsx")) #get table to be added excel file
df.to_excel(writer, sheet_name="sheet2",index=False) #write to another sheet
writer.save()
writer.close()
else:
"""if excel file not exit, create it"""
df_1 = pd.DataFrame() # create excel file
df_1.to_excel(f_name)
writer = pd.ExcelWriter(f_name)
df_2 = pd.DataFrame(pd.read_excel("table_1.xlsx")) # get table_1
df_2.to_excel(writer, sheet_name="sheet1",index=False) # write table_1 into excel file
writer.save()
writer.close()
import os
import pandas as pd
import openpyxl
f_name = "123.xlsx" #target excel file
if os.path.exists(f_name):
"""if excel file exist, added table to another sheet"""
wb = openpyxl.load_workbook(f_name) #load excel file
writer = pd.ExcelWriter(f_name, engine="openpyxl") #assign engine
writer.book = wb #overwrite if no this
df = pd.DataFrame(pd.read_excel("table_2.xlsx")) #get table to be added excel file
df.to_excel(writer, sheet_name="table_2",index=False) #write to another sheet
writer.save()
writer.close()
else:
"""if excel file not exit, create it"""
df_1 = pd.DataFrame() # create excel file
df_1.to_excel(f_name)
writer = pd.ExcelWriter(f_name)
df_2 = pd.DataFrame(pd.read_excel("table_1.xlsx")) # get table_1
df_2.to_excel(writer, sheet_name="table_1",index=False) # write table_1 into excel file
writer.save()
writer.close()

How to stop openpyxl - python from clearing my excel file every time I re-run the program?

I wrote a simple program for testing with openpyxl where I simply open the .xlsx file, input data into a certain cell, then close the program and run it again, inputting data in a different cell, but when I open the .xlsx after running the program for the second.
My assumption is that openpyxl clears the entire .xlsx file everytime you open it again, is there a way to avoid this?
Here is my code:
from openpyxl import Workbook
wb = Workbook()
dest_filename = 'teste.xlsx'
ws = wb.active
ws.title = "2017"
Row = int(input('row: '))
Column = int(input('column: '))
data = input('data: ')
ws.cell(row = Row, column = Column).value = data
wb.save(filename = dest_filename)
Here is the .xlsx file after running the program for the first time
Here is the .xlsx file after running the program for the second time
You have not read the excel file at all:
Use this to read the existing workbook:
from openpyxl import Workbook,load_workbook
import os
dest_filename = 'teste.xlsx'
if os.path.isfile(dest_filename):
wb = load_workbook(filename = dest_filename)
else:
wb = Workbook()
ws = wb.active
ws.title = "2017"
Row = int(input('row: '))
Column = int(input('column: '))
data = input('data: ')
ws.cell(row = Row, column = Column).value = data
wb.save(filename = dest_filename)
Output:

Importing Multiple Excel Files using OpenPyXL

I am trying to read in multiple excel files and append the data from each file into one master file. Each file will have the same headers (So I can skip the import of the first row after the initial file).
I am pretty new to both Python and the OpenPyXL module. I am able to import the first workbook without problem. My problem comes in when I need to open the subsequent file and copy the data to paste into the original worksheet.
Here is my code so far:
# Creating blank workbook
from openpyxl import Workbook
wb = Workbook()
# grab active worksheet
ws = wb.active
# Read in excel data
from openpyxl import load_workbook
wb = load_workbook('first_file.xlsx') #explicitly loading workbook, will automate later
# grab active worksheet in current workbook
ws = wb.active
#get max columns and rows
sheet = wb.get_sheet_by_name('Sheet1')
print ("Rows: ", sheet.max_row) # for debugging purposes
print ("Columns: ", sheet.max_column) # for debugging purposes
last_data_point = ws.cell(row = sheet.max_row, column = sheet.max_column).coordinate
print ("Last data point in current worksheet:", last_data_point) #for debugging purposes
#import next file and add to master
append_point = ws.cell(row = sheet.max_row + 1, column = 1).coordinate
print ("Start new data at:", append_point)
wb = load_workbook('second_file.xlsx')
sheet2 = wb.get_sheet_by_name('Sheet1')
start = ws.cell(coordinate='A2').coordinate
print("New data start: ", start)
end = ws.cell(row = sheet2.max_row, column = sheet2.max_column).coordinate
print ("New data end: ", end)
# write a value to selected cell
#sheet[append_point] = 311
#print (ws.cell(append_point).value)
#save file
wb.save('master_file.xlsx')
Thanks!
I don't really understand your code. It looks too complicated. When copying between worksheets you probably want to use ws.rows.
wb1 = load_workbook('master.xlsx')
ws2 = wb1.active
for f in files:
wb2 = load_workbook(f)
ws2 = wb2['Sheet1']
for row in ws2.rows[1:]:
ws1.append((cell.value for cell in row))

Categories

Resources