Pivot tabele automation with Python - python

I'm trying to automate pivot table creation using Python, but after runing my code I got an error: NameError: name 'run_excel' is not defined
I've wrote my script on the bassis of this article : https://towardsdatascience.com/automate-excel-with-python-pivot-table-899eab993966
Can someone explain to me how to solve this problem?
enter image description here
Here is my code:
import win32com.client as win32
import pandas as pd
import numpy as np
from pathlib import Path
import re
import sys
win32c = win32.constants
df = pd.read_csv("act 05.01.2022.csv", encoding = 'unicode_escape', engine ='python')
df = df.dropna()
df.to_excel(r"C:\Users\szczepan.czarniak\Desktop\Visual Studio\1\act 05.01.2022.xlsx", sheet_name = 'act 05.01.2022', index = False)
def pivot_table(wb: object, ws1: object, pt_ws: object, ws_name: str, pt_name: str, pt_rows: list, pt_filters: list, pt_fields: list):
"""
wb = workbook1 reference
ws1 = worksheet1 that contain the data
pt_ws = pivot table worksheet number
ws_name = pivot table worksheet name
pt_name = name given to pivot table
pt_rows, pt_cols, pt_filters, pt_fields: values selected for filling the pivot tables
"""
# pivot table location
pt_loc = len(pt_filters) + 2
# grab the pivot table source data
pc = wb.PivotCaches().Create(SourceType=win32c.xlDatabase, SourceData=ws1.UsedRange)
# create the pivot table object
pc.CreatePivotTable(TableDestination=f'{ws_name}!R{pt_loc}C1', TableName=pt_name)
# selecte the pivot table work sheet and location to create the pivot table
pt_ws.Select()
pt_ws.Cells(pt_loc, 1).Select()
# Sets the rows, columns and filters of the pivot table
for field_list, field_r in ((pt_filters, win32c.xlPageField),
(pt_rows, win32c.xlRowField)):
for i, value in enumerate(field_list):
pt_ws.PivotTables(pt_name).PivotFields(value).Orientation = field_r
pt_ws.PivotTables(pt_name).PivotFields(value).Position = i + 1
# Sets the Values of the pivot table
for field in pt_fields:
pt_ws.PivotTables(pt_name).AddDataField(pt_ws.PivotTables(pt_name).PivotFields(field[0]), field[1], field[2]).NumberFormat = field[3]
# Visiblity True or Valse
pt_ws.PivotTables(pt_name).ShowValuesRow = True
pt_ws.PivotTables(pt_name).ColumnGrand = True
def run_excel(f_path: Path, f_name: str, sheet_name: str):
filename = f_path / f_name
# create excel object
excel = win32.gencache.EnsureDispatch('Excel.Application')
# excel can be visible or not
excel.Visible = True # False
# try except for file / path
try:
wb = excel.Workbooks.Open(filename)
except com_error as e:
if e.excepinfo[5] == -2146827284:
print(f'Failed to open spreadsheet. Invalid filename or location: {filename}')
else:
raise e
sys.exit(1)
# set worksheet
ws1 = wb.Sheets('act 05.01.2022')
# Setup and call pivot_table
ws2_name = 'pivot_table'
wb.Sheets.Add().Name = ws2_name
ws2 = wb.Sheets(ws2_name)
# update the pt_name, pt_rows, pt_cols, pt_filters, pt_fields at your preference
pt_name = 'example' # pivot table name, must be a string
pt_rows = ['decision_date'] # rows of pivot table, must be a list
# pt_cols = [] # columns of pivot table, must be a list
pt_filters = ['open_loan_count_all', 'days_between_repayments'] # filter to be applied on pivot table, must be a list
# [0]: field name [1]: pivot table column name [3]: calulation method [4]: number format (explain the list item of pt_fields below)
pt_fields = [['id', 'Total id', win32c.xlCount, '0']]
# calculation method: xlAverage, xlSum, xlCount
pivot_table(wb, ws1, ws2, ws2_name, pt_name, pt_rows, pt_filters, pt_fields)
wb.Save() # save the pivot table created
# wb.Close(True)
# excel.Quit()
def main():
# sheet name for data
sheet_name = 'act 05.01.2022' # update with sheet name from your file
# file path
f_path = Path.cwd() # file in current working directory
# f_path = Path(r'c:\...\Documents') # file located somewhere else
# excel file
f_name = r"C:\Users\szczepan.czarniak\Desktop\Visual Studio\1\act 05.01.2022.xlsx" # change to your Excel file name
# function calls
run_excel(f_path, f_name, sheet_name)
main()
f_path = Path.cwd()
f_name = 'act 05.01.2022.xlsx'
filename = f_path / f_name
# create excel object
excel = win32.gencache.EnsureDispatch('Excel.Application')
# excel can be visible or not
excel.Visible = True # False
wb = excel.Workbooks.Open(filename)
pvtTable = wb.Sheets("pivot_table").Range("A3").PivotTable
page_range_item = []
for i in pvtTable.PageRange:
page_range_item.append(str(i))
print(page_range_item)
pvtTable.PivotFields('open_loan_count_all', 'days_between_repayments').ClearAllFilters()

Your run_excel function is defined inside the pivot_table function therefore it's not in the scope of you main() function. Did not test this code but fixed the formatting below.
import win32com.client as win32
import pandas as pd
import numpy as np
from pathlib import Path
import re
import sys
win32c = win32.constants
df = pd.read_csv("act 05.01.2022.csv", encoding = 'unicode_escape', engine ='python')
df = df.dropna()
df.to_excel(r"C:\Users\szczepan.czarniak\Desktop\Visual Studio\1\act 05.01.2022.xlsx", sheet_name = 'act 05.01.2022', index = False)
def pivot_table(wb: object, ws1: object, pt_ws: object, ws_name: str, pt_name: str, pt_rows: list, pt_filters: list, pt_fields: list):
"""
wb = workbook1 reference
ws1 = worksheet1 that contain the data
pt_ws = pivot table worksheet number
ws_name = pivot table worksheet name
pt_name = name given to pivot table
pt_rows, pt_cols, pt_filters, pt_fields: values selected for filling the pivot tables
"""
# pivot table location
pt_loc = len(pt_filters) + 2
# grab the pivot table source data
pc = wb.PivotCaches().Create(SourceType=win32c.xlDatabase, SourceData=ws1.UsedRange)
# create the pivot table object
pc.CreatePivotTable(TableDestination=f'{ws_name}!R{pt_loc}C1', TableName=pt_name)
# selecte the pivot table work sheet and location to create the pivot table
pt_ws.Select()
pt_ws.Cells(pt_loc, 1).Select()
# Sets the rows, columns and filters of the pivot table
for field_list, field_r in ((pt_filters, win32c.xlPageField),
(pt_rows, win32c.xlRowField)):
for i, value in enumerate(field_list):
pt_ws.PivotTables(pt_name).PivotFields(value).Orientation = field_r
pt_ws.PivotTables(pt_name).PivotFields(value).Position = i + 1
# Sets the Values of the pivot table
for field in pt_fields:
pt_ws.PivotTables(pt_name).AddDataField(pt_ws.PivotTables(pt_name).PivotFields(field[0]), field[1], field[2]).NumberFormat = field[3]
# Visiblity True or Valse
pt_ws.PivotTables(pt_name).ShowValuesRow = True
pt_ws.PivotTables(pt_name).ColumnGrand = True
def run_excel(f_path: Path, f_name: str, sheet_name: str):
filename = f_path / f_name
# create excel object
excel = win32.gencache.EnsureDispatch('Excel.Application')
# excel can be visible or not
excel.Visible = True # False
# try except for file / path
try:
wb = excel.Workbooks.Open(filename)
except com_error as e:
if e.excepinfo[5] == -2146827284:
print(f'Failed to open spreadsheet. Invalid filename or location: {filename}')
else:
raise e
sys.exit(1)
# set worksheet
ws1 = wb.Sheets('Sales')
# Setup and call pivot_table
ws2_name = 'pivot_table'
wb.Sheets.Add().Name = ws2_name
ws2 = wb.Sheets(ws2_name)
# update the pt_name, pt_rows, pt_cols, pt_filters, pt_fields at your preference
pt_name = 'example' # pivot table name, must be a string
pt_rows = ['Genre'] # rows of pivot table, must be a list
# pt_cols = [] # columns of pivot table, must be a list
pt_filters = ['Year'] # filter to be applied on pivot table, must be a list
# [0]: field name [1]: pivot table column name [3]: calulation method [4]: number format (explain the list item of pt_fields below)
pt_fields = [['North America', 'Total Sales in North America', win32c.xlSum, '0'], # must be a list of lists
['Europe', 'Total Sales in Europe', win32c.xlSum, '0'],
['Japan', 'Total Sales in Japan', win32c.xlSum, '0'],
['Rest of World', 'Total Sales in Rest of World', win32c.xlSum, '0'],
['Global', 'Total Global Sales', win32c.xlSum, '0']]
# calculation method: xlAverage, xlSum, xlCount
pivot_table(wb, ws1, ws2, ws2_name, pt_name, pt_rows, pt_filters, pt_fields)
wb.Save() # save the pivot table created
# wb.Close(True)
# excel.Quit()
def main():
# sheet name for data
sheet_name = 'act 05.01.2022' # update with sheet name from your file
# file path
f_path = Path.cwd() # file in current working directory
# f_path = Path(r'c:\...\Documents') # file located somewhere else
# excel file
f_name = r"C:\Users\szczepan.czarniak\Desktop\Visual Studio\1\act 05.01.2022.xlsx" # change to your Excel file name
# function calls
run_excel(f_path, f_name, sheet_name)
main()
f_path = Path.cwd()
f_name = 'act 05.01.2022.xlsx'
filename = f_path / f_name
# create excel object
excel = win32.gencache.EnsureDispatch('Excel.Application')
# excel can be visible or not
excel.Visible = True # False
wb = excel.Workbooks.Open(filename)
pvtTable = wb.Sheets("pivot_table").Range("A3").PivotTable
page_range_item = []
for i in pvtTable.PageRange:
page_range_item.append(str(i))
print(page_range_item)
pvtTable.PivotFields('open_loan_count_all', 'days_between_repayments').ClearAllFilters()

### Pivot the dataset
pivot_df = pd.pivot(df, index =['Date'], columns ='Country', values =['NewConfirmed'])
## renaming the columns
pivot_df.columns = df['Country'].sort_values().unique()

Related

Automatically appending Excel tables to themselves

I am attempting to write some code where for every time I run a python script a data frame (that has been made) automatically becomes a excel table in a defined folder path. However I want it to work in such a way that by re running the code the data frame would append to the end of the existing excel table, creating a new excel table. Currently I am using this code to do the data overlap:
def append_df_to_excel(filename, df, sheet_name='Sheet2', startrow=None, startcol=None,
truncate_sheet=False, resizeColumns=True, na_rep = 'NA', **to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
Returns: None
"""
from openpyxl import load_workbook
from string import ascii_uppercase
from openpyxl.utils import get_column_letter
from openpyxl import Workbook
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
try:
f = open(filename)
# Do something with the file
except IOError:
# print("File not accessible")
wb = Workbook()
ws = wb.active
ws.title = sheet_name
wb.save(filename)
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a', if_sheet_exists = 'overlay')
# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
except FileNotFoundError:
# file does not exist yet, we will create it
pass
if startrow is None:
# startrow = -1
startrow = 0
if startcol is None:
startcol = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, startcol=startcol, na_rep=na_rep, **to_excel_kwargs,header = False, index = False)
ws = writer.book[sheet_name]
if resizeColumns:
def auto_format_cell_width(ws):
for letter in range(1,ws.max_column):
maximum_value = 0
for cell in ws[get_column_letter(letter)]:
val_to_check = len(str(cell.value))
if val_to_check > maximum_value:
maximum_value = val_to_check
ws.column_dimensions[get_column_letter(letter)].width = maximum_value + 2
auto_format_cell_width(ws)
writer.save()
This code successfully allows me to run the code as many times as i want and append the data onto the end of the previously ran python script. However those outputted excel sheets are not in table format.
Currently my attempt to make a table is as follows:
ws = writer.book[sheet_name]
def make_table(worksheet, df):
column_settings = []
for header in df.columns:
column_settings.append( header)
table = Table(displayName="Contacts", ref="A1:" + get_column_letter(worksheet.max_column) + str(worksheet.max_row))
table._initialise_columns()
for column, value in zip(table.tableColumns, column_settings):
column.name = value
worksheet = worksheet.add_table(table)
However the column names do not update accordingly in the excel sheet, excel cites an error for this along the lines of 'had to recover/delete unworkable parts'
But also upon trying to run the script a second time the following python error:
'Table with name Contacts already exists'
Any help would be greatly appreciated!
Here is a toy data frame for testing:
df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
columns=['a', 'b', 'c'])

Splitting Excel Data by Groupings into Separate Workbook Sheets

Background:I have a large 40MB XLSX file that contains client data which is Grouped over multiple levels, like so:
Expanded -
Not Expanded (sorry about the terrible dummy data!) -
Objective:I would like to split Client A, B C etc... and all their respective underlying data into separate sheets (named 'Client A' etc...) in a Workbook.
Question:Am I correct in assuming that there is no python library that would help with this (e.g., xlsxwriter) and that I will likely have to save into multiple pandas df before splitting and writing to the xlsx file?
Sample Data:Here is a link to some randomized sample data. In this file you will see only 1 client (the total row can be ignored) however imagine the normal file having 40 clients / groupings and sub levels.
Sample Code: this function takes the '.xlsxand writes each grouping to an appropriately named tab (e.g., 'Client A') to a separate Worksheet in a new.xlsx`. The issue with this code is that because I am basically going through and copying each cell individually, I didn't think to consider more holistically however to ensure the Groupings/Levels would be preserved. I think this code needs a complete re-write, and welcome feedback
import openpyxl
from copy import copy
from openpyxl import load_workbook
columns=['A','B','C','D','E','F','G','H','I','J','K','L']
def copy_cell(ws, row,ws_row,ws1):
for col in columns:
ws_cell=ws1[col+str(ws_row)]
new_cell = ws[col+str(row)]
if ws_cell.has_style:
new_cell.font = copy(ws_cell.font)
new_cell.border = copy(ws_cell.border)
new_cell.fill = copy(ws_cell.fill)
new_cell.number_format = copy(ws_cell.number_format)
new_cell.protection = copy(ws_cell.protection)
new_cell.alignment = copy(ws_cell.alignment)
wb1 = openpyxl.load_workbook('annonamized_test_data_to_be_split.xlsx')
ws1=wb1.active
indexs=[]
clients=[]
index=1
while ws1['A'+str(index)]:
if str(ws1['A'+str(index)].alignment.indent)=='0.0':
indexs.append(index)
clients.append(ws1['A'+str(index)].value)
if ws1['A'+str(index)].value is None:
indexs.append(index)
break
index+=1
wb1.close()
wb = openpyxl.Workbook()
ws=wb.active
start_index=1
headers=['Ownership Structure', 'Fee Schedule', 'Management Style', 'Advisory Firm', 'Inception Date', 'Days in Time Period', 'Adjusted Average Daily Balance (No Div, USD)', 'Assets Billed On (USD)',
'Effective Billing Rate', 'Billing Fees (USD)', 'Bill To Account', 'Model Type']
for y,index in enumerate(indexs):
try:
client=0
if len(clients[y])>=32:
client=clients[y][:31]
else:
client=clients[y]
wb.create_sheet(client)
ws=wb[client]
ws.column_dimensions['A'].width=35
ws.append(headers)
row_index=2
for i in range(start_index,indexs[y+1]):
ws.append([ws1[col+str(i)].value for col in columns])
copy_cell(ws,row_index,i,ws1)
row_index+=1
start_index=indexs[y+1]
except:
pass
wb.save('split_data.xlsx')
wb.close()
try:
wb1 = openpyxl.load_workbook('split_data.xlsx')
a=wb1['Sheet']
wb1.remove(a)
a=wb1['Sheet1']
wb1.remove(a)
wb1.save('split_data.xlsx')
wb1.close()
except:
pass
Please can someone point me in the right direction of a resource that might teach me how to achieve this?
from openpyxl import load_workbook
def get_client_rows(sheet):
"""Get client rows.
Skip header and then look for row dimensions without outline level
"""
return [row[0].row for row in sheet.iter_rows(2) if row[0].alignment.indent == 0.0]
return [
row_index
for row_index, row_dimension in sheet.row_dimensions.items()
if row_index > 1 and row_dimension.outline_level == 0
]
def delete_client_block(sheet, start, end):
"""
Delete rows starting from up to and including end.
"""
for row in range(start, end + 1):
sheet.row_dimensions.pop(row, None)
sheet.delete_rows(start, end - start + 1)
def split_workbook(input_file, output_file):
"""
Split workbook each main group into its own sheet.
Not too loose any formatting we copy the current sheet and remove all rows
which do not belong to extacted group.
"""
try:
workbook = load_workbook(input_file)
data_sheet = workbook.active
client_rows = get_client_rows(data_sheet)
for index, client_row in enumerate(client_rows):
# create new sheet for given client, shorten client as it might be too long
client_sheet = workbook.copy_worksheet(data_sheet)
client_sheet.title = data_sheet.cell(client_row, 1).value[:32]
# delete rows after current client if available
if index < len(client_rows) - 1:
row_after_client = client_rows[index + 1]
delete_client_block(
client_sheet, row_after_client, client_sheet.max_row
)
# delete rows before current client if available
if index > 0:
first_client_row = client_rows[0]
delete_client_block(
client_sheet, first_client_row, client_row - first_client_row + 1
)
# move left over dimensions to top of the sheet
for row_index in list(client_sheet.row_dimensions.keys()):
# skip header row dimension
if row_index > first_client_row - 1:
row_dimension = client_sheet.row_dimensions.pop(row_index)
new_index = row_index - client_row + first_client_row
row_dimension.index = new_index
client_sheet.row_dimensions[new_index] = row_dimension
del workbook[data_sheet.title]
workbook.save(output_file)
finally:
workbook.close()
if __name__ == "__main__":
# input_file = "annonamized_test_data_to_be_split.xlsx"
input_file = 'partial_Q1_Client_Billing_Data.xlsx'
# output_file = "split_data.xlsx"
output_file = "splitting_full_data.xlsx"
split_workbook(input_file, output_file)

Unable to convert pandas dataframe to csv file

I want to create a dataset.csv file from the various raw files within my input_path. However, my code didn't seem to generate the csv file.
import os
import pandas as pd
from zipfile import ZipFile
import cv2
import re
import csv
from sklearn.dummy import DummyClassifier
input_path = "../input_data/"
class RawToCSV:
def __init__(self, path_, df):
self.df = df
self.measurement_df = None
self.cls = None
self.path_ = path_
def raw_file_processing(self, path_):
# Open all the subfolders within path
for root, dirs, files in os.walk(path_):
for file in files:
with open(os.path.join(root, file), "r") as data:
df = pd.read_csv(data)
# Create the ID series by concatenating columns 1-3
df = df.assign(ID=df[['cell_id:cell_id', 'region:region', 'tile_num:tile_num']].apply(
lambda row: '_'.join([str(each) for each in row]), axis=1))
df = df.drop(columns=['cell_id:cell_id', 'region:region', 'tile_num:tile_num'])
# The class info is the tile_num (i.e, column 3) - TODO: Check if class info is tile_num
cls_col = df.iloc[2]
# Dummy-code the classes
cls = pd.get_dummies(cls_col)
# Obtain measurement info
# Normalize data against blank/empty columns
# log-transform the data
for col in df[9:]:
if re.search(r"(Blank|Empty)$", col):
background = col
else:
line = col.readline()
for data in line:
norm_data = data/ background
self.measurement_df = np.log2(norm_data)
return self.df["ID"], cls, self.measurement_df
def dataset_csv(self):
"""Col 1: ID
Col 2: class
Col 3-n: measurements"""
ids = self.df["ID"]
id_col = ids.to_frame()
cls_col = self.cls.to_frame()
frames = [id_col, cls_col, self.measurement_df]
dataset_df = pd.concat(frames)
data_csv = dataset_df.to_csv(r"../input_data/dataset.csv")
return data_csv
dataset = RawToCSV.dataset_csv(input_path)
Desired output dataset.csv
ID
Class
measurement_A
measurement_B
Sample1
value
value
value
Sample2
value
value
value
Not sure if you need to treat your url path as a row string. You should simply try:
data_csv = dataset_df.to_csv("../input_data/dataset.csv")
Ps : and as a test, start by saving your csv in the same folder where your script is:
data_csv = dataset_df.to_csv("dataset.csv")

Openpyxl can't write an excel file more than once

I am trying to write to excel files using openpyxl module. For some reason it only lets me write once. If I try to write again it raises:
PermissionError: [Errno 13] Permission denied: 'expenses.xlsx'
The excel file and python program are in the same folder on D drive. What's the problem?
from openpyxl import Workbook
from openpyxl import load_workbook
from datetime import datetime
import os
class ExpenseTracker:
def __init__(self, file_name = "expenses.xlsx"):
self.fname = file_name
self.load_wb()
def load_wb(self):
"""
if the excel file doesn't exists it creates a new one
with a sheet, and calls self.col_values() which
adds values for first two columns in row 1
"""
try:
wb = load_workbook(self.fname)
except Exception:
wb = Workbook()
wb.create_sheet("Expenses", 0)
self.col_values()
wb.save(self.fname)
finally:
self.wb = wb
self.ws = self.wb["Expenses"]
def col_values(self):
# adds values for first two columns in row 1
self.ws.cell(row = 1, column = 1).value = "Date"
self.ws.cell(row = 1, column = 2).value = "Spent"
def spend_income(self, amount):
date = datetime.now()
date_formatted = date.strftime("%d.%b %Y")
last_row = self.ws.max_row + 1
last_col = self.ws.max_column + 1
self.ws.cell(row = last_row, column = 1).value = date_formatted
self.ws.cell(row = last_row, column = 2).value = amount
# writes under the last input in cols 1 and 2
self.wb.save(self.fname)
wbook = ExpenseTracker()
wbook.spend_income(5)

Read and Write multiple excel data into one excel file using openpyxl

I am trying to copy the data from multiple excel into one excel. I am novice to python and openpyxl. So i have opened each file and went row by row and copied them. I want to do this with multiple files. How do i loop through row and columns and copy the data consider the column in all the files are same order?
import openpyxl as xl
from openpyxl import workbook
incident_wb = xl.load_workbook('incident resolved yesterday.xlsx')
incident_sheet = incident_wb['Page 1']
combined_wb = xl.Workbook()
combined_sheet = combined_wb.active
combined_sheet.title = "combined_sheet"
combined_wb.save('combined_sheet.xlsx')
for row in range(1, incident_sheet.max_row+1):
incident_no = incident_sheet.cell(row,1)
opened_date = incident_sheet.cell(row,2)
shrt_desc = incident_sheet.cell(row,3)
requester = incident_sheet.cell(row,4)
incdnt_type = incident_sheet.cell(row,5)
priority = incident_sheet.cell(row,6)
assgn_grp = incident_sheet.cell(row,7)
assgn_to = incident_sheet.cell(row,8)
updated = incident_sheet.cell(row,9)
status = incident_sheet.cell(row,10)
sub_status = incident_sheet.cell(row,11)
##copy the data into the new sheet
incident_no_1 = combined_sheet.cell(row,1)
incident_no_1.value = incident_no.value
opened_date_1 = combined_sheet.cell(row,2)
opened_date_1.value = opened_date.value
shrt_desc_1 = combined_sheet.cell(row,3)
shrt_desc_1.value = shrt_desc.value
requester_1 = combined_sheet.cell(row,4)
requester_1.value = requester.value
incdnt_type_1 = combined_sheet.cell(row,5)
incdnt_type_1.value = incdnt_type.value
priority_1 = combined_sheet.cell(row,6)
priority_1.value = priority.value
assgn_grp_1 = combined_sheet.cell(row,7)
assgn_grp_1.value = assgn_grp.value
assgn_to_1 = combined_sheet.cell(row,8)
assgn_to_1.value = assgn_to.value
updated_1 = combined_sheet.cell(row,9)
updated_1.value = updated.value
status_1 = combined_sheet.cell(row,10)
status_1.value = status.value
sub_status_1 = combined_sheet.cell(row,11)
sub_status_1.value = sub_status.value
##print(f"The incident resolved yesterday {incident_no.value}")
combined_wb.save('combined_sheet.xlsx')
An alternative approach would be to build a list of date from multiple excel files and then write it to another file.
As a proof of concept:
import openpyxl as xl
from openpyxl import workbook
def provide_data(workbookName, sheetName):
wb = xl.load_workbook(workbookName)
sheet = wb[sheetName]
return [[y.value for y in x] for x in sheet.iter_rows()]
# This creates an array of rows, which contain an array of cell values.
# It will be much better to provide mapping for cells and return business object.
def save_data(list_of_sheets):
combined_wb = xl.Workbook()
combined_sheet = combined_wb.active
combined_sheet.title = "combined_sheet"
for sheet in list_of_sheets:
for row in sheet:
combined_sheet.append(row) # combining multiple rows.
combined_wb.save('combined_sheet.xlsx')
workSheetsToCopy = [['incident resolved yesterday.xlsx', 'Page 1'], ['other.xlsx', 'Page 1']]
workSheetsToCopy = [provide_data(x[0], x[1]) for x in workSheetsToCopy]
save_data(workSheetsToCopy)

Categories

Resources