Saving an excel sheet opened with join(dir_path..) in python - python

Given below is my code:
from os.path import join
from xlutils.copy import copy
from xlrd import open_workbook,cellname
from os.path import join, dirname, abspath
import xlwt
def Trend():
# Reads TrendAnalysis.xls with xlrd, writes per-column totals and a few
# rearranged rows into a writable xlutils copy, then saves that copy.
# The workbook path is built relative to the parent of this file's directory.
fname = join(dirname(dirname(abspath(__file__))),'Data Files', 'Processed Data', 'TrendAnalysis.xls')
# Open the workbook
book = open_workbook(fname, formatting_info=True)
wb = copy(book) # a writable copy (I can't read values out of this, only write to it)
# Running sum, reset to 0 after each total is written.
total=0.
# style is the bold Calibri font used for labels; style1 is plain Calibri used for totals.
style = xlwt.easyxf('font: bold 1, name Calibri')
style1 = xlwt.easyxf('font: name Calibri')
# Sheets with index 2..24: add up every column and write a TOTAL row.
for i in range(2,25):
# i starts at 2, so this i==1 branch is never taken.
if(i==1):
pass
else:
sheet = book.sheet_by_index(i)
w_sheet = wb.get_sheet(i) # the sheet to write to within the writable copy
cols = sheet.ncols # Number of columns
rows = sheet.nrows # Number of rows
for column in range(1,cols):
for row in range(1,rows):
# Cells holding '-' are left out of the sum.
if(sheet.cell(row,column).value == '-'):
pass
else:
total=total+sheet.cell(row,column).value
# NOTE(review): indentation was lost in this listing; presumably this runs once
# per column after the row loop, writing the sum one row below the last row read.
w_sheet.write(row+1, column, total, style1)
total=0
w_sheet.write(row+1,0, 'TOTAL', style)
# Rebinding i here does not change the next value produced by range().
i=i+1
# Sheet 0: move selected rows to fixed positions, then write sub totals and a grand total.
s=book.sheet_by_index(0)
w = wb.get_sheet(0)
cols = s.ncols # Number of columns
rows = s.nrows # Number of rows
for row in range(1,rows):
# Swap the first three cells of this row with those of row 21.
if(s.cell(row,0).value== "ISU-GOV Domestic"):
for column in range(0,3):
a=s.cell(row,column).value
b=s.cell(21,column).value
w.write(21,column,a)
w.write(row,column,b)
# Swap the first three cells of this row with those of row 23.
elif(s.cell(row,0).value== "ISU-GOV Overseas"):
for column in range(0,3):
a=s.cell(row,column).value
b=s.cell(23,column).value
w.write(row,column,b)
w.write(23,column,a)
# Copy this row's first three cells to row 24, and row 20's to row 12.
elif(s.cell(row,0).value== "ISU-MFG (TML)"):
for column in range(0,3):
a=s.cell(row,column).value
w.write(24,column,a)
b=s.cell(20,column).value
w.write(12,column,b)
# Copy this row's first three cells to row 25.
elif(s.cell(row,0).value== "NGM-INDIA"):
for column in range(0,3):
a=s.cell(row,column).value
w.write(25,column,a)
else:
# Any other label is rewritten unchanged in column 0.
c=s.cell(row,0).value
w.write(row, 0, c)
# First sub total: rows 1..19 of every column, written to row 20.
for column in range(1,cols):
for row in range(1,20):
if(s.cell(row,column).value == '-'):
pass
else:
total=total+s.cell(row,column).value
w.write(20, column, total, style1)
total=0
w.write(20,0, 'SUB TOTAL', style)
# Second sub total: rows 20..rows-1 of every column, written to row 26.
for column in range(1,cols):
for row in range(20,rows):
if(s.cell(row,column).value == '-'):
pass
else:
total=total+s.cell(row,column).value
w.write(26, column, total, style1)
total=0
w.write(26,0, 'SUB TOTAL', style)
# Grand total: rows 1..rows-1 of every column, written to row 27.
for column in range(1,cols):
for row in range(1,rows):
if(s.cell(row,column).value == '-'):
pass
else:
total=total+s.cell(row,column).value
w.write(27, column, total, style1)
total=0
w.write(27,0, 'GRAND TOTAL', style)
# NOTE: the argument is the string literal 'fname', not the fname variable
# defined at the top of this function.
wb.save('fname')
The changes made to the Excel file are not reflected. The script runs without any errors, yet none of these changes have been made to the Excel sheet. Could you please help me sort out the issue?

The line in your program that says:
wb.save('fname')
will save to a file named 'fname'.
You want to use your fname variable, rather than the literal string 'fname', so that line should read:
wb.save(fname)

Related

Is there a way to move the rows of all the sheets to specific row number using Openpyxl in Python?

I am using Openpyxl to read the Excel file and get my desired output in a txt file (not all of the code is shown below, as it is irrelevant). Below is my code for reading the Excel file. The test file contains 3 sheets. As you might have noticed, I am skipping the 1st sheet in my Excel file. The other sheets have the data that I need. The columns that I am interested in are "Field Name" and "Type". However, as shown in the snippets below, these headers are located in row 5 in sheet 1 and row 8 in sheet 2. I was wondering if I can get both sheets to have "Field Name" and "Type" start from row 7 (instead of doing it manually)? Is there any search that I can perform to make sure that I have "Field Name" and "Type" on row 7 and, if not, can I have it corrected in the same sheet instead of creating a copy of the sheet? I checked here, but unfortunately couldn't find the solution. The reason to start from row 7 is that I am taking the data from row 8 onwards from the sheet and adding it to the txt file.
Note: Below snapshots are demo only. My original excel file contains 10+ sheets with same issue i.e. "Field Name" and "Type" not starting from row 7
Thanks in advance for your help!
Python code:
from openpyxl import load_workbook
data_file='test.xlsx'
# Load the entire workbook.
wb = load_workbook(data_file)
# Walk every worksheet except the first one.
for ws in wb.worksheets[1:]:
    # Rows 7 through max_row (inclusive): print column 1, then column 2.
    for i in range(7, ws.max_row + 1):
        name = ws.cell(row=i, column=1).value
        print(i, name)
        name1 = ws.cell(row=i, column=2).value
        print(name1)
....... my other code
Sheet 1
Sheet 2:
Sheet output after SO comments:
Sheet 1:
Sheet 2:
You can achieve this by using insert_rows() and delete_rows()...
Note that you need to save the file once you have added/deleted the rows.
from openpyxl import load_workbook
data_file='test.xlsx'
# Load the entire workbook.
wb = load_workbook(data_file)
# Every worksheet except the first: locate the row whose first two cells are
# 'Field Name' / 'Type' and shift it so that it lands on row 7.
for ws in wb.worksheets[1:]:
    # 0 means "header not found"; iteration stops at the first match.
    header_row = next(
        (
            cells[0].row
            for cells in ws.iter_rows(max_col=2)
            if cells[0].value == 'Field Name' and cells[1].value == 'Type'
        ),
        0,
    )
    if header_row > 7:
        # Header is too low: remove the rows between row 7 and the header.
        ws.delete_rows(7, header_row - 7)
    elif 0 < header_row < 7:
        # Header is too high: push it down by inserting rows above it.
        ws.insert_rows(header_row, 7 - header_row)

# Row insertions/deletions only persist once the workbook is saved.
wb.save('test.xlsx')
Dealing with tables
It looks like the input data in your sheet is an Excel table. You can check this by selecting the range and right-clicking (there should be a table option under Quick Analysis). If this is the case, you have two options.
Select a cell in table >> right click >> Table >> Convert to Range. Then the original code will run. Don't know if that works.
Written below is the code that will work if all your sheets have tables. Note that I am considering that there is only one table in each sheet. Also, the style is set to the blue format you have shared in your pics above. Borrowed code from here
from openpyxl import load_workbook
from openpyxl.worksheet.table import Table, TableStyleInfo
style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False, showLastColumn=False, showRowStripes=True, showColumnStripes=False)
def colnum_string(n):
    """Convert a 1-based column number to spreadsheet letters.

    1 -> 'A', 26 -> 'Z', 27 -> 'AA'. Returns an empty string when n is
    zero or negative.
    """
    letters = []
    while n > 0:
        # Bijective base 26: shift to 0-based before splitting off a digit.
        n, offset = divmod(n - 1, 26)
        letters.append(chr(65 + offset))
    # Digits were produced least-significant first.
    return "".join(reversed(letters))
import os

data_file = input("Please provide the name of file you want to process: ")
# Load the entire workbook.
wb = load_workbook(data_file)

# Every worksheet except the first: move the 'Field Name' / 'Type' header to
# row 7 and re-declare the sheet's table so that it starts there.
for ws in wb.worksheets[1:]:
    CurrentRow = 0  # 0 means the header was not found in this sheet
    tablelen = 0    # number of rows below the header, measured before shifting
    for row in ws.iter_rows(max_col=2):
        if row[0].value == 'Field Name' and row[1].value == 'Type':
            CurrentRow = row[0].row
            tablelen = ws.max_row - CurrentRow
            break

    if CurrentRow == 0 or CurrentRow == 7:
        # Nothing to move: header missing, or already on row 7.
        continue

    if CurrentRow > 7:
        # Header is too low: drop the rows between row 7 and the header.
        ws.delete_rows(7, CurrentRow - 7)
    else:
        # Header is too high: push it down by inserting rows above it.
        ws.insert_rows(CurrentRow, 7 - CurrentRow)

    # Inserting/deleting rows does not move the table definition, so replace
    # it with one anchored at A7. Sheets without any table are only shifted
    # (the original indexing raised IndexError for them). Only the first
    # table of the sheet is handled.
    if ws.tables:
        table_name = next(iter(ws.tables))
        resTable = Table(displayName=table_name, ref="A7:B{}".format(7 + tablelen))
        resTable.tableStyleInfo = style
        ws._tables[table_name] = resTable

# splitext keeps directory parts and extra dots intact, e.g.
# "my.report.xlsx" -> "my.report_updated.xlsx"; a name without an extension
# simply gets "_updated" appended.
root, ext = os.path.splitext(data_file)
wb.save(root + "_updated" + ext)
New Req - Read all xlsx files
from openpyxl import load_workbook
from openpyxl.worksheet.table import Table, TableStyleInfo
style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False, showLastColumn=False, showRowStripes=True, showColumnStripes=False)
def colnum_string(n):
    """Return the spreadsheet column letters for the 1-based column number n.

    For example 1 -> 'A', 26 -> 'Z', 27 -> 'AA'. Zero and negative numbers
    give an empty string.
    """
    if not n > 0:
        return ""
    # Split off the last letter; the quotient encodes the letters before it.
    quotient, offset = divmod(n - 1, 26)
    return colnum_string(quotient) + chr(65 + offset)
import os
# File extensions (lower-case, without the dot) that will be processed.
ALLOWED_EXTENSIONS = {'xlsx'}


def allowed_file(filename):
    """Return True when filename contains a dot and its last extension is allowed.

    The extension comparison is case-insensitive.
    """
    _stem, dot, extension = filename.rpartition('.')
    # dot is empty when the name contains no '.' at all.
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
MyPWD = os.getcwd()
# Process every allowed workbook found directly in the current directory.
for filename in os.listdir(MyPWD):
    path = os.path.join(MyPWD, filename)
    if not (os.path.isfile(path) and allowed_file(filename)):
        continue

    # Load the entire workbook.
    wb = load_workbook(path)

    # Every worksheet except the first: move the 'Field Name' / 'Type' header
    # to row 7 and re-declare the sheet's table so that it starts there.
    for ws in wb.worksheets[1:]:
        CurrentRow = 0  # 0 means the header was not found in this sheet
        tablelen = 0    # number of rows below the header, measured before shifting
        for row in ws.iter_rows(max_col=2):
            if row[0].value == 'Field Name' and row[1].value == 'Type':
                CurrentRow = row[0].row
                tablelen = ws.max_row - CurrentRow
                break

        if CurrentRow == 0 or CurrentRow == 7:
            # Nothing to move: header missing, or already on row 7.
            continue

        if CurrentRow > 7:
            # Header is too low: drop the rows between row 7 and the header.
            ws.delete_rows(7, CurrentRow - 7)
        else:
            # Header is too high: push it down by inserting rows above it.
            ws.insert_rows(CurrentRow, 7 - CurrentRow)

        # Inserting/deleting rows does not move the table definition, so
        # replace it with one anchored at A7. Sheets without any table are
        # only shifted (the original indexing raised IndexError for them).
        # Only the first table of the sheet is handled.
        if ws.tables:
            table_name = next(iter(ws.tables))
            resTable = Table(displayName=table_name, ref="A7:B{}".format(7 + tablelen))
            resTable.tableStyleInfo = style
            ws._tables[table_name] = resTable

    # splitext keeps extra dots in the name intact, e.g.
    # "my.report.xlsx" -> "my.report_updated.xlsx".
    root, ext = os.path.splitext(path)
    wb.save(root + "_updated" + ext)

saving dataframe changes with wb.save()

Blessings,
I am trying to convert my current dataframe into a worksheet so I'll be able to save it properly.
For some reason, when I save to xlsx with df.to_excel after editing a df, it overwrites the cells starting at the top-left corner instead of editing the cells I originally changed.
wb.save() does seem to work fine though.
What I am using to write :
from datetime import date
import pandas as pd
import argparse
import logging
import sys
import os
# Create logger
logging.basicConfig(level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[
logging.FileHandler(filename="info.log"),
logging.StreamHandler(sys.stdout)
])
logger = logging.getLogger()
def inventory():
"""This will allow interaction within Inventory.xlsx."""
# Overview: list the workbook's sheet names, drop a fixed set of sheets, parse
# the command-line arguments, then for each remaining sheet either print the
# rows whose 'IDC S/N' contains the search text or adjust 'Cabinet Qty'/'MMEX'
# on those rows and write the filtered frame back to the workbook.
today = date.today()
# computer_date is computed here but not used again in this function.
computer_date = today.strftime("%m-%d-%Y")
file = "MMEX Inventory.xlsx"
# At this point df is the list of sheet names; it is rebound to a DataFrame
# inside the loop further down.
df = pd.ExcelFile(file).sheet_names
# Filter sheets
counter = 0
sheets = []
for sheet in df:
if sheet == "EXTRA" or sheet == "Inventory Rules" or sheet == "Removed lines" or sheet == "EOL_Hynix_SODIMM" \
or sheet == "EV" or sheet == "LPDDR4" or sheet == "LP4":
pass
else:
counter += 1
sheets.append(sheet)
# Added arguments to take
parser = argparse.ArgumentParser(description="Will allow interaction within Inventory")
parser.add_argument("num", help="What memory are you looking for? min of 2 letters are "
"sufficient.")
parser.add_argument("-m", "--subtract", type=int, metavar='', help="Will add to mmex and subtract from cabinet")
parser.add_argument("-c", "--add", type=int, metavar='', help="Will add to cabinet and subtract from mmex")
args = parser.parse_args()
# Loop through sheets
# counter is reset and reused as the index into sheets for the loop below.
counter = 0
for i in sheets:
# NOTE(review): the loop body runs len(sheets) times and counter grows by at
# most one per pass, so this guard does not look reachable — confirm.
if counter == len(sheets) + 1:
break
else:
# Read xlsx and current sheet
df = pd.read_excel(f"{file}", f"{sheets[counter]}")
# Compare and keep matching columns
a = df.columns
b = ['IDC S/N', 'ECC', 'Cabinet Qty', 'MMEX', 'VDR']
keep_columns = [x for x in a if x in b]
# Maximum width on output
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
# Search within IDC S/N for argument
# After this line df holds only the matching rows and only keep_columns.
df = df.loc[df['IDC S/N'].str.lower().str.contains(args.num.lower(), na=False), keep_columns]
df.reset_index(drop=True, inplace=True)
# Enable user to edit 'Cabinet Qty' or 'MMEX'
if args.add:
if df.empty:
pass
else:
# Check whether calculation approves
check = df.loc[df["IDC S/N"].str.lower().str.contains(args.num.lower(),
na=False), 'MMEX']
# Abort the whole run if any matching row would go below zero on MMEX.
for num in check:
if num - args.add < 0:
print(f"\n\n{df}")
logger.info(f"\n\n\nYou cannot do that.\n"
f"While available quantity on MMEX is {num}\n"
f"You are trying to subtract it by {args.add}\n")
exit()
else:
pass
# Log user and changes
logger.info(f"\n\nBeing edited by - {os.getlogin()}")
logger.info(f"The following changes are being made in sheet - {sheets[counter]}\n{df}")
# Make changes to 'Cabinet Qty/MMEX'
df.loc[df['IDC S/N'].str.lower().str.contains(args.num.lower(),
na=False), 'Cabinet Qty'] += args.add
df.loc[df['IDC S/N'].str.lower().str.contains(args.num.lower(),
na=False), 'MMEX'] -= args.add
# Save changes
# The frame written here is the filtered df (matching rows, keep_columns only);
# to_excel is called without a start row or column.
with pd.ExcelWriter(file,
engine="openpyxl", mode="a", if_sheet_exists="overlay") as writer:
df.to_excel(writer, sheet_name=f"{sheets[counter]}")
logger.info(f"The following changes have been made \n{df}")
elif args.subtract:
if df.empty:
pass
else:
# Check whether calculation approves
check = df.loc[df["IDC S/N"].str.lower().str.contains(args.num.lower(),
na=False), 'Cabinet Qty']
# Abort the whole run if any matching row would go below zero on 'Cabinet Qty'.
for num in check:
if num - args.subtract < 0:
print(f"\n\n{df}")
logger.info(f"\n\n\nYou cannot do that.\n"
f"While available quantity on 'Cabinet Qty' is {num}\n"
f"You are trying to subtract it by {args.subtract}\n\n")
exit()
else:
pass
# Log user and changes
logger.info(f"\n\nBeing edited by - {os.getlogin()}")
logger.info(f"The following changes are being made in sheet - {sheets[counter]}\n{df}")
# Make Changes to 'Cabinet Qty/MMEX'
df.loc[df["IDC S/N"].str.lower().str.contains(args.num.lower(),
na=False), 'Cabinet Qty'] -= args.subtract
df.loc[df['IDC S/N'].str.lower().str.contains(args.num.lower(),
na=False), 'MMEX'] += args.subtract
# Save changes
# As in the --add branch, the filtered df is what gets written back.
with pd.ExcelWriter(file,
engine="openpyxl", mode="a", if_sheet_exists="overlay") as writer:
df.to_excel(writer, sheet_name=f"{sheets[counter]}")
logger.info(f"The following changes have been made \n{df}")
else:
# Convert from float to int
try:
df['MMEX'] = df['MMEX'].astype(int)
df['VDR'] = df['VDR'].astype(int)
except KeyError:
pass
finally:
pass
# Will prevent empty dataframes when looping from sheets
# NOTE(review): indentation was lost in this listing. If these counter updates
# sit inside the final else branch, counter is not advanced on the --add and
# --subtract paths and the same sheet is read on every pass — confirm.
if df.empty:
counter += 1
else:
print(f"\n{sheets[counter]}\n" f"{df}\n")
counter += 1
if __name__ == "__main__":
inventory()
Its output :
It basically overwrites to the top left instead of appending the current df.
While wb.save() does this instead:
Edit :
To summarize it all very shortly, I need to convert this df command df = df.loc[df['IDC S/N'].str.lower().str.contains(args.num.lower(), na=False), keep_columns]
Look within 'IDC S/N' for anything that contains args.num.lower as input and filter using only certain columns (keep_columns)
This needs to be converted into a ws commands (openpyxl), if anyone knows how to do this ill be more than happy :)
OpenPyXL documentation has a section related to working with pandas.
In your case, you'll need the openpyxl.utils.dataframe.dataframe_to_rows function.
From the documentation:
from openpyxl.utils.dataframe import dataframe_to_rows
wb = Workbook()
ws = wb.active
for r in dataframe_to_rows(df, index=True, header=True):
ws.append(r)
What I used was
# Will allow conversion to letters in later use
characters = 'abcdefghijklmnopqrstuvwxyz'
# Look for IDC S/N column
counter = 0
for col in df.columns:
if col == 'IDC S/N':
print(col)
print(counter)
break
else:
counter += 1
# Find 'IDC S/N' within xlsx
if ws[f"{characters[counter]}1"].value == "IDC S/N":
print('IDC S/N Exists.')
else:
print('IDC S/N Missing, Will stop')
quit()
# Converted numbers to letters - 1 = A , 2 = B
header = characters[counter]
# Search for memory within 'IDC S/N'
counter = 0
print(header)
for cell in ws[header]:
print(cell)
From here I can edit cells because I have both header and index ..
If anyone has a more efficient alternative please do share!

I am trying to read excel file and its formulas and write them in new file by updating the formulas accordingly but got stuck to update them

I am trying to split an Excel spreadsheet based on columns and retain the formulas used in it. I was using openpyxl to read the formulas but got stuck: it reads the formulas and writes them as they are, but I also need to modify them depending on the new split sheets.
My code is as :
enter code here
import pandas as pd
import os
from openpyxl import load_workbook
# Lists the files in the automation folder, asks which workbook, tab and
# column to split on, then writes one output workbook per distinct value
# found in that column.
files = os.listdir("C:\\Users\\electrician\\Excel_Automation")
print("List of files: ")
for i in files:
print(i)
file_name = input("Enter the file name: ")
file = load_workbook(filename = file_name)
tabs = file.sheetnames
# temp_dict1 maps tab position -> tab name.
temp_dict1 = dict()
print("List of tabs: ")
for i,j in enumerate(tabs):
temp_dict1[i] = j
print(i,j)
tab_number = int(input("Enter tab number to split: "))
# The chosen tab's cell values are loaded as a DataFrame with default integer column labels.
tab = pd.DataFrame(data=file[tabs[tab_number]].values)
# temp_dict2 is not used anywhere below.
temp_dict2 = dict()
print("List of columns: ")
# The first row of the tab is printed as the list of column choices.
for i,j in enumerate(tab[:1].values.tolist()[0]):
print(i,j)
column_number = int(input("Enter column number to split: "))
# One output workbook per distinct value in the chosen column.
for i in set(tab[column_number]):
# NOTE(review): the file name uses the tab at index tab_number + 1, while the
# comparison below uses tab_number — confirm which one is intended.
output_filename = temp_dict1[tab_number+1]+"_"+i+".xlsx"
# reset_index() without drop=True keeps the old index as an extra column.
df = tab[tab[column_number] == i].reset_index()
writer = pd.ExcelWriter("C:\\Users\\electrician\\Excel_Automation\\Output\\" + output_filename,engine='xlsxwriter')
for j in temp_dict1.values():
if j == temp_dict1[tab_number]:
# The filtered rows go to a sheet named after the output file name (including ".xlsx").
df.to_excel(writer, sheet_name=output_filename, index=False)
else:
# Every other tab is copied across as plain cell values.
d = pd.DataFrame(data=file[j].values)
d.to_excel(writer,sheet_name=j,index=False)
# NOTE(review): ExcelWriter.save() was deprecated in pandas 1.5 and removed in
# 2.0 in favour of close() — confirm the pandas version in use.
writer.save()

Writing columns to a CSV file

I would like to update a column called Score for a specific row in a csv file. When a button is pressed, I would like the code to search the csv file until the row with the specified name is found (which is stored in variable name and randomly pulled from the csv file in a previous function called NameGenerator()), and update the relevant cell in the Score column to increment by 1.
Please note I am using an excel file saved as a .csv for this.
Any ideas how to do this? The code below does not work. Any help would be appreciated.
def Correct():
writer = csv.writer(namelist_file)
score=0
for row in writer:
if row[0] == name:
score=score+1
writer.writerow([col[1]] = score)
The CSV file looks as follows: [image 1 — not included]
So for example if the name tom is selected (elsewhere in the code, however stored in variable name), his score of 3 should be incremented by 1, turning into 4.
Here is what the function which pulls a random name from the csv file looks like:
def NameGenerator():
    """Pick a random first-column value from 'StudentNames&Questions.csv'.

    Every non-empty first cell is collected into the module-level list
    ``array``; the entry at index 0 is never picked.
    """
    global array
    namelist_file = open('StudentNames&Questions.csv')
    array = []
    # extend() with a generator appends one value at a time, as a loop would.
    array.extend(
        record[0] for record in csv.reader(namelist_file) if record[0] != ''
    )
    # Draw an index from 1 to the last index, inclusive.
    return array[random.randint(1, len(array) - 1)]
Can you please check if this works :
import sys
import random,csv
def update(cells):
    """Return the cells joined into one comma-separated string.

    Each cell is converted with str(); an empty sequence gives ''.
    """
    return ",".join(str(cell) for cell in cells)
def update_score(name, source='StudentNames&Questions.csv',
                 target='/Users/kgautam/tmp/tempfile-47'):
    """Add 1 to the 'Score' cell of every row whose 'Names' cell equals name.

    The CSV at ``source`` is read, the matching rows are updated and the
    whole table is written to ``target``. The defaults reproduce the paths
    that were previously hard-coded; pass the same path for both to update
    the file in place.

    Args:
        name: Exact value to look for in the 'Names' column.
        source: Path of the CSV file to read.
        target: Path the updated CSV is written to.

    Raises:
        SystemExit: When the header row lacks a 'Names' or 'Score' column
            (after printing "Headers not found").
        ValueError: When a matching row's score is not an integer.
    """
    # Parse with the csv module so that line endings and quoted fields are
    # handled; splitting raw lines on "," left the newline attached to the
    # last cell, which broke matching and merged lines on write.
    with open(source, 'r') as infile:
        rows = list(csv.reader(infile))

    header = [cell.strip() for cell in rows[0]] if rows else []
    try:
        name_index = header.index('Names')
        score_index = header.index('Score')
    except ValueError:
        print("Headers not found")
        sys.exit()

    needed = max(name_index, score_index)
    for cells in rows[1:]:
        # Rows too short to hold both columns are left untouched.
        if len(cells) > needed and cells[name_index] == name:
            cells[score_index] = str(int(cells[score_index]) + 1)

    with open(target, 'w') as outfile:
        csv.writer(outfile, lineterminator='\n').writerows(rows)
def NameGenerator():
    """Return a random name from the first column of 'StudentNames&Questions.csv'.

    Every non-empty first cell is collected into the module-level list
    ``array``. The entry at index 0 is never returned (presumably it is the
    header cell).

    Raises:
        ValueError: When the file holds no entry besides the first one.
    """
    global array
    array = []
    # The with-block closes the file; the handle used to be left open.
    with open('StudentNames&Questions.csv') as namelist_file:
        for row in csv.reader(namelist_file):
            # Blank lines parse as empty rows; skip them instead of failing
            # on row[0].
            if row and row[0] != '':
                array.append(row[0])
    if len(array) < 2:
        raise ValueError("no names found in 'StudentNames&Questions.csv'")
    # Same draw as randint(1, len(array) - 1): index 0 is excluded.
    return random.choice(array[1:])
randome_name=NameGenerator()
update_score(randome_name)

Q: OpenPyxl checking for existing row and then updating cell

I want to check for a name in the name column of an existing spreadsheet and, if it exists, update a specific column with a timestamp for that row. I'm in a rut because I can't figure out how to go about this without a for loop. The for loop appends more rows for the ones it didn't match, and nothing shows up in the column when I try to stamp it after matching a name with a row.
for rowNum in range(2, ws1.max_row):
log_name = ws1.cell(row=rowNum,column=1).value
if log_name == chkout_new_name_text:
print 'apple' + 'pen'
ws1.cell(row=rowNum, column=2).value = str(time.strftime("%m/%d/%y %H:%M %p"))
break
else:
continue
print 'pen' + 'pineapple'
# Normal procedure
Any help will be greatly appreciated.
Figured it out
name_text = raw_input("Please enter name: ")
matching_row_nbr = None
for rowNum in range(2, ws1.max_row + 1 ):
log_name = ws1.cell(row=rowNum,column=1).value
if log_name == name_text:
# Checks for a matching row and remembers the row number
matching_row_nbr = rowNum
break
if matching_row_nbr is not None:
# Uses the matching row number to change the cell value of the specific row
ws1.cell(row=matching_row_nbr, column=6).value = str(time.strftime("%m/%d/%y %H:%M - %p"))
wb.save(filename = active_workbook)
else:
# If the none of the rows match then continue with intended use of new data
print name_text

Categories

Resources