Python openpyxl .xlsx if column is X print current row - python

I can't seem to figure this out.
I am trying to go through a whole column in a spreadsheet and return the row of data if value is found.
Any help would be appreciated..

Please Import xlrd and run this code and modify as you
import xlrd
workbook = xlrd.open_workbook('file.xls')
for s in workbook.sheets():
for r in range(s.nrows):
for c in range(s.ncols):
print "row::::: ", r
print "column:: ", c
print "value::: ", s.cell(r,c).value

The simplest thing is simply to loop through all the rows of the worksheet, check the cell in the relevant column and break when you get the value.
from openpyxl import load_workbook
wb = load_workbook(filename)
ws = wb[sheetname]
for row in ws:
cell = row[0] # for column 'A'
if cell.value == sentinel:
break
# do what you want with the row

Related

Python openpyxl to automate entire column in excel

import openpyxl
i=2
workbook= openpyxl.load_workbook()
sheet = workbook.active
for i, cellObj in enumerate (sheet['I'],2):
cellObj.value = '=IF(ISNUMBER(A2)*(A2<>0),A2,IF(ISNUMBER(F2)*(F2<>0),F2,IF(ISBLANK(A2)*ISBLANK(F2)*ISBLANK(H2),0,H2)))'
workbook.save()
Using openpxl, I tried to apply formula to entire column 'I' its not working as per the formula, I wanted formula to start from I2 but its start from I1 and wrong output as well.
I have attached a screenshot.
.
Can someone please correct the code?
Output of print(list(enumerate(sheet['I']))):
You'd probably be better off to do it this way, auto skip row 1 by starting the iteration at row 2 and update the formula using the cell row number.
import openpyxl
excelfile = 'foo.xlsx'
workbook= openpyxl.load_workbook(excelfile)
sheet = workbook.active
mr = sheet.max_row # Last row to add formula to
for row in sheet.iter_rows(min_col=9, max_col=9, min_row=2, max_row=mr):
for cell in row:
cr = cell.row # Get the current row number to use in formula
cell.value = f'=IF(ISNUMBER(A{cr})*(A{cr} <> 0), A{cr}, IF(ISNUMBER(F{cr})*(F{cr} <> 0), F{cr}, IF(ISBLANK(A{cr})*ISBLANK(F{cr})*ISBLANK(H{cr}), 0, H{cr})))'
workbook.save(excelfile)
If you know the from and to row numbers, then you can use it like this:
from openpyxl import load_workbook
wb = load_workbook(filename="/content/sample_data/Book1.xlsx")
ws = wb.active
from_row = 2
to_row = 4
for i in range(from_row, to_row+1):
ws[f"C{i}"] = f'=_xlfn.CONCAT(A{i}, "_", B{i})'
wb.save("/content/sample_data/formula.xlsx")
Input (Book1.xlsx):
Output (formula.xlsx):
I don't have your data, so I did not test the following formula; but your formula can be translated to format string as:
for i in range(from_row, to_row+1):
ws[f"I{i}"] = f'=IF(ISNUMBER(A{i})*(A{i}<>0),A{i},IF(ISNUMBER(F{i})*(F{i}<>0),F{i},IF(ISBLANK(A{i})*ISBLANK(F{i})*ISBLANK(H{i}),0,H{i})))'
It formats the formula as:
=IF(ISNUMBER(A2)*(A2<>0),A2,IF(ISNUMBER(F2)*(F2<>0),F2,IF(ISBLANK(A2)*ISBLANK(F2)*ISBLANK(H2),0,H2)))
=IF(ISNUMBER(A3)*(A3<>0),A3,IF(ISNUMBER(F3)*(F3<>0),F3,IF(ISBLANK(A3)*ISBLANK(F3)*ISBLANK(H3),0,H3)))
=IF(ISNUMBER(A4)*(A4<>0),A4,IF(ISNUMBER(F4)*(F4<>0),F4,IF(ISBLANK(A4)*ISBLANK(F4)*ISBLANK(H4),0,H4)))

If condition based on cell value in excel using Openpyxl

There are two excel files, where the data on condition should be appended to another excel file.
CONDITION: If Any value in Column A is equal to 'x' then it should get value from col B and get it appended directly to col A/B in excel file 2.
The below table is present in Excel File 1.
The below should be the output... which is in Excel file 2.
Am new to this.. please help with this code, and preferably if code is done using "Openpyxl", it would be much helpful !
Thanks in advance.
A slight improvement on Redox's solution:
import openpyxl
#Open Input File open (file1)
wb1 = openpyxl.load_workbook('file1.xlsx')
ws1 = wb1['Sheet1']
wb2 = openpyxl.Workbook()
ws2 = wb2.active
ws2.append(["Base", "A/B"])
for row in ws1.iter_rows(min_row=2, max_col=3, values_only=True):
base, a, b = row
if a != "x":
new_row = [base, a]
else:
new_row = [base, b]
ws2.append(new_row)
Ideally you should also check that the third column has a valid value.
So, a simple solution and a more complicated one:
Then between files you can use a link or index() or indirect().
To do this using python-openpyxl, you can use the below code... added comments so it is easy to understand... hope this helps. Let me know in case of questions.
The python code
import openpyxl
#Open Input File open (file1)
wb1 = openpyxl.load_workbook('file1.xlsx')
ws1 = wb1['Sheet1']
#Create new file for Output (file2)
wb2 = openpyxl.Workbook()
ws2 = wb2.active
#Add header to output file
ws2.cell(row=1,column=1).value = "BASE"
ws2.cell(row=1,column=2).value = "A/B"
# Iterate through each line in input file from row 2 (skipping header) to last row
for row in ws1.iter_rows(min_row=2, max_row=ws1.max_row, min_col=1, max_col=3):
for col, cell in enumerate(row):
if col == 0: #First column, write to output
ws2.cell(cell.row, col+1).value = cell.value
elif col == 1:
if cell.value != "X": #2nd column, write to output if not X
ws2.cell(cell.row, col+1).value = cell.value
else: #2nd column, write 3rd column if X
ws2.cell(cell.row, col+1).value = ws1.cell(cell.row, col+2).value
wb2.save('file2.xlsx')
Output excel after running

How to find next empty cell on a specific column and write to it?

I want to find the next empty cell in a specific column and write to values that cell. I've tried it using following method:
for row in sheet['A{}:A{}'.format(sheet.min_row,sheet.max_row)]:
if row is None:
sheet.cell(column=1).value = name
else:
print ("Cell have data")
But It's not writing data to next empty cell. How can I fix that?
It's pretty pointless to construct a string with min_row and max_row. You can simply access the whole column:
from openpyxl import load_workbook
wb = load_workbook("book.xlsx")
ws = wb.active
for cell in ws["A"]:
if cell.value is None:
cell.value = "new value2"
wb.save("book.xlsx")
But this reads the whole column at once as a tuple. Instead, you can use iter_rows() (iter_cols() is not available in read-only):
from openpyxl import load_workbook
wb = load_workbook("book.xlsx")
ws = wb.active
for row in ws.iter_rows(min_col=1, max_col=1):
cell = row[0]
if cell.value is None:
cell.value = "new value"
wb.save("book.xlsx")

Python Openpyxl iter_rows and add defined value in each cell

Question: Can someone please let me know how I can achieve the following task:
I've defined the column, but i need a specific value to go into each cell within that column.
Also, if column 6 only has x amount of rows, then i want column 7 to also have only x amount of rows with the values pasted in it.
This is the code i've tried.
import openpyxl
wb = openpyxl.load_workbook(filename=r'C:\Users\.spyder-py3\data\BMA.xlsx')
ws = wb.worksheets[0]
for row in ws.iter_rows('G{}:G{}'.format(ws.min_row,ws.max_row)):
for cell in row:
ws.cell(row=cell, column=7).value = 'BMA'
wb.save(r'C:\Users\.spyder-py3\data\BMA.csv')
wb.close()
I figured out most of the issue by looking at this answer:
https://stackoverflow.com/a/15004956/9649146
This is the code i end up with:
import openpyxl
wb = openpyxl.load_workbook(filename=r'C:\Users\.spyder-py3\data\AAXN.xlsx')
ws = wb.worksheets[0]
r = 2
for row in ws.iter_rows('G{}:G{}'.format(ws.min_row,ws.max_row)):
for cell in row:
ws.cell(row=r, column=7).value = 'AAXN'
r += 1
wb.save(r'C:\Users\.spyder-py3\data\AAXN.csv')
wb.close()
Or, you can do something like this:
for row in filesheet.iter_rows(min_row=2, max_row=file_sheet.max_row):
filesheet.cell(row=row[0].row, column=7).value = 'my value'

xlsx writing cell_value error, writing to new worksheet

I'm trying to build a report generator which reads excel sheets and returns rows which contain values. I built a version which works as I require but only works for csv this is only my 1st code-mash-together, but it worked. I now would like to include conditional formatting as well (highlight certain cells values eg. if <65 format red) and so that required that I rewrite with xlsx sheets rather than csv.
Below is my attempt at getting this to work...
I can find the values and return the row, but on the second run through it returns an error
AttributeError: 'Worksheet' object has no attribute 'cell_value'
Which is surprising because it worked just previously and stepping through the code retuns the values I want.... I have tried changing it to .value, but returns:
AttributeError: 'function' object has no attribute 'value'
Help, I have no idea what I'm doing now. If it doens't make any sense i'm happy to post my original code for the csv to 'explain'
Thanks
import xlsxwriter
import xlrd
import os
import xlwt
# open original excelbook and access first sheet
for excelDocs in os.listdir('.'):
if not excelDocs.endswith('.xlsx'):
continue # skip non-xlsx files
workbook = xlrd.open_workbook(excelDocs)
sheet = workbook.sheet_by_index(0)
cellslist = []
i = 0
#########WORKS!#####################
for row in range(sheet.nrows):
for col in range(sheet.ncols):
if sheet.cell_value(row, col) == 'CP' or sheet.cell_value(row, col) == 'LNA' or sheet.cell_value(row, col) == 'Last Name':
i = i + 1
data = [sheet.cell_value(0, col) for col in range(sheet.ncols)]
workbook = xlsxwriter.Workbook()
sheet = workbook.add_worksheet('excelDocs')
for index, value in enumerate(data):
sheet.write(i, index, value)
workbook = xlrd.open_workbook(excelDocs)
I have no experience with xlsxwriter, xlrd or xlwt. As this is your "1st code-mash-together" I figured I would offer an alternative using openpyxl.
I do not have your data, so testing is a little difficult, but any syntax errors could be fixed. Please let me know if this does not run and I will help fix if required.
I am assuming your output is to a seperate file(report.xlsx here) and a tab for each workbook checked(each tab named for source book name).
import openpyxl
from openpyxl import *
from openpyxl.utils import get_column_letter
interestingValues = ['CP','LNA', 'LastName']
report = Workbook()
dest_filename = 'report.xlsx'
# open original excelbook and access first sheet
for excelDocs in os.listdir('.'):
if not excelDocs.endswith('.xlsx'):
continue # skip non-xlsx files
workbook = load_workbook(excelDocs)
sheet = workbook.active
workingReportSheet = report.create_sheet(str(excelDocs.split('.')[0]))
i = 0
for row in range(1,sheet.max_row):
for col in range(sheet.max_column):
columnLetter = get_column_letter(col +1)
if str(sheet['%s%s' % (columnLetter,row)].value) in interestingValues:
i += 1
data = [sheet['%s%s' % (str(get_column_letter(col)),i)].value for col in range(1,sheet.max_column +1)]
for index, value in enumerate(data):
workingReportSheet['%s%s' % (str(get_column_letter(index+1)),i)].value = value
report.save(filename = dest_filename)
Reading your code again, it may be that you are discarding your output.
Try the below.
import xlsxwriter
import xlrd
import os
import xlwt
#Create output sheet
outputworkbook = xlsxwriter.Workbook()
# open original excelbook and access first sheet
for excelDocs in os.listdir('.'):
if not excelDocs.endswith('.xlsx'):
continue # skip non-xlsx files
workbook = xlrd.open_workbook(excelDocs)
sheet = workbook.sheet_by_index(0)
cellslist = []
i = 0
outputsheet = outputworkbook.add_worksheet('excelDocs')
for row in range(sheet.nrows):
for col in range(sheet.ncols):
if sheet.cell_value(row, col) == 'CP' or sheet.cell_value(row, col) == 'LNA' or sheet.cell_value(row, col) == 'Last Name':
i = i + 1
data = [sheet.cell_value(0, col) for col in range(sheet.ncols)]
for index, value in enumerate(data):
outputsheet.write(i, index, value)

Categories

Resources