Search and find values in two excel sheets(xlrd to openpyxl)

Search and find values in two excel sheets(xlrd to openpyxl) - python

wrbk = xlrd.open_workbook("D:Book1.xlsx")
idx = 0
book_1 = xlrd.open_workbook("D:Book2.xlsx")
sh_1 = book_1.sheet_by_name('Sheet4')
i = 0
for x in range(sh_1.nrows):
i = i + 1
if i >= sh_1.nrows:
break
if sh_1.cell(i, 2).value:
concat = sh_1.cell(i, 2).value
for y in range(len(wrbk.sheets())):
sht = wrbk.sheet_by_index(y)
for j in range(sht.ncols):
for cell in range(sht.nrows):
list = str(sht.cell(cell, j).value)
if list.__contains__(concat):
print(sh_1.cell(i, 2).value)
Im using this code to find a value in a workbook and then search that value in another workbook.
I'm using xlrd, the output is fine so far but i can't read and write with xlrd.i need suggestions to change this code from xlrd to openpyxl.

This defines a function to do the search and uses a Regular Expression to do the 'contains' match. Change the print to suit.
from openpyxl import load_workbook
import re
# open workbook
excel_file1 = 'D:Book1.xlsx'
wb1 = load_workbook(excel_file1) # wrbk
ws1 = wb1["Sheet1"]
excel_file2 = 'D:Book2.xlsx'
wb2 = load_workbook(excel_file2) # book_1
ws2 = wb2["Sheet4"] # sh_1
# fn to search all sheets in workbook
def myfind(wb,s):
for ws in wb.worksheets:
for c in range(1,ws.max_column+1):
for r in range(1,ws.max_row+1):
txt = ws.cell(r,c).value
if txt is None:
pass
elif re.search(s,txt):
print("Found",s,txt,ws,r,c)
# scan col C
for r in range(1,ws2.max_row+1):
s = ws2.cell(r, 3).value
if s is None:
pass
else:
print(r,s)
myfind(wb1,s)

Related

Excel pandas very slow collecting Data from XLSX file - slow script - Python

Hey I wanted to get quick Output for slicing my Source XLSX file for collect Data from Cell on index: 11, but my script seems to working very slow.
Expected output is Collected items from column index(11) Cell when on column index(16) Cell value = None. Script check from begin every row on column index(16) if value == None, but on my file i have thousands positions before script start collecting Data,
Can I speed up this process or find faster way?
XLUtils.py:
import openpyxl
def getRowCount(file,sheetName):
workbook = openpyxl.load_workbook(file)
sheet = workbook.get_sheet_by_name(sheetName)
return(sheet.max_row)
def getColumnCount(file,sheetName):
workbook = openpyxl.load_workbook(file)
sheet = workbook.get_sheet_by_name(sheetName)
return(sheet.max_column)
def readData(file,sheetName,rownum,columnno):
workbook = openpyxl.load_workbook(file)
sheet = workbook.get_sheet_by_name(sheetName)
return sheet.cell(row=rownum, column=columnno).value
def writeData(file,sheetName,rownum,columno,data):
workbook = openpyxl.load_workbook(file)
sheet = workbook.get_sheet_by_name(sheetName)
sheet.cell(row=rownum, column=columno).value = data
workbook.save(file)
My Script:
import pandas as pd
import XLUtils
from openpyxl import Workbook
from datetime import datetime
def LISTING_GENERATOR():
#This function Create product list file for current day
i = 1
i_range = 50
r = 1
x = 1
rows = 50
LISTED_DATE = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,16)
if LISTED_DATE == None:
ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,11)
print(ASIN)
wb.save('Product_'+TODAY_DATE + '.xlsx')
print('File has been created: ',FILE)
for r in range(2,rows+1):
CHECK_ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r, 11)
if CHECK_ASIN == None:
print('No more ASIN avaiable')
break
FE = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r, 16)
if FE != None:
print('Product last added: ',FE)
if FE == None:
ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,11)
print(f'ASIN nr. {i}: {ASIN}')
LIST.append(ASIN)
XLUtils.writeData(FILE,'Sheet',x,1,ASIN)
XLUtils.writeData(PRODUCT_RESEARCH,'Sheet1',r,16,TODAY_DATE_XLSX)
x+=1
i+=1
if i >= i_range:
print(f'List of {i_range} items, has been Created.')
break
else:
print('Error: product on the list')
XLUtils.writeData(FILE,'Sheet',x,1,' ')
print('Created list:\n',LIST)
print('__________________________________')
ALL_ITEMS = (TODAY_DATE + '.xlsx')
print('CSV file has been named: ', ALL_ITEMS)
DATA_XLS = pd.read_excel(FILE, 'Sheet', dtype=str, index_col=None)
DATA_XLS.to_csv(PRODUCT_NAME+'.csv', encoding='utf-8', index=False)
if __name__ == '__main__':
#---Product_list_generator ---# Variable --
wb = Workbook()
LIST = []
TODAY_DATE = datetime.today().strftime('%d_%m_%Y')
TODAY_DATE_XLSX = datetime.today().strftime('%d/%m/%Y')
PRODUCT_RESEARCH = ('Product_Research_copy2.xlsx') #<--- xlsx File
FILE = ('Product_'+TODAY_DATE + '.xlsx')
PRODUCT_NAME = ('Product_'+TODAY_DATE)
LISTING_GENERATOR()

Read and Write multiple excel data into one excel file using openpyxl

I am trying to copy the data from multiple excel into one excel. I am novice to python and openpyxl. So i have opened each file and went row by row and copied them. I want to do this with multiple files. How do i loop through row and columns and copy the data consider the column in all the files are same order?
import openpyxl as xl
from openpyxl import workbook
incident_wb = xl.load_workbook('incident resolved yesterday.xlsx')
incident_sheet = incident_wb['Page 1']
combined_wb = xl.Workbook()
combined_sheet = combined_wb.active
combined_sheet.title = "combined_sheet"
combined_wb.save('combined_sheet.xlsx')
for row in range(1, incident_sheet.max_row+1):
incident_no = incident_sheet.cell(row,1)
opened_date = incident_sheet.cell(row,2)
shrt_desc = incident_sheet.cell(row,3)
requester = incident_sheet.cell(row,4)
incdnt_type = incident_sheet.cell(row,5)
priority = incident_sheet.cell(row,6)
assgn_grp = incident_sheet.cell(row,7)
assgn_to = incident_sheet.cell(row,8)
updated = incident_sheet.cell(row,9)
status = incident_sheet.cell(row,10)
sub_status = incident_sheet.cell(row,11)
##copy the data into the new sheet
incident_no_1 = combined_sheet.cell(row,1)
incident_no_1.value = incident_no.value
opened_date_1 = combined_sheet.cell(row,2)
opened_date_1.value = opened_date.value
shrt_desc_1 = combined_sheet.cell(row,3)
shrt_desc_1.value = shrt_desc.value
requester_1 = combined_sheet.cell(row,4)
requester_1.value = requester.value
incdnt_type_1 = combined_sheet.cell(row,5)
incdnt_type_1.value = incdnt_type.value
priority_1 = combined_sheet.cell(row,6)
priority_1.value = priority.value
assgn_grp_1 = combined_sheet.cell(row,7)
assgn_grp_1.value = assgn_grp.value
assgn_to_1 = combined_sheet.cell(row,8)
assgn_to_1.value = assgn_to.value
updated_1 = combined_sheet.cell(row,9)
updated_1.value = updated.value
status_1 = combined_sheet.cell(row,10)
status_1.value = status.value
sub_status_1 = combined_sheet.cell(row,11)
sub_status_1.value = sub_status.value
##print(f"The incident resolved yesterday {incident_no.value}")
combined_wb.save('combined_sheet.xlsx')

An alternative approach would be to build a list of date from multiple excel files and then write it to another file.
As a proof of concept:
import openpyxl as xl
from openpyxl import workbook
def provide_data(workbookName, sheetName):
wb = xl.load_workbook(workbookName)
sheet = wb[sheetName]
return [[y.value for y in x] for x in sheet.iter_rows()]
# This creates an array of rows, which contain an array of cell values.
# It will be much better to provide mapping for cells and return business object.
def save_data(list_of_sheets):
combined_wb = xl.Workbook()
combined_sheet = combined_wb.active
combined_sheet.title = "combined_sheet"
for sheet in list_of_sheets:
for row in sheet:
combined_sheet.append(row) # combining multiple rows.
combined_wb.save('combined_sheet.xlsx')
workSheetsToCopy = [['incident resolved yesterday.xlsx', 'Page 1'], ['other.xlsx', 'Page 1']]
workSheetsToCopy = [provide_data(x[0], x[1]) for x in workSheetsToCopy]
save_data(workSheetsToCopy)

How to copy worksheet onto a different worksheet (not as additional worksheet)

I'm trying to copy a worksheet from one excel file, to a worksheet on different excel file. But the problem is it is adding it as a additional sheet and not pasting on the existing sheet and overwriting the sheet. I know that I am using Before=wb2.Worksheets(1) which adds the new sheet before the existing sheet, but what is the argument to paste onto the existing sheet instead?
import time, os.path, os
from win32com.client import Dispatch
path1 = 'C:\\example.xlsx'
path2 = 'C:\\Todolist2.xlsx'
path3 = 'C:\\example2.xlsx'
xl = Dispatch("Excel.Application")
xl.Visible = True
wb1= xl.Workbooks.Open(Filename=path1)
wb2= xl.Workbooks.Open(Filename=path2)
ws1 = wb1.Worksheets(1)
ws1.Copy(Before=wb2.Worksheets(1))

One way to do it is using openpyxl library. If the cell has style, we can copy it to new sheet also.
import openpyxl as xl
from copy import copy
path1 = 'C:\\example.xlsx'
path2 = 'C:\\Todolist2.xlsx'
wb1 = xl.load_workbook(filename=path1)
ws1 = wb1.worksheets[0]
wb2 = xl.load_workbook(filename=path2)
ws2 = wb2.worksheets[0]
for row in ws1:
for cell in row:
ws2[cell.coordinate].value = cell.value
if cell.has_style:
ws2[cell.coordinate].font = copy(cell.font)
ws2[cell.coordinate].border = copy(cell.border)
ws2[cell.coordinate].fill = copy(cell.fill)
ws2[cell.coordinate].number_format = copy(cell.number_format)
ws2[cell.coordinate].protection = copy(cell.protection)
ws2[cell.coordinate].alignment = copy(cell.alignment)
wb2.save(path2)
Then you will see your sheet2 is replaced by sheet1.

#tomcy
The code is below. What I am really trying to accomplish is to be able to keep rewriting data to Todolist2.xlsx. I would really want to have Todolist2.xlsx open in excel application in windows and have it update the sheet whenever there is new data. So far I have found two ways to do this. One is the code you are helping me with using openpyxl. Doing it this way I think I will have to write data to Todolist2 then open. Then with new data, it will have to close before writing data back in. Then reopen it again. Below is what I have so far. Using the 10 sleep to allow me the chance to update example.xlsx so as to simulate writing new data to Todolist2. It works the first go, but on the second, it gives me permission denied to Todolist2.
import openpyxl as xl
from copy import copy
import time
path1 = 'C:\\example.xlsx'
path2 = 'C:\\Todolist2.xlsx'
wb1 = xl.load_workbook(filename=path1)
ws1 = wb1.worksheets[0]
wb2 = xl.load_workbook(filename=path2)
ws2 = wb2.worksheets[0]
while True:
for row in ws1:
for cell in row:
ws2[cell.coordinate].value = cell.value
if cell.has_style:
ws2[cell.coordinate].font = copy(cell.font)
ws2[cell.coordinate].border = copy(cell.border)
ws2[cell.coordinate].fill = copy(cell.fill)
ws2[cell.coordinate].number_format =
copy(cell.number_format)
ws2[cell.coordinate].protection = copy(cell.protection)
ws2[cell.coordinate].alignment = copy(cell.alignment)
wb2.save(path2)
wb2.close()
time.sleep(10) #during this time I will modify example.xlsx and
#save, so on the next go around it rewrites the
#new data to Todolist1.xlsx
The second way I'm trying to solve this is with win32com. This allows me to keep Todolist2 open in excel in windows while it writes to it from example, example1, then example2. But the problem is, it does not write on the activesheet, it keeps adding additional sheets. So on this one, If I can find a way to keep rewriting over the active sheet in Todolist2 or after it adds the additional sheet, if I can only delete one sheet i'm golden.
import time, os.path, os
from win32com.client import Dispatch
path1 = 'C:\\example.xlsx'
path2 = 'C:\\Todolist2.xlsx'
path3 = 'C:\\example2.xlsx'
path4 = 'C:\\example3.xlsx'
xl = Dispatch("Excel.Application")
xl.Visible = True
wb1= xl.Workbooks.Open(Filename=path1)
wb2= xl.Workbooks.Open(Filename=path2)
ws1 = wb1.Worksheets(1)
ws1.Copy(Before=wb2.Worksheets(1))
time.sleep(5)
wb3= xl.Workbooks.Open(Filename=path3)
ws3 = wb3.Worksheets(1)
ws2 = wb2.Worksheets(3) #it seems by using (3) is the only way it
#allows me to delete one sheet before it
#adds another.
ws2.Delete()
ws3.Copy(Before=wb2.Worksheets(1))
time.sleep(5)
wb4= xl.Workbooks.Open(Filename=path4)
ws4 = wb4.Worksheets(1)
ws2.Delete() #I got into trouble here, and throws error even
#though it does the delete and copy
ws4.Copy(Before=wb2.Worksheets(1))

How to copy a worksheet from one excel file to another?
from openpyxl import load_workbook
from copy import copy
def copySheet(target, source):
for (row, col), source_cell in source._cells.items():
target_cell = target.cell(column=col, row=row)
target_cell._value = source_cell._value
target_cell.data_type = source_cell.data_type
if source_cell.has_style:
target_cell.font = copy(source_cell.font)
target_cell.border = copy(source_cell.border)
target_cell.fill = copy(source_cell.fill)
target_cell.number_format = copy(source_cell.number_format)
target_cell.protection = copy(source_cell.protection)
target_cell.alignment = copy(source_cell.alignment)
if source_cell.hyperlink:
target_cell._hyperlink = copy(source_cell.hyperlink)
if source_cell.comment:
target_cell.comment = copy(source_cell.comment)
for attr in ('row_dimensions', 'column_dimensions'):
src = getattr(source, attr)
trg = getattr(target, attr)
for key, dim in src.items():
trg[key] = copy(dim)
trg[key].worksheet = trg
target.sheet_format = copy(source.sheet_format)
target.sheet_properties = copy(source.sheet_properties)
target.merged_cells = copy(source.merged_cells)
target.page_margins = copy(source.page_margins)
target.page_setup = copy(source.page_setup)
target.print_options = copy(source.print_options)
"copy to"
wb1 = load_workbook(path_to)
target = wb1.create_sheet("lol")
"copy from"
wb2 = load_workbook(path_from)
source = wb2.active
copySheet(target=target, source=source)
wb1.save("fusion.xlsx")

Openpyxl border line breaks when copying data from one sheet and then pasting it to another excel sheet

what I was trying to do using Openpyxl is that I copy some cell's values from excel sheet and then open another excel sheet and paste those values.
Unexpectedly I got the sheet where the border lines broke, especially combined cells.
I'd like to hear your opinion. Also, is there a better way to loop through cells? Currently it is only three cells, but what if it is over 100....
Thank you so much.
from openpyxl import load_workbook
import os
from os.path import exists
target_file = input('ex)201711 ')
wb = load_workbook(target_file + '.xlsm', data_only=True)
sheet = wb['summary']
num_people = len(sheet['A:A'])
for i in range(1, num_people):
val_1 = sheet.cell(row=i + 1, column=1).value
val_2 = sheet.cell(row=i + 1, column=2).value
val_3 = sheet.cell(row=i + 1, column=3).value
file_name = '2017(' + val_1 + ').xlsx'
if not exists(file_name):
continue
else:
wb = load_workbook(file_name, data_only=True)
sheet2 = wb['summary2']
sheet2.cell(row=1, column=1).value = val_1
sheet2.cell(row=1, column=2).value = val_2
sheet2.cell(row=1, column=3).value = val_3
wb.save(file_name)

I might be misunderstanding the question, but a nested for loop looks ideal...
from openpyxl import load_workbook
import os
from os.path import exists
target_file = input('ex)201711 ')
wb = load_workbook(target_file + '.xlsm', data_only=True)
sheet = wb['summary']
num_people = len(sheet['A:A'])
num_vals = 3
for i in range(1, num_people):
for j in range(1,numvals+1):
val[j] = sheet.cell(row=i + 1, column=j).value
file_name = '2017(' + val[1] + ').xlsx'
if not exists(file_name):
continue
else:
wb = load_workbook(file_name, data_only=True)
sheet2 = wb['summary2']
for j in range(1,numvals+1):
sheet2.cell(row=1, column=j).value = val[j]
wb.save(file_name)
Additionally, openpyxl now supports array-index style cell referencing, which can make things easier to read/write:
#these are equivalent
sheet2.cell(row=1, column=j).value = val[j]
sheet2.[openpyxl.utils.get_column_letter(j) + '1'] = val[j]
With regards to formatting borders, in openpyxl it is (unfortunately) necessary to explicitly specify all borders on merged cells, unlike the default excel action where the styling of the topleft cell is applied over the merge.

getting formatting data in openpyxl

I'm having trouble extracting the styles from an excel worksheet using openpyxl, in the case below I'm creating a spreadsheet, and I can see that the formatting is correct, but I don't know how to get that data back.
In my real use case I'm just reading a file - I'm not the one creating it, so I'd like to be able to programmatically retrieve the formatting.
from openpyxl import Workbook
from openpyxl.reader.excel import load_workbook
from openpyxl.style import Color, Fill
#this is all setup
wb = Workbook()
dest_filename = 'c:\\temp\\test.xlsx'
ws = wb.worksheets[0]
ws.title = 'test'
ws.cell('A1').value = 'foo'
ws.cell('A1').style.font.bold = True
ws.cell('B1').value = 'bar'
ws.cell('B1').style.fill.fill_type = Fill.FILL_SOLID
ws.cell('B1').style.fill.start_color.index = Color.DARKYELLOW
wb.save(filename = dest_filename )
#setup complete
book = load_workbook( filename = dest_filename )
sheet = book.get_sheet_by_name('test')
#value work properly
print sheet.cell('A1').value #returns foo
print sheet.cell('B1').value #return bar
#formatting does not - THIS IS THE PROBLEM CODE
print sheet.cell('A1').style.font.bold #returns False
print sheet.cell('B1').style.fill.fill_type #returns none
print sheet.cell('B1').style.fill.start_color.index #returns FFFFFFFF
print sheet.cell('B1').has_style #returns true
#but these 2 return the same values! even thought C1 was never set and should be different
print sheet.get_style('A1')
print sheet.get_style('C1')

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Search and find values in two excel sheets(xlrd to openpyxl) - python

Related

Excel pandas very slow collecting Data from XLSX file - slow script - Python

Read and Write multiple excel data into one excel file using openpyxl

How to copy worksheet onto a different worksheet (not as additional worksheet)

Openpyxl border line breaks when copying data from one sheet and then pasting it to another excel sheet

getting formatting data in openpyxl

Categories

Resources