Excel pandas very slow collecting Data from XLSX file - slow script - Python - python

Hey I wanted to get quick Output for slicing my Source XLSX file for collect Data from Cell on index: 11, but my script seems to working very slow.
Expected output is Collected items from column index(11) Cell when on column index(16) Cell value = None. Script check from begin every row on column index(16) if value == None, but on my file i have thousands positions before script start collecting Data,
Can I speed up this process or find faster way?
XLUtils.py:
import openpyxl
def getRowCount(file,sheetName):
workbook = openpyxl.load_workbook(file)
sheet = workbook.get_sheet_by_name(sheetName)
return(sheet.max_row)
def getColumnCount(file,sheetName):
workbook = openpyxl.load_workbook(file)
sheet = workbook.get_sheet_by_name(sheetName)
return(sheet.max_column)
def readData(file,sheetName,rownum,columnno):
workbook = openpyxl.load_workbook(file)
sheet = workbook.get_sheet_by_name(sheetName)
return sheet.cell(row=rownum, column=columnno).value
def writeData(file,sheetName,rownum,columno,data):
workbook = openpyxl.load_workbook(file)
sheet = workbook.get_sheet_by_name(sheetName)
sheet.cell(row=rownum, column=columno).value = data
workbook.save(file)
My Script:
import pandas as pd
import XLUtils
from openpyxl import Workbook
from datetime import datetime
def LISTING_GENERATOR():
#This function Create product list file for current day
i = 1
i_range = 50
r = 1
x = 1
rows = 50
LISTED_DATE = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,16)
if LISTED_DATE == None:
ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,11)
print(ASIN)
wb.save('Product_'+TODAY_DATE + '.xlsx')
print('File has been created: ',FILE)
for r in range(2,rows+1):
CHECK_ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r, 11)
if CHECK_ASIN == None:
print('No more ASIN avaiable')
break
FE = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r, 16)
if FE != None:
print('Product last added: ',FE)
if FE == None:
ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,11)
print(f'ASIN nr. {i}: {ASIN}')
LIST.append(ASIN)
XLUtils.writeData(FILE,'Sheet',x,1,ASIN)
XLUtils.writeData(PRODUCT_RESEARCH,'Sheet1',r,16,TODAY_DATE_XLSX)
x+=1
i+=1
if i >= i_range:
print(f'List of {i_range} items, has been Created.')
break
else:
print('Error: product on the list')
XLUtils.writeData(FILE,'Sheet',x,1,' ')
print('Created list:\n',LIST)
print('__________________________________')
ALL_ITEMS = (TODAY_DATE + '.xlsx')
print('CSV file has been named: ', ALL_ITEMS)
DATA_XLS = pd.read_excel(FILE, 'Sheet', dtype=str, index_col=None)
DATA_XLS.to_csv(PRODUCT_NAME+'.csv', encoding='utf-8', index=False)
if __name__ == '__main__':
#---Product_list_generator ---# Variable --
wb = Workbook()
LIST = []
TODAY_DATE = datetime.today().strftime('%d_%m_%Y')
TODAY_DATE_XLSX = datetime.today().strftime('%d/%m/%Y')
PRODUCT_RESEARCH = ('Product_Research_copy2.xlsx') #<--- xlsx File
FILE = ('Product_'+TODAY_DATE + '.xlsx')
PRODUCT_NAME = ('Product_'+TODAY_DATE)
LISTING_GENERATOR()

Related

Openpyxl can't write an excel file more than once

I am trying to write to excel files using openpyxl module. For some reason it only lets me write once. If I try to write again it raises:
PermissionError: [Errno 13] Permission denied: 'expenses.xlsx'
The excel file and python program are in the same folder on D drive. What's the problem?
from openpyxl import Workbook
from openpyxl import load_workbook
from datetime import datetime
import os
class ExpenseTracker:
def __init__(self, file_name = "expenses.xlsx"):
self.fname = file_name
self.load_wb()
def load_wb(self):
"""
if the excel file doesn't exists it creates a new one
with a sheet, and calls self.col_values() which
adds values for first two columns in row 1
"""
try:
wb = load_workbook(self.fname)
except Exception:
wb = Workbook()
wb.create_sheet("Expenses", 0)
self.col_values()
wb.save(self.fname)
finally:
self.wb = wb
self.ws = self.wb["Expenses"]
def col_values(self):
# adds values for first two columns in row 1
self.ws.cell(row = 1, column = 1).value = "Date"
self.ws.cell(row = 1, column = 2).value = "Spent"
def spend_income(self, amount):
date = datetime.now()
date_formatted = date.strftime("%d.%b %Y")
last_row = self.ws.max_row + 1
last_col = self.ws.max_column + 1
self.ws.cell(row = last_row, column = 1).value = date_formatted
self.ws.cell(row = last_row, column = 2).value = amount
# writes under the last input in cols 1 and 2
self.wb.save(self.fname)
wbook = ExpenseTracker()
wbook.spend_income(5)

Search and find values in two excel sheets(xlrd to openpyxl)

wrbk = xlrd.open_workbook("D:Book1.xlsx")
idx = 0
book_1 = xlrd.open_workbook("D:Book2.xlsx")
sh_1 = book_1.sheet_by_name('Sheet4')
i = 0
for x in range(sh_1.nrows):
i = i + 1
if i >= sh_1.nrows:
break
if sh_1.cell(i, 2).value:
concat = sh_1.cell(i, 2).value
for y in range(len(wrbk.sheets())):
sht = wrbk.sheet_by_index(y)
for j in range(sht.ncols):
for cell in range(sht.nrows):
list = str(sht.cell(cell, j).value)
if list.__contains__(concat):
print(sh_1.cell(i, 2).value)
Im using this code to find a value in a workbook and then search that value in another workbook.
I'm using xlrd, the output is fine so far but i can't read and write with xlrd.i need suggestions to change this code from xlrd to openpyxl.
This defines a function to do the search and uses a Regular Expression to do the 'contains' match. Change the print to suit.
from openpyxl import load_workbook
import re
# open workbook
excel_file1 = 'D:Book1.xlsx'
wb1 = load_workbook(excel_file1) # wrbk
ws1 = wb1["Sheet1"]
excel_file2 = 'D:Book2.xlsx'
wb2 = load_workbook(excel_file2) # book_1
ws2 = wb2["Sheet4"] # sh_1
# fn to search all sheets in workbook
def myfind(wb,s):
for ws in wb.worksheets:
for c in range(1,ws.max_column+1):
for r in range(1,ws.max_row+1):
txt = ws.cell(r,c).value
if txt is None:
pass
elif re.search(s,txt):
print("Found",s,txt,ws,r,c)
# scan col C
for r in range(1,ws2.max_row+1):
s = ws2.cell(r, 3).value
if s is None:
pass
else:
print(r,s)
myfind(wb1,s)

Openpyxl coverts numbers in CSV as text in XLSX file

I have a series of CSV file like this one. I’m trying to convert and merge them into an xlsx file with python and openpyxl with this code:
import csv
import openpyxl
import glob
csvpath = 'C:/Users/Lorenzo/Downloads/CSV/'
csvfiles = glob.glob(csvpath + '*.csv')
data = input('Inserisci data Simulazione: ')
destinationfilepath = 'C:/Users/Lorenzo/Desktop/Simulazione_' + data + '.xlsx'
wb = openpyxl.Workbook()
for i in range(len(csvfiles)):
filename = csvfiles[i]
reader = csv.reader(open(filename), delimiter=',')
csvname = filename[len(csvpath):-4]
ws1 = wb.create_sheet(csvname)
k=0
for row in reader:
if k==0:
ws1.append(row)
else:
g=0
for cell in row:
c= ws1.cell(row=k, column=g)
c.value = float(cell)
g=g+1
k=k+1
ws1['A1'] = 'Iteration'
ws1['B1'] = 'CD'
ws1['C1'] = 'CL'
ws1['D1'] = 'CL_F'
ws1['E1'] = 'CL_R'
ws1['F1'] = 'CM'
sheet = wb['Sheet']
wb.remove(sheet)
wb.save(destinationfilepath)
The code runs but in most cells (and strangely enough not in all cells) I get the error “number stored as text” despite using the command float like suggested in this and similar topics.
What is that I'm doing wrong?

XLSXWriter refuses to create a second Excel File

I'm working on a program to split excel files into sections of 1000. I can't seem to get it to create a second excel file, as xlsxwriter doesn't create the second file.
from os.path import join, dirname, abspath
from xlrd.sheet import ctype_text
import csv
import os
import sys
import xlrd
import xlsxwriter
import xlwt
file_paths = sys.argv[1:]
draganddrop = ''.join(file_paths)
beginGrab = 0
counting = 0
endGrab = 1000
thousands = 0
if draganddrop == "":
fileName = raw_input("\nInput the file with extension\n>")
else:
fileName = draganddrop
stopPoint = fileName.index('.')
prepRev = fileName[stopPoint:]
preName = fileName[:stopPoint]
if prepRev == ".csv":
excelFile = xlsxwriter.Workbook(preName + '.xlsx')
worksheet = excelFile.add_worksheet()
with open(fileName,'rb') as f:
content = csv.reader(f)
for index_col, data_in_col in enumerate(content):
for index_row, data_in_cell in enumerate(data_in_col):
worksheet.write(index_col,index_row,data_in_cell)
excelFile.close()
fileName = (preName + '.xlsx')
delMe = 1
print("Temporary Convert to xlsx done.\n")
stopPoint = fileName.index('.')
prepRev = fileName[0:stopPoint]
fname = join(dirname(abspath(__file__)), fileName)
xl_workbook = xlrd.open_workbook(fname)
sheet_names = xl_workbook.sheet_names()
xl_sheet = xl_workbook.sheet_by_name(sheet_names[0])
book = xlwt.Workbook(encoding="utf-8")
worksheet = book.add_sheet("Results", cell_overwrite_ok=True)
workbook = xlrd.open_workbook(fileName)
for sheet in workbook.sheets():
for row in range(sheet.nrows):
row = int(row)
if(int(row)>1000):
subDivide = int(row) / 1000
while(thousands != subDivide + 1):
thousands = thousands + 1
counting = 0
totalName = preName + "_" + str(thousands) + ".xlsx"
print(totalName)
excelFile = xlsxwriter.Workbook(str(totalName))
worksheet = excelFile.add_worksheet()
with open(totalName,'rb') as f:
col = xl_sheet.col_slice(0,1,10101010)
for idx, cell_obj in enumerate(col, start=beginGrab):
counting = counting + 1
if(counting == 1000):
break
cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
cell_obj_str = str(cell_obj)
telePhone = (cell_obj_str[7:19])
worksheet.write(idx+1, 0, "1" + telePhone)
worksheet.write(0,0, "Telephone Number")
beginGrab = thousands * 1000
endGrab = beginGrab + 1000
excelFile.close()
excelFile = None
else:
print("Mate, this is Tiny!")
print ("Ding! Job Done!")
I've been rubber ducking this and I can't find where I'm at fault.
EDIT:
SOLVED!!
By creating a sheet and then closing it, the program can then grasp it. I will probably make a git issue about this.
if prepRev == ".csv":
totalName = preName + '.xlsx'
excelFile = xlsxwriter.Workbook(totalName)
excelFile.close()
Closing it lets open see it while it still contains the same info.
excelFile = xlsxwriter.Workbook(totalName)
worksheet = excelFile.add_worksheet()
with open(fileName,'rb') as f:
Doesn't the save/close line need to be within the while loop? Otherwise it looks like it will only save either the first/last item:
while(thousands != subDivide + 1):
# write file
excelFile.close()
that line is probably the reason why you cannot read back your file and your script crashes:
fname = join(dirname(abspath('__file__')), '%s' % fileName)
'__file__' shouldn't have quotes. I'd do:
fname = join(dirname(abspath(__file__)), fileName)

How to use hyperlink in excel to link to another worksheet cell in same workbook in python

How to link to antother worksheet in same workbook in excel
this python script is not
from openpyxl import load_workbook
wb = load_workbook("excel_hyper_link_test.xlsx")
ws = wb.get_sheet_by_name("Sheet1")
link = "excel_hyper_link_test.xlsx#Sheet2!E5"
print ws.cell(row=1, column=1).value
ws.cell(row=1, column=1).hyperlink = (link)
After running this script i opened excel sheet and i could not see any hyperlink
note: I am using linux platform
This worked for me
from openpyxl import load_workbook
xlsFile='excel_hyper_link_test.xlsx'
wbook = load_workbook(xlsFile)
wsheet1= wbook.get_sheet_by_name('Sheet1')
cell1 = wsheet1.cell('A1')
cell1.hyperlink = '#Sheet2!E5'
cell1.value=r'XXX'
wbook.save(xlsFile)
import pandas as pd
import openpyxl as opxl
def hyperlinking(New_file_path):
xls = pd.ExcelFile(New_file_path)
sheets = xls.sheet_names # Get the worksheet names
wb = opxl.load_workbook(New_file_path)
ws = wb.create_sheet("Consolitated_Sheet") # Create a New worksheet
ws['A1'] = "Sheet_Name"; ws['B1'] = "Active_link" #New sheet we are proving column names
for i, j in enumerate(sheets):
# print('A'+str(i+2) + ' value is: ' + j)
ws['A' + str(i + 2)] = j # As A1 cell is occupied with column name we are taking reference of second row(A2).
ws['B' + str(i + 2)].value = '=HYPERLINK("%s", "%s")' % ('#' + str(j) + '!A1', 'Clickhere') # For A2 cell value we are providing hyperlinks of respective sheet
wb.save(New_file_path)
wb.close()

Categories

Resources