Hey, I want to quickly slice my source XLSX file and collect data from the cells in column index 11, but my script seems to be running very slowly.
The expected output is the list of values from column index 11 for every row whose cell in column index 16 is None (empty). The script checks column index 16 of every row starting from the top, but my file has thousands of already-filled rows before the point where the script starts collecting data.
Can I speed up this process, or is there a faster way?
XLUtils.py:
import openpyxl
def getRowCount(file, sheetName):
    """Return the highest row index that holds data in a worksheet.

    The workbook is loaded from disk on every call, so calling this
    repeatedly on a large file is expensive.

    Args:
        file: Path of the .xlsx workbook to open.
        sheetName: Title of the worksheet to inspect.

    Returns:
        The worksheet's ``max_row`` value.

    Raises:
        KeyError: If the workbook has no worksheet called *sheetName*.
    """
    workbook = openpyxl.load_workbook(file)
    # Index the workbook directly; get_sheet_by_name() is deprecated.
    sheet = workbook[sheetName]
    return sheet.max_row
def getColumnCount(file, sheetName):
    """Return the highest column index that holds data in a worksheet.

    The workbook is loaded from disk on every call, so calling this
    repeatedly on a large file is expensive.

    Args:
        file: Path of the .xlsx workbook to open.
        sheetName: Title of the worksheet to inspect.

    Returns:
        The worksheet's ``max_column`` value.

    Raises:
        KeyError: If the workbook has no worksheet called *sheetName*.
    """
    workbook = openpyxl.load_workbook(file)
    # Index the workbook directly; get_sheet_by_name() is deprecated.
    sheet = workbook[sheetName]
    return sheet.max_column
def readData(file, sheetName, rownum, columnno):
    """Return the value of a single cell.

    NOTE: the whole workbook is parsed from disk on every call. Reading
    many cells through this helper therefore costs one full file load per
    cell; callers that scan many rows should load the workbook once and
    read from the worksheet object directly.

    Args:
        file: Path of the .xlsx workbook to open.
        sheetName: Title of the worksheet to read from.
        rownum: 1-based row index of the cell.
        columnno: 1-based column index of the cell.

    Returns:
        The cell's value, or ``None`` when the cell is empty.

    Raises:
        KeyError: If the workbook has no worksheet called *sheetName*.
    """
    workbook = openpyxl.load_workbook(file)
    # Index the workbook directly; get_sheet_by_name() is deprecated.
    sheet = workbook[sheetName]
    return sheet.cell(row=rownum, column=columnno).value
def writeData(file, sheetName, rownum, columno, data):
    """Write *data* into a single cell and save the workbook in place.

    NOTE: the whole workbook is loaded and written back on every call, so
    writing many cells through this helper costs one full load and one
    full save per cell.

    Args:
        file: Path of the .xlsx workbook to modify.
        sheetName: Title of the worksheet to write to.
        rownum: 1-based row index of the cell.
        columno: 1-based column index of the cell.
        data: Value to store in the cell.

    Raises:
        KeyError: If the workbook has no worksheet called *sheetName*.
    """
    workbook = openpyxl.load_workbook(file)
    # Index the workbook directly; get_sheet_by_name() is deprecated.
    sheet = workbook[sheetName]
    sheet.cell(row=rownum, column=columno).value = data
    workbook.save(file)
My Script:
import pandas as pd
import XLUtils
from openpyxl import Workbook
from datetime import datetime
def LISTING_GENERATOR():
# Create the product list file for the current day.
# Reads the module-level names PRODUCT_RESEARCH, FILE, TODAY_DATE,
# TODAY_DATE_XLSX, PRODUCT_NAME, LIST and wb, which the __main__ section
# assigns before calling this function.
# NOTE(review): the indentation of this paste is lost, so the exact nesting of
# the statements below (in particular the `break`/`else` lines) must be
# confirmed against the author's original file.
# NOTE: every XLUtils.readData call loads the whole workbook from disk again,
# and every XLUtils.writeData call loads and saves it. The loop below makes
# several such calls per row, which is where the running time goes.
i = 1
i_range = 50
r = 1
x = 1
rows = 50
# Row 1: column 16 is read as the listed date, column 11 as the ASIN.
LISTED_DATE = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,16)
if LISTED_DATE == None:
ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,11)
print(ASIN)
# Save the (still empty) output workbook under today's file name.
wb.save('Product_'+TODAY_DATE + '.xlsx')
print('File has been created: ',FILE)
# Scan rows 2..rows of the research sheet.
for r in range(2,rows+1):
CHECK_ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r, 11)
# An empty column-11 cell ends the scan.
if CHECK_ASIN == None:
print('No more ASIN avaiable')
break
FE = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r, 16)
if FE != None:
print('Product last added: ',FE)
# Column 16 empty: collect this row's ASIN.
if FE == None:
ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,11)
print(f'ASIN nr. {i}: {ASIN}')
LIST.append(ASIN)
# Append the ASIN to column 1 of the output file, then write today's date
# into column 16 of the research sheet.
XLUtils.writeData(FILE,'Sheet',x,1,ASIN)
XLUtils.writeData(PRODUCT_RESEARCH,'Sheet1',r,16,TODAY_DATE_XLSX)
x+=1
i+=1
# Stop once i reaches i_range.
if i >= i_range:
print(f'List of {i_range} items, has been Created.')
break
else:
print('Error: product on the list')
# Write a single space into the next output cell (row x, column 1).
XLUtils.writeData(FILE,'Sheet',x,1,' ')
print('Created list:\n',LIST)
print('__________________________________')
ALL_ITEMS = (TODAY_DATE + '.xlsx')
print('CSV file has been named: ', ALL_ITEMS)
# Re-read the output workbook with pandas and export it as CSV.
DATA_XLS = pd.read_excel(FILE, 'Sheet', dtype=str, index_col=None)
DATA_XLS.to_csv(PRODUCT_NAME+'.csv', encoding='utf-8', index=False)
if __name__ == '__main__':
    # --- Product list generator: module-level settings read by LISTING_GENERATOR ---
    wb = Workbook()  # new, empty workbook that becomes today's product list file
    LIST = []  # ASINs collected during this run
    # Take the timestamp once so both date strings always refer to the same day
    # (two separate today() calls could straddle midnight).
    NOW = datetime.today()
    TODAY_DATE = NOW.strftime('%d_%m_%Y')  # used in file names
    TODAY_DATE_XLSX = NOW.strftime('%d/%m/%Y')  # written into the research sheet
    PRODUCT_RESEARCH = 'Product_Research_copy2.xlsx'  # <--- xlsx File
    PRODUCT_NAME = 'Product_' + TODAY_DATE
    FILE = PRODUCT_NAME + '.xlsx'
    LISTING_GENERATOR()
Related
I am trying to write to excel files using openpyxl module. For some reason it only lets me write once. If I try to write again it raises:
PermissionError: [Errno 13] Permission denied: 'expenses.xlsx'
The excel file and python program are in the same folder on D drive. What's the problem?
from openpyxl import Workbook
from openpyxl import load_workbook
from datetime import datetime
import os
class ExpenseTracker:
    """Append dated expense entries to the "Expenses" sheet of a workbook.

    The workbook stays loaded in memory (``self.wb`` / ``self.ws``) and is
    written back to ``self.fname`` after every new entry.
    """

    def __init__(self, file_name="expenses.xlsx"):
        """Remember the workbook path and load (or create) the workbook."""
        self.fname = file_name
        self.load_wb()

    def load_wb(self):
        """Load the workbook, creating it first when the file is missing.

        A newly created workbook gets an "Expenses" sheet with the header
        row written by ``col_values()`` and is saved immediately.

        Only a missing file triggers creation. Any other load error (for
        example an unreadable or locked file) propagates, so that an
        existing file is never silently replaced by an empty one.

        Raises:
            KeyError: If an existing workbook has no "Expenses" sheet.
        """
        try:
            wb = load_workbook(self.fname)
            created = False
        except FileNotFoundError:
            wb = Workbook()
            wb.create_sheet("Expenses", 0)
            created = True

        # Bind the attributes before col_values() runs: it writes through
        # self.ws, which did not exist yet at that point in the old order.
        self.wb = wb
        self.ws = self.wb["Expenses"]

        if created:
            self.col_values()
            self.wb.save(self.fname)

    def col_values(self):
        """Write the header labels into the first two columns of row 1."""
        self.ws.cell(row=1, column=1).value = "Date"
        self.ws.cell(row=1, column=2).value = "Spent"

    def spend_income(self, amount):
        """Append today's date and *amount* below the last used row, then save.

        Args:
            amount: Value stored in column 2 of the new row.
        """
        date_formatted = datetime.now().strftime("%d.%b %Y")
        next_row = self.ws.max_row + 1
        # Writes under the last input in columns 1 and 2.
        self.ws.cell(row=next_row, column=1).value = date_formatted
        self.ws.cell(row=next_row, column=2).value = amount
        self.wb.save(self.fname)
# Example usage: build a tracker for the default file name and record an amount of 5.
wbook = ExpenseTracker()
wbook.spend_income(5)
# Take each non-empty value in the third column of Book2's "Sheet4" and look
# for it, as a substring, in every cell of every sheet of Book1.
wrbk = xlrd.open_workbook("D:Book1.xlsx")
book_1 = xlrd.open_workbook("D:Book2.xlsx")
sh_1 = book_1.sheet_by_name('Sheet4')

# Row 0 is skipped, exactly as the original counter-based loop did.
for i in range(1, sh_1.nrows):
    concat = sh_1.cell(i, 2).value
    if not concat:
        continue
    # Compare text with text: the cell value may not be a string, and
    # `str in str` is the only substring test that cannot raise TypeError.
    needle = str(concat)
    for sht in wrbk.sheets():
        for j in range(sht.ncols):
            for row in range(sht.nrows):
                text = str(sht.cell(row, j).value)
                if needle in text:
                    print(sh_1.cell(i, 2).value)
I'm using this code to find a value in one workbook and then search for that value in another workbook.
I'm using xlrd and the output is fine so far, but I can't both read and write with xlrd. I need suggestions for changing this code from xlrd to openpyxl.
This defines a function to do the search and uses a Regular Expression to do the 'contains' match. Change the print to suit.
from openpyxl import load_workbook
import re
# Open both workbooks.
# wb1/ws1 stand in for `wrbk` from the question; wb2/ws2 for `book_1`/`sh_1`.
excel_file1 = 'D:Book1.xlsx'
wb1 = load_workbook(excel_file1) # wrbk
ws1 = wb1["Sheet1"]
excel_file2 = 'D:Book2.xlsx'
wb2 = load_workbook(excel_file2) # book_1
ws2 = wb2["Sheet4"] # sh_1
# fn to search all sheets in workbook
def myfind(wb, s):
    """Print every cell of workbook *wb* whose text contains *s*.

    All worksheets are scanned column by column. The match is a literal
    "contains" test done with a regular expression: *s* is escaped first so
    characters such as ``.`` or ``(`` are not treated as regex syntax, and
    both *s* and the cell value are converted to text so numeric cells do
    not raise ``TypeError``.

    Args:
        wb: Workbook whose ``worksheets`` are searched.
        s: Value to look for.
    """
    pattern = re.compile(re.escape(str(s)))  # compiled once, reused per cell
    for ws in wb.worksheets:
        for c in range(1, ws.max_column + 1):
            for r in range(1, ws.max_row + 1):
                txt = ws.cell(r, c).value
                if txt is None:
                    continue
                if pattern.search(str(txt)):
                    print("Found", s, txt, ws, r, c)
# Walk column C of ws2 top to bottom; every non-empty value is echoed and
# then searched for in wb1.
for r in range(1, ws2.max_row + 1):
    s = ws2.cell(r, 3).value
    if s is not None:
        print(r, s)
        myfind(wb1, s)
I have a series of CSV files like this one. I'm trying to convert and merge them into a single xlsx file with Python and openpyxl, using this code:
import csv
import openpyxl
import glob
# Merge every CSV file found in csvpath into one workbook, one worksheet per
# CSV file, and save the result under a user-supplied simulation date.
csvpath = 'C:/Users/Lorenzo/Downloads/CSV/'
csvfiles = glob.glob(csvpath + '*.csv')
data = input('Inserisci data Simulazione: ')
destinationfilepath = 'C:/Users/Lorenzo/Desktop/Simulazione_' + data + '.xlsx'
wb = openpyxl.Workbook()
for filename in csvfiles:
    # Sheet title = file name without the directory prefix and ".csv".
    csvname = filename[len(csvpath):-4]
    ws1 = wb.create_sheet(csvname)
    # newline='' is what the csv module expects; `with` closes the file.
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for k, row in enumerate(reader):
            if k == 0:
                # Header row is copied as text.
                ws1.append(row)
            else:
                # append() writes to the next free row, which avoids the
                # 0-based row/column arithmetic: Worksheet.cell() is
                # 1-based, so column=0 is invalid and row=k lands one row
                # too high. float() raises ValueError on non-numeric text.
                ws1.append([float(cell) for cell in row])
    # Replace the first six header cells with the fixed column names.
    ws1['A1'] = 'Iteration'
    ws1['B1'] = 'CD'
    ws1['C1'] = 'CL'
    ws1['D1'] = 'CL_F'
    ws1['E1'] = 'CL_R'
    ws1['F1'] = 'CM'
# Drop the default empty sheet, but only when at least one CSV sheet was
# added: a workbook with no sheets cannot be saved.
if len(wb.sheetnames) > 1:
    sheet = wb['Sheet']
    wb.remove(sheet)
wb.save(destinationfilepath)
The code runs, but in most cells (and, strangely enough, not in all cells) I get the warning "number stored as text", despite using float() as suggested in this and similar topics.
What am I doing wrong?
I'm working on a program to split excel files into sections of 1000. I can't seem to get it to create a second excel file, as xlsxwriter doesn't create the second file.
from os.path import join, dirname, abspath
from xlrd.sheet import ctype_text
import csv
import os
import sys
import xlrd
import xlsxwriter
import xlwt
# Python 2 script (it uses raw_input). Per the accompanying description it is
# meant to split a spreadsheet into files of 1000 entries each.
# NOTE(review): the indentation of this paste is lost; confirm the nesting of
# the loops below against the author's original file.

# Input file: any command-line arguments are joined into one path string.
file_paths = sys.argv[1:]
draganddrop = ''.join(file_paths)
beginGrab = 0
counting = 0
endGrab = 1000
thousands = 0
# With no argument given, ask for the file name interactively.
if draganddrop == "":
fileName = raw_input("\nInput the file with extension\n>")
else:
fileName = draganddrop
# Split the name at its first '.': preName is the stem, prepRev the rest.
stopPoint = fileName.index('.')
prepRev = fileName[stopPoint:]
preName = fileName[:stopPoint]
# CSV input is first copied cell by cell into <preName>.xlsx via xlsxwriter.
if prepRev == ".csv":
excelFile = xlsxwriter.Workbook(preName + '.xlsx')
worksheet = excelFile.add_worksheet()
with open(fileName,'rb') as f:
content = csv.reader(f)
for index_col, data_in_col in enumerate(content):
for index_row, data_in_cell in enumerate(data_in_col):
worksheet.write(index_col,index_row,data_in_cell)
excelFile.close()
fileName = (preName + '.xlsx')
# delMe is set here but not read anywhere else in this block.
delMe = 1
print("Temporary Convert to xlsx done.\n")
# prepRev is rebound to the part of fileName before its first '.'.
stopPoint = fileName.index('.')
prepRev = fileName[0:stopPoint]
# Open the workbook next to this script with xlrd and take its first sheet.
fname = join(dirname(abspath(__file__)), fileName)
xl_workbook = xlrd.open_workbook(fname)
sheet_names = xl_workbook.sheet_names()
xl_sheet = xl_workbook.sheet_by_name(sheet_names[0])
# An xlwt workbook with a "Results" sheet is created here; `book` is not
# referenced again in this block and `worksheet` is rebound further down.
book = xlwt.Workbook(encoding="utf-8")
worksheet = book.add_sheet("Results", cell_overwrite_ok=True)
# The input file is opened a second time, by fileName instead of fname.
workbook = xlrd.open_workbook(fileName)
for sheet in workbook.sheets():
for row in range(sheet.nrows):
row = int(row)
# Only row indices above 1000 reach the splitting branch.
if(int(row)>1000):
subDivide = int(row) / 1000
while(thousands != subDivide + 1):
thousands = thousands + 1
counting = 0
# One output workbook per chunk: <preName>_<n>.xlsx
totalName = preName + "_" + str(thousands) + ".xlsx"
print(totalName)
excelFile = xlsxwriter.Workbook(str(totalName))
worksheet = excelFile.add_worksheet()
# NOTE(review): totalName is opened for reading here although
# excelFile.close() is only called further down - confirm the file exists
# on disk at this point.
with open(totalName,'rb') as f:
# presumably column 0 from row 1 onward (col_slice(colx, start_rowx, end_rowx)) - TODO confirm
col = xl_sheet.col_slice(0,1,10101010)
for idx, cell_obj in enumerate(col, start=beginGrab):
counting = counting + 1
# Stop this chunk once counting reaches 1000.
if(counting == 1000):
break
cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
cell_obj_str = str(cell_obj)
# Characters 7..18 of the cell's string form are taken as the phone number.
telePhone = (cell_obj_str[7:19])
worksheet.write(idx+1, 0, "1" + telePhone)
worksheet.write(0,0, "Telephone Number")
beginGrab = thousands * 1000
endGrab = beginGrab + 1000
excelFile.close()
excelFile = None
else:
print("Mate, this is Tiny!")
print ("Ding! Job Done!")
I've been rubber ducking this and I can't find where I'm at fault.
EDIT:
SOLVED!!
By creating a sheet and then closing it, the program can then grasp it. I will probably make a git issue about this.
if prepRev == ".csv":
totalName = preName + '.xlsx'
excelFile = xlsxwriter.Workbook(totalName)
excelFile.close()
Closing it lets open see it while it still contains the same info.
excelFile = xlsxwriter.Workbook(totalName)
worksheet = excelFile.add_worksheet()
with open(fileName,'rb') as f:
Doesn't the save/close line need to be within the while loop? Otherwise it looks like it will only save either the first/last item:
while(thousands != subDivide + 1):
# write file
excelFile.close()
that line is probably the reason why you cannot read back your file and your script crashes:
fname = join(dirname(abspath('__file__')), '%s' % fileName)
'__file__' shouldn't have quotes. I'd do:
fname = join(dirname(abspath(__file__)), fileName)
How to link to another worksheet in the same workbook in Excel
This Python script is not working:
from openpyxl import load_workbook
# Python 2 syntax: `print` is used as a statement below.
wb = load_workbook("excel_hyper_link_test.xlsx")
ws = wb.get_sheet_by_name("Sheet1")
# Link target written as "<file>#<sheet>!<cell>".
link = "excel_hyper_link_test.xlsx#Sheet2!E5"
print ws.cell(row=1, column=1).value
ws.cell(row=1, column=1).hyperlink = (link)
# NOTE(review): nothing in this script calls wb.save() after the hyperlink is assigned.
After running this script I opened the Excel sheet and could not see any hyperlink.
Note: I am using a Linux platform.
This worked for me
from openpyxl import load_workbook
# Turn cell A1 of Sheet1 into an internal link to Sheet2!E5 and save in place.
xlsFile = 'excel_hyper_link_test.xlsx'
wbook = load_workbook(xlsFile)
# Index the workbook directly; get_sheet_by_name() is deprecated.
wsheet1 = wbook['Sheet1']
# Address the cell by coordinate through indexing: Worksheet.cell() takes
# numeric row/column arguments, not an 'A1' string.
cell1 = wsheet1['A1']
# A leading '#' makes the target a location inside this workbook.
cell1.hyperlink = '#Sheet2!E5'
cell1.value = r'XXX'
wbook.save(xlsFile)
import pandas as pd
import openpyxl as opxl
def hyperlinking(New_file_path):
    """Add an index sheet that links to every worksheet of a workbook.

    A new sheet named "Consolitated_Sheet" is appended to the workbook at
    *New_file_path*. Column A lists each existing sheet name and column B
    holds a HYPERLINK formula pointing at cell A1 of that sheet. The
    workbook is saved back to the same path.

    Args:
        New_file_path: Path of the .xlsx workbook to modify in place.
    """
    # Read the sheet names inside a context manager so the pandas handle on
    # the file is released before the same path is written below.
    with pd.ExcelFile(New_file_path) as xls:
        sheets = xls.sheet_names

    wb = opxl.load_workbook(New_file_path)
    ws = wb.create_sheet("Consolitated_Sheet")
    # Header row of the new index sheet.
    ws['A1'] = "Sheet_Name"
    ws['B1'] = "Active_link"
    # Row 1 holds the headers, so the entries start in row 2.
    for row, sheet_name in enumerate(sheets, start=2):
        ws['A' + str(row)] = sheet_name
        ws['B' + str(row)].value = '=HYPERLINK("%s", "%s")' % ('#' + str(sheet_name) + '!A1', 'Clickhere')
    wb.save(New_file_path)
    wb.close()