Unexpected index error while looping through workbooks in xlrd - python

I have a bunch of xlsx files, named from 1 to 169 like '1.xlsx', '2.xlsx' and so on... But while looping through the files to read them, the code does not see any rows in the 11th file (nrows for the 11th file is always 0, even though the file is not empty when opened manually) and raises an IndexError.
I have no idea what is going on with this code.
import os, xlwt, xlrd

file_dir = 'docs/'
files = os.listdir(file_dir)

# open a file, read the needed variables and return them as one row
def r_file(path, file):
    workbook = xlrd.open_workbook(path + file)
    info_sheet = workbook.sheet_by_index(0)
    data_sheet = workbook.sheet_by_index(1)
    # cells with company info
    print info_sheet.nrows
    company_name = info_sheet.cell(3,3).value
    company_leg_adress = info_sheet.cell(4,3).value
    company_fact_adress = info_sheet.cell(5,3).value
    # cells with answers
    question_1 = data_sheet.cell(3,10).value
    question_1_1 = data_sheet.cell(8,2).value
    question_1_2 = data_sheet.cell(13,2).value
    question_2 = data_sheet.cell(18,10).value
    question_3 = data_sheet.cell(25,10).value
    question_3_additional = [data_sheet.cell(nrow,10).value for nrow in range(30,48)]
    question_4 = data_sheet.cell(51,2).value
    question_5 = data_sheet.cell(56,2).value
    # get the full row as a list
    row_as_list = [company_name, company_leg_adress, company_fact_adress, question_1, question_1_1, question_1_2, question_2, question_3, question_4] + question_3_additional
    return row_as_list

# write companies to a file
def w_file(companies):
    wb = xlwt.Workbook()
    ws = wb.add_sheet('aggr', cell_overwrite_ok=True)
    for company in companies:
        row_as_list = r_file(file_dir, str(company) + '.xlsx')
        for each_index in row_as_list:
            ws.write(company, row_as_list.index(each_index), each_index)
    wb.save('aggregation.xls')

companies_amount = [x for x in range(1,170)]
w_file(companies_amount)
After running it, it returns:
Traceback (most recent call last):
File "/home/ubuntu/workspace/ex50/bin/writing.py", line 44, in <module>
w_file(companies_amount)
File "/home/ubuntu/workspace/ex50/bin/writing.py", line 36, in w_file
row_as_list = r_file(file_dir,str(company)+'.xlsx')
File "/home/ubuntu/workspace/ex50/bin/writing.py", line 13, in r_file
company_name = info_sheet.cell(3,3).value
File "/usr/local/lib/python2.7/dist-packages/xlrd-1.0.0-py2.7.egg/xlrd/sheet.py", line 401, in cell
self._cell_types[rowx][colx],
IndexError: list index out of range
It fails only on the 11th file (no matter which file happens to be the 11th).
Can you tell me what is going on here?
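In case it helps the debugging, here is a minimal defensive sketch (my own assumption, not part of the original question): it checks nrows before touching any cells, names the workbook that looks empty to xlrd, and skips it instead of crashing, reusing the r_file()/w_file() names from the code above. It also writes columns by position with enumerate(), since list.index() returns the first match when a row contains duplicate values.

def safe_r_file(path, file):
    workbook = xlrd.open_workbook(path + file)
    info_sheet = workbook.sheet_by_index(0)
    data_sheet = workbook.sheet_by_index(1)
    # if xlrd reports no rows, name the offending workbook and skip it
    if info_sheet.nrows == 0 or data_sheet.nrows == 0:
        print('skipping %s: info nrows=%d, data nrows=%d' % (file, info_sheet.nrows, data_sheet.nrows))
        return None
    return r_file(path, file)

def w_file(companies):
    wb = xlwt.Workbook()
    ws = wb.add_sheet('aggr', cell_overwrite_ok=True)
    for company in companies:
        row_as_list = safe_r_file(file_dir, str(company) + '.xlsx')
        if row_as_list is None:
            continue
        # write each value into its own column, by position
        for col, value in enumerate(row_as_list):
            ws.write(company, col, value)
    wb.save('aggregation.xls')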

Related

Python debugging error and split not working

file = open("teszt.txt", "r")  # opening the file
number_of_lines = 0
for line in file:  # counts the lines that start with 2021/
    line = line.strip("\n")
    words = line.split()
    if line[:5] == "2021/":
        number_of_lines += 1
for x in file:
    x = file.split()
    print(x)
# print("Number of lines, start with 2021: ", number_of_lines)
from openpyxl import Workbook
wb = Workbook()
ws = wb.active
text = "2021/03/01 08:28:22"
date = 1
while date <= number_of_lines:  # writes the texts into cells
    ws[f'A{date}'] = f'{text}'
    date += 1
file.close()
wb.save("teszt.xlsx")
When I try to debug it, it just gives me this error:
Exception has occurred: FileNotFoundError
[Errno 2] No such file or directory: 'teszt.txt'
but when I run the code normally, nothing goes wrong.
The other thing is that when I try to use split(), it just doesn't work.
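A hedged guess at both symptoms, sketched below (this is my reading, not a confirmed answer): the debugger may launch the script with a different working directory, so the relative path "teszt.txt" is not found there, and split() is being called on the file object (file.split()) instead of on the line, inside a second loop that starts after the file iterator has already been exhausted by the first one. Anchoring the path to the script's own folder and doing everything in a single pass avoids both:

import os
from openpyxl import Workbook

# build the path relative to the script itself, so the debugger's working directory does not matter
path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "teszt.txt")

number_of_lines = 0
with open(path, "r") as f:
    for line in f:  # single pass: count and split in the same loop
        line = line.strip("\n")
        words = line.split()  # split the line, not the file object
        if line[:5] == "2021/":
            number_of_lines += 1
            print(words)

wb = Workbook()
ws = wb.active
text = "2021/03/01 08:28:22"
for date in range(1, number_of_lines + 1):  # write the text into cells A1..An
    ws[f'A{date}'] = text
wb.save("teszt.xlsx")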

How to check whether row-1 and row-2 of a CSV file are empty or not?

I have 4 csv files in a particular folder. I want to check row-1 and row-2 of every csv file one by one. If row-1 or row-2 is blank, print 'file is empty' and also print that particular file name.
import csv
import glob

path = 'D:/Users/SPate233/Downloads/NS dashboard/sql_query/*.csv'
files = glob.glob(path)
for name in files:
    with open(name) as file:
        reader = csv.reader(file)
        row1 = next(reader)
        print(row1)
        row2 = next(reader)
        print(row2)
Error:
Traceback (most recent call last):
File "D:\Users\SPate233\Downloads\test.py", line 13, in <module>
f_row = next(reader)
StopIteration
[Finished in 1.3s with exit code 1]
import glob
import pandas as pd

path = 'D:/Users/SPate233/Downloads/NS dashboard/sql_query/*.csv'
files = glob.glob(path)
for name in files:
    print(name)
    df = pd.read_csv(name)
    df.info(verbose=True)
This will print information about your data set; if any null values are present, df.info() will highlight them.
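If pandas is overkill, note that the StopIteration in the traceback just means the reader ran out of rows. A csv-only sketch (the exact definition of "blank" here is my assumption, treating a missing row or a row of empty cells as blank):

import csv
import glob

path = 'D:/Users/SPate233/Downloads/NS dashboard/sql_query/*.csv'
for name in glob.glob(path):
    with open(name, newline='') as f:
        reader = csv.reader(f)
        # next(reader, None) returns None instead of raising StopIteration
        row1 = next(reader, None)
        row2 = next(reader, None)
        if not row1 or not any(row1) or not row2 or not any(row2):
            print('file is empty:', name)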

Python win32com workbook.SaveAs 'Invalid number of parameters.' error

I have been running a Python script successfully for several months. The script edits a template Excel spreadsheet using win32com commands and then saves the edited workbook as a new .xlsx file.
results_path = "C:\\Users\\...\\"
results_title = results_path + input + "_Results.xlsx"

if os.path.exists(template_path):
    xl = win32com.client.gencache.EnsureDispatch("Excel.Application")
    xl.Application.DisplayAlerts = False
    xl.Workbooks.Open(Filename=template_path)
    xl.Application.Cells(2,6).Value = input
    r = 17
    for row in y_test:
        row = str(row)
        row = row[1:]
        row = row[:-1]
        xl.Application.Cells(r,2).Value = row
        r += 1
    # xl.Application.CalculateFullRebuild
    # xl.ActiveWorkbook.SaveAs(Filename = save_title)
    # time.sleep(20)
    r = 17
    for row in prediction:
        row = str(row)
        row = row[1:]
        row = row[:-1]
        xl.Application.Cells(r,3).Value = row
        r += 1
    xl.ActiveWorkbook.SaveAs(Filename=results_title)
Without my changing anything in the script, it no longer works; one day it just stopped working.
Here is the error:
Traceback (most recent call last):
File "<ipython-input-5-aaef40198ed6>", line 1, in <module>
runfile('C:/Users/Alex/Desktop/Stocks/Python Stock Code/BizNet.py', wdir='C:/Users/Alex/Desktop/Stocks/Python Stock Code')
File "C:\Users\Alex\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 710, in runfile
execfile(filename, namespace)
File "C:\Users\Alex\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Alex/Desktop/Stocks/Python Stock Code/BizNet.py", line 99, in <module>
BizNet_test.accuracy_Test(companyInputOrderArray,input,model)
File "C:\Users\Alex\Desktop\Stocks\Python Stock Code\BizNet_test.py", line 125, in accuracy_Test
xl.ActiveWorkbook.SaveAs(results_title)
File "C:\Users\Alex\AppData\Local\Temp\gen_py\3.5\00020813-0000-0000-C000-000000000046x0x1x9\_Workbook.py", line 284, in SaveAs
, AccessMode, ConflictResolution, AddToMru, TextCodepage, TextVisualLayout
com_error: (-2147352562, 'Invalid number of parameters.', None, None)
Got it!!!
There was a temporary cache folder, "gen_py", that I had to delete: the one referenced by the file path in the error.
"C:\Users\Alex\AppData\Local\Temp\gen_py\3.5\00020813-0000-0000-C000-000000000046x0x1x9\_Workbook.py"
I have no clue why this worked or how the error initially occurred, but everything is fine now.
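For anyone hitting the same error, here is a small sketch of clearing that cache from Python rather than by hand (this assumes the cache location is exposed as win32com.__gen_path__; deleting the folder manually, as above, works just as well):

import shutil
import win32com

# win32com.__gen_path__ points at the gen_py cache directory
# (e.g. ...\AppData\Local\Temp\gen_py\<python version>)
gen_py_dir = win32com.__gen_path__
print('removing', gen_py_dir)
shutil.rmtree(gen_py_dir, ignore_errors=True)
# the cache is rebuilt the next time gencache.EnsureDispatch("Excel.Application") runs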

Writing a Python program to iterate a CSV file to match a field and save the result in a different data file

I am trying to write a program to do the following:
Specify a field from a record in a csv file called data.
Specify a field from a record in a csv file called log.
Compare the position of the two in the data file and in the log file. If they are on the same line, write the record from the log file into a new file called result.
If the field does not match the record position in the log file, move to the next record in the log file and compare it until a matching record is found, then save that record in the file called result.
Reset the index of the log file.
Go to the next line in the data file and repeat the verification until the end of the data file is reached.
This is what I was able to do, but I am stuck:
import csv

def main():
    datafile_csv = open('data.txt')
    logfile_csv = open('log.txt')
    row_data = []
    row_log = []
    row_log_temp = []
    index_data = 1
    index_log = 1
    index_log_temp = index_log
    counter = 0
    data = ''
    datareader = ''
    logreader = ''
    log = ''
    # row = 0
    logfile_len = sum(1 for lines in open('log.txt'))
    with open('resultfile.csv','w') as csvfile:
        out_write = csv.writer(csvfile, delimiter=',', quotechar='"')
        with open('data.txt','r') as (data):
            row_data = csv.reader(csvfile, delimiter=',', quotechar='"')
            row_data = next(data)
            print(row_data)
            with open('log.txt','r') as (log):
                row_log = next(log)
                print(row_log)
                while counter != logfile_len:
                    comp_data = row_data[index_data:]
                    comp_log = row_log[index_log:]
                    comp_data = comp_data.strip('"')
                    comp_log = comp_log.strip('"')
                    print(row_data[1])
                    print(comp_data)
                    print(comp_log)
                    if comp_data != comp_log:
                        while comp_data != comp_log:
                            row_log = next(log)
                            comp_log = row_log[index_log]
                        out_write.writerow(row_log)
                        row_data = next(data)
                    else:
                        out_write.writerow(row_log)
                        row_data = next(data)
                    log.seek(0)
                    counter += 1
The problems I have are the following:
I cannot convert the data line into a string properly, and I cannot compare correctly.
Also, I need to be able to reset the pointer in the log file, but seek does not seem to be working.
This is the content of the data file:
"test1","test2","test3"
"1","2","3"
"4","5","6"
This is the content of the log file:
"test1","test2","test3"
"4","5","6"
"1","2","3"
This is what the interpreter returns:
t
"test1","test2","test3"
t
test1","test2","test3"
test1","test2","test3"
1
1","2","3"
test1","test2","test3"
Traceback (most recent call last):
File "H:/test.py", line 100, in <module>
main()
File "H:/test.py", line 40, in main
comp_log = row_log[index_log]
IndexError: string index out of range
Thank you very much for the help
Regards
Danilo
Join the two files by columns (the row number plus a specific column, not defined here), and return the results limited to the columns of the left/first file.
import petl
log = petl.fromcsv('log.txt').addrownumbers() # Load csv/txt file into PETL table, and add row numbers
log_columns = len(petl.header(log)) # Get the amount of columns in the log file
data = petl.fromcsv('data.txt').addrownumbers() # Load csv/txt file into PETL table, and add row numbers
joined_files = petl.join(log, data, key=['row', 'SpecificField']) # Join the tables using row and a specific field
joined_files = petl.cut(joined_files, *range(1, log_columns)) # Remove the extra columns obtained from right table
petl.tocsv(joined_files, 'resultfile.csv') # Output results to csv file
Also, do not forget to pip install it (the version used for this example):
pip install petl==1.0.11
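If adding a dependency is not an option, the same idea works with the standard csv module. A sketch under the assumption that the matching field is the first column (the KEY index below is a placeholder): load the log records into a dict keyed on that field, then walk the data file and write out the matching log record.

import csv

KEY = 0  # hypothetical: index of the field used for matching

# read every log record once, keyed on the chosen field
with open('log.txt', newline='') as f:
    log_by_key = {row[KEY]: row for row in csv.reader(f)}

# for each data record, write the log record with the same key to the result file
with open('data.txt', newline='') as f_in, open('resultfile.csv', 'w', newline='') as f_out:
    writer = csv.writer(f_out, quoting=csv.QUOTE_ALL)
    for row in csv.reader(f_in):
        match = log_by_key.get(row[KEY])
        if match is not None:
            writer.writerow(match)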

Python Openpyxl Append issue

I have hundreds of XML files that I need to extract two values from and output to an Excel or CSV file. This is the code I currently have:
# grabs idRoot and typeId root values from XML files
import glob
from openpyxl import Workbook
from xml.dom import minidom
import os

wb = Workbook()
ws = wb.active

def typeIdRoot(filename):
    f = open(filename, encoding="utf8")
    for xml in f:
        xmldoc = minidom.parse(f)
        qmd = xmldoc.getElementsByTagName("MainTag")[0]
        typeIdElement = qmd.getElementsByTagName("typeId")[0]
        root = typeIdElement.attributes["root"]
        global rootValue
        rootValue = root.value
        print('rootValue =', rootValue)
        ws.append([rootValue])
        wb.save("some.xlsx")

wb = Workbook()
ws = wb.active

def idRoot(filename):
    f = open(filename, encoding="utf8")
    for xml in f:
        xmldoc = minidom.parse(f)
        tcd = xmldoc.getElementsByTagName("MainTag")[0]
        activitiesElement = tcd.getElementsByTagName("id")[0]
        sport = activitiesElement.attributes["root"]
        sportName = sport.value
        print('idRoot =', sportName)
        ws.append([idRoot])
        wb.save("some.xlsx")

for file in glob.glob("*.xml"):
    typeIdRoot(file)
for file in glob.glob("*.xml"):
    idRoot(file)
The first value follows a 1.11.111.1.111111.1.3 format. The second mixes letters and numbers. I believe this is the reason for the error:
Traceback (most recent call last):
File "C:\Python34\Scripts\xml\good.py", line 64, in <module>
idRoot (file)
File "C:\Python34\Scripts\xml\good.py", line 54, in idRoot
ws.append([idRoot])
File "C:\Python34\lib\site-packages\openpyxl\worksheet\worksheet.py", line 754, in append
cell = self._new_cell(col, row_idx, content)
File "C:\Python34\lib\site-packages\openpyxl\worksheet\worksheet.py", line 376, in _new_cell
cell = Cell(self, column, row, value)
File "C:\Python34\lib\site-packages\openpyxl\cell\cell.py", line 131, in __init__
self.value = value
File "C:\Python34\lib\site-packages\openpyxl\cell\cell.py", line 313, in value
self._bind_value(value)
File "C:\Python34\lib\site-packages\openpyxl\cell\cell.py", line 217, in _bind_value
raise ValueError("Cannot convert {0} to Excel".format(value))
ValueError: Cannot convert <function idRoot at 0x037D24F8> to Excel
I would like the result to have both values on the same row, so that there is a new row for each file in the directory, with the second value in the second column.
as such:
Value 1 Value 2
1.11.111.1.111111.1.3 10101011-0d10-0101-010d-0dc1010e0101
idRoot is the name of your FUNCTION.
So when you write
ws.append([idRoot])
you probably mean:
ws.append([sportName])
Of course, you can write something like:
ws.append([rootValue, sportName])
providing both variables are defined with reasonable values.
One last thing: you should save your file only once.
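Putting those suggestions together, here is a restructured sketch (the MainTag/typeId/id tag names and the "root" attribute come from the question; the rest of the reorganization is my assumption): parse each file once, append both values as a single row, and save the workbook once at the end.

import glob
from xml.dom import minidom
from openpyxl import Workbook

def extract_values(filename):
    # parse the document once and pull both "root" attributes
    xmldoc = minidom.parse(filename)
    main = xmldoc.getElementsByTagName("MainTag")[0]
    type_id_root = main.getElementsByTagName("typeId")[0].attributes["root"].value
    id_root = main.getElementsByTagName("id")[0].attributes["root"].value
    return type_id_root, id_root

wb = Workbook()
ws = wb.active
ws.append(["Value 1", "Value 2"])  # header row

for filename in glob.glob("*.xml"):
    rootValue, sportName = extract_values(filename)
    ws.append([rootValue, sportName])  # one row per file, both values side by side

wb.save("some.xlsx")  # save once, at the end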
