Occasional PermissionError when dict-writing to csv - python

I've written a tkinter app in Python 3.7 that extracts information from a main CSV, pre-filtered by the user, into smaller reports. My issue is that, while writing the filtered data from the main CSV into a report, I occasionally get a PermissionError:
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Users\mdiakuly\AppData\Roaming\Python\Python37\lib\tkinter\__init__.py", line 1705, in __call__
return self.func(*args)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 286, in report_generation_prep
self.report_generation(names_list, relevant_params, specific_value, extracted_file)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 344, in report_generation
processing_data(rd,column)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 336, in processing_data
writing_new_report(gathering_row)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 299, in writing_new_report
with open(extracted_file, 'a+', newline='') as write_in:
PermissionError: [Errno 13] Permission denied: 'C:/Users/myusername/Desktop/reporter/Partial_report_14_10_2021T15-13-12.csv'
Once it extracted only one row and threw an error; another time it did the whole extraction with no error; and a few other times it extracted thousands of rows and then threw an error.
CSV file which is being written into was never opened during info extraction.
Has anyone faced the same issue or maybe has an idea how to fix it?
def writing_new_report(complete_row):
    """Append a single filtered row to the report CSV.

    ``complete_row`` is a dict keyed by the report's column names.
    ``extracted_file``, ``relevant_params`` and ``self`` are taken from the
    enclosing scope, which is not part of this snippet.

    The report file is re-opened in append mode on every call, i.e. once
    per written row.
    """
    with open(extracted_file, 'a+', newline='') as report_file:
        row_writer = csv.DictWriter(
            report_file,
            delimiter=';',
            fieldnames=relevant_params,
        )
        if self.debug:
            print(complete_row)
        row_writer.writerow(complete_row)
def processing_data(r_d, column='def'):
    """Scan the main CSV rows and write every row that passes the filters.

    r_d    -- iterable of rows that can be indexed by column name
              (presumably a csv.DictReader; confirm against the caller).
    column -- name of the column used for the name filter; the default
              'def' switches that filter off.

    ``names_list``, ``relevant_params``, ``specific_value`` and ``self``
    come from the enclosing scope.
    """
    name_filter_active = column != 'def'
    for idx, row in enumerate(r_d):  # looping through major csv
        self.progress.update()
        if self.debug:
            print(f'{idx:}', end=' / ')
        gathering_row = {}
        if name_filter_active:
            if row[column] not in names_list:
                continue
            # Each requested name is consumed once it has been seen.
            names_list.remove(row[column])
        for param, value in zip(relevant_params, specific_value):
            self.progress.update()
            if self.debug:
                print(f'{row[param]:}', end=' / ')
            gathering_row[param] = row[param]
            # '---- All ----' is the wildcard; any other value must match.
            if value != '---- All ----' and value != row[param]:
                if self.debug:
                    print(f'{row[param]:} - Skipped')
                break
            # Reaching the last parameter means every filter matched.
            if param == relevant_params[-1]:
                if self.debug:
                    print(f'{row[param]:} - Written')
                writing_new_report(gathering_row)

Related

zipfile.LargeZipFile: Filesize would require ZIP64 extensions

I am creating an Excel file and writing some rows to it. Here is what I have written:
import string

import pandas as pd
import xlsxwriter
from nltk.tokenize import word_tokenize

# Export every 'EN' row of d.csv to DataSet.xlsx: one worksheet row per
# matching record with (running number, label, sentence, token list).
workbook = xlsxwriter.Workbook('DataSet.xlsx')
worksheet = workbook.add_worksheet()
df2 = pd.read_csv('d.csv', low_memory=False)

# Build the punctuation-stripping table once instead of once per row.
strip_punctuation = str.maketrans('', '', string.punctuation)

count = 0
for index, row in df2.iterrows():
    if row['source_id'] != 'EN':
        continue
    count += 1
    print(count)
    words = word_tokenize(row['text'].translate(strip_punctuation))
    # Every word is followed by a single space, as before.
    sentence = ''.join(word + ' ' for word in words)
    # The old loop reset its position counter on every iteration, so the
    # "not the last word" check never fired and a stray ', ' was appended
    # after the final token. join() puts separators between tokens only.
    tokens = ', '.join(words)
    worksheet.write(count, 3, tokens)
    worksheet.write(count, 2, sentence)
    worksheet.write(count, 1, 'Discrimination')
    worksheet.write(count, 0, count)
workbook.close()
However, after the row number 94165, it gives me the following error and won't proceed anymore:
Traceback (most recent call last):
File "/Users/PycharmProjects/pythonProject/venv/lib/python3.9/site-packages/xlsxwriter/workbook.py", line 323, in close
self._store_workbook()
File "/Users/PycharmProjects/pythonProject/venv/lib/python3.9/site-packages/xlsxwriter/workbook.py", line 745, in _store_workbook
raise e
File "/Users/PycharmProjects/pythonProject/venv/lib/python3.9/site-packages/xlsxwriter/workbook.py", line 739, in _store_workbook
xlsx_file.write(os_filename, xml_filename)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/zipfile.py", line 1761, in write
with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/zipfile.py", line 1505, in open
return self._open_to_write(zinfo, force_zip64=force_zip64)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/zipfile.py", line 1597, in _open_to_write
self._writecheck(zinfo)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/zipfile.py", line 1712, in _writecheck
raise LargeZipFile(requires_zip64 +
zipfile.LargeZipFile: Filesize would require ZIP64 extensions
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/PycharmProjects/pythonProject/Python file.py", line 64, in <module>
workbook.close()
File "/Users/PycharmProjects/pythonProject/venv/lib/python3.9/site-packages/xlsxwriter/workbook.py", line 327, in close
raise FileSizeError("Filesize would require ZIP64 extensions. "
xlsxwriter.exceptions.FileSizeError: Filesize would require ZIP64 extensions. Use workbook.use_zip64().
Does anyone know why this has occurred and how it can be solved?
The issue is caused by the fact that the resulting file, or components of it are greater than 4GB in size. This requires an additional parameter to be passed by xlsxwriter to the Python standard library zipfile.py in order to support larger zip file sizes.
The answer/solution is buried in the exception message:
xlsxwriter.exceptions.FileSizeError: Filesize would require ZIP64 extensions.
Use workbook.use_zip64().
You can either add it as a constructor option or workbook method:
workbook = xlsxwriter.Workbook(filename, {'use_zip64': True})
# Same as:
workbook = xlsxwriter.Workbook(filename)
workbook.use_zip64()
See the docs on the Workbook Constructor and workbook.use_zip64() including the following Note:
Note:
When using the use_zip64() option the zip file created by the Python standard library zipfile.py may cause Excel to issue a warning about repairing the file. This warning is annoying but harmless. The “repaired” file will contain all of the data written by XlsxWriter, only the zip container will be changed.

Python csv get original raw data line

In python it is easy to read and parse a csv file and process line-by-line:
reader = csv.reader(open("my_csv_file.csv"))
for row in reader:
# row is an array or dict
parsed_data = my_data_parser(row)
where my_data_parser is my own piece of logic that takes input data, parses and does logic.
If my parser fails, I would like to log the entire original line of the CSV file, but it seems that the csv reader no longer gives me access to it.
Is it possible to retrieve the original raw line data?
It doesn't seem like the csv.reader() exposes the file object it's iterating, however, you could use the reader's line_num attribute to achieve what you want.
For example:
import csv
# Load every physical line up front so the raw text can be looked up later.
with open("my_csv_file.csv") as csv_file:
    lines = csv_file.readlines()

reader = csv.reader(lines)
for row in reader:
    # row is an array or dict
    try:
        parsed_data = my_data_parser(row)
    except MyDataParserError:
        print(f"ERROR in line number {reader.line_num}")
        print("Full line:")
        # line_num is the 1-based count of lines consumed so far, while the
        # list is 0-based: the row that just failed ends at line_num - 1.
        print(lines[reader.line_num - 1])
Alternative
If you'd like to avoid always loading the file into memory, you could instead keep your initial way of reading the file and only read the whole file into memory if an error occurred:
import csv
# Stream the file normally; the context manager closes it even if the
# parser raises something unexpected.
with open("my_csv_file.csv") as csv_file:
    reader = csv.reader(csv_file)
    for row in reader:
        # row is an array or dict
        try:
            parsed_data = my_data_parser(row)
        except MyDataParserError:
            # Only read the whole file into memory when an error occurred.
            with open("my_csv_file.csv") as raw_file:
                lines = raw_file.readlines()
            print(f"ERROR in line number {reader.line_num}")
            print("Full line:")
            # line_num is 1-based, list indices are 0-based.
            print(lines[reader.line_num - 1])
You can access the row line number with
reader.line_num
But there seems to be no direct way to access the actual line (says doc). Here is iterative method that avoids reading the whole file to memory at any step:
import csv
class MyException(Exception):
    """Error type raised by the demo row logic when it rejects a row."""
def super_logic(line):  # Some silly logic to get test code running
    """Print rows that have exactly two fields with '1' as the second one.

    Any other row is rejected with MyException("Invalid value").
    """
    row_is_valid = len(line) == 2 and line[1] == '1'
    if not row_is_valid:
        raise MyException("Invalid value")
    print("Process: %s" % line)
class LastLineReader:
    """Line iterator over a text file that remembers the last line it yielded.

    Intended to be wrapped by ``csv.reader``: the csv module pulls lines
    through ``__next__`` one at a time, and the most recent raw line stays
    available as ``current_line`` (stripped of surrounding whitespace).
    For a record spanning several physical lines only the last one is kept.
    """

    def __init__(self, fn):
        """Open *fn* for reading; no line has been read yet."""
        self.fid = open(fn)
        # Defined up front so the attribute exists before the first read.
        self.current_line = None

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next raw line, caching its stripped form."""
        # Keep raising StopIteration once exhausted instead of touching a
        # closed file.
        if self.fid.closed:
            raise StopIteration()
        line = self.fid.readline()  # Read single line and cache it in the object
        if len(line) == 0:
            # End of file: release the handle rather than leaking it.
            self.fid.close()
            raise StopIteration()
        self.current_line = line.strip()
        return line
# Feed the csv parser through the caching wrapper so the raw text of the
# row being processed stays reachable.
reader_with_lines = LastLineReader("my_csv_file.csv")
reader = csv.reader(reader_with_lines)
for line in reader:
    try:
        super_logic(line)
    except MyException as e:
        # current_line still holds the raw text of the row that was rejected.
        failed_line = reader_with_lines.current_line
        print("Got exception: %s at line '%s'" % (e, failed_line))
(Edited: removed other solutions as they are also visible on other ppl posts)
As alternative to reader.line_num
# Number the parsed records starting at 1. The loop variable is `index`;
# the previous `i` was never defined. Note this counts records, which can
# differ from reader.line_num when a quoted field spans several lines.
for index, row in enumerate(reader, start=1):
    print(index, row)

json file load issue using Python

I was trying to dump and load a dictionary to a JSON file using Python. I can dump the file without a problem. However, when I try to load the file into a temp dictionary, an error occurs. I cannot figure out the issue. Can anyone help me with this? Thanks
import os
import json
def get_stored_birth():
    """Load the birthday file and print the entry for the global ``name``.

    Reads 'C:/Users/Sam/name.json' as a single JSON document. Nothing is
    returned explicitly, so the caller receives None.
    """
    path = 'C:/Users/Sam/name.json'
    with open(path, 'r+') as birthday_file:
        stored = json.load(birthday_file)
        birthday = stored.get(name)
        print(birthday, "is the birthday of", name)
def get_new_birth():
    """Read a birthday from stdin, store it for the global ``name`` and save.

    The whole ``my_dict`` is dumped to 'C:/Users/Sam/name.json' in append
    mode, preceded by a newline, so each call adds another JSON document
    to the file. Returns ``name`` unchanged.
    """
    my_dict[name] = str(input())
    print("Birthday database updated")
    with open('C:/Users/Sam/name.json', 'a') as out_file:
        out_file.write('\n')
        json.dump(my_dict, out_file)
    return name
my_dict={}
def quit():
    """Return this function object itself.

    Despite the name, nothing is terminated here; the main loop calls
    ``exit()`` directly.
    """
    return quit
# Interactive loop: ask for a name until a blank line is entered, then show
# the stored birthday or ask for a new one.
while True:
    filename = 'C:/Users/Sam/name.json'
    print("Enter a name:(blank to quit)")
    name = str(input())
    if not name:
        exit()
    if name not in my_dict:
        print("I dont have info for", name)
        print("What is their birthday")
        name = get_new_birth()
    else:
        # my_dict only holds names entered during this run of the program.
        name = get_stored_birth()
The traceback as follow:
Traceback (most recent call last):
File "C:\Users\Sam\Desktop\Tianxu_Assignment2\Assignment 2.py", line 45, in <module>
name= get_stored_birth()
File "C:\Users\Sam\Desktop\Tianxu_Assignment2\Assignment 2.py", line 10, in get_stored_birth
temp =json.load(f_obj1)
File "C:\Users\Sam\AppData\Local\Programs\Python\Python36-32\lib\json\__init__.py", line 299, in load
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
File "C:\Users\Sam\AppData\Local\Programs\Python\Python36-32\lib\json\__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "C:\Users\Sam\AppData\Local\Programs\Python\Python36-32\lib\json\decoder.py", line 342, in decode
raise JSONDecodeError("Extra data", s, end)
json.decoder.JSONDecodeError: Extra data: line 3 column 1 (char 12)
Problem solved !!!
1. Replace `with open(filename, 'a') as f_obj` with `with open(filename, 'w') as f_obj`.
2.
if name in my_dict:
should not check my_dict! Every time the program starts, it uses a new, empty dictionary. I moved
filename ='C:/Users/Sam/name.json'
temp = {}
with open(filename,'r+') as f_obj1:
temp =json.load(f_obj1)
to main loop and check if name in temp:
Thanks guys!!!
You are appending new json to previous jsons you have created. Just substitute this line:
with open(filename,'a') as f_obj:
with this one:
with open(filename,'w') as f_obj:

IOError: [Errno 13] Permission denied while reading csv file in python

I have quite a large CSV file, and I am using the csv module to read and process it;
below is the code snippet I have in my project. The file has around 9828075 records; the code worked fine until the 637922nd record, after which it raised the error below:
ERROR Tue, 14 Apr 2013 09:59:29 Traceback (most recent call last):
File "build\bdist.win32\egg\my_proj\csv_reader.py", line 316, in next
File "E:\MyProject\DataExa\Python26\lib\csv.py", line 104, in next
row = self.reader.next()
File "build\bdist.win32\egg\my_proj\csv_reader.py", line 366, in capture_record_data
IOError: [Errno 13] Permission denied
My code looks like below...
import csv
class MyCsvReader(object):
    """Wrapper around ``csv.DictReader`` that also keeps the latest raw line.

    Call ``init_reader()`` before iterating; ``raw_data`` then holds the
    text of the line most recently handed to the csv parser.
    """

    def __init__(self, import_source, dialect='excel'):
        """Store the source path and CSV dialect, then run ``post_init``."""
        self.import_source = import_source
        self.dialect = dialect
        self.post_init()

    def post_init(self):
        """Hook for extra initialisation logic; a no-op in this class."""

    def init_reader(self):
        """Open the source file and build the dict-producing CSV reader."""
        self.import_file = file(self.import_source, 'rU')
        # Route the file's lines through the capturing generator so the raw
        # text is recorded before the csv module parses it.
        captured_lines = self.capture_record_data(self.import_file)
        # create a CSV iterator that returns a dict with each next() call
        self.reader = csv.DictReader(captured_lines, dialect=self.dialect)

    def next(self):
        """Return a dict containing data read from the CSV file."""
        # todo: do a magic to remove all spaces in data....
        return self.reader.next()

    def __iter__(self):
        """Make an instance of this class usable as an iterator."""
        return self

    def capture_record_data(self, row_iterator):
        """Yield each line unchanged, remembering it in ``self.raw_data``."""
        for raw_line in row_iterator:
            self.raw_data = raw_line
            yield raw_line

    def close(self):
        """Close the source file if ``init_reader`` has opened one."""
        if not hasattr(self, 'import_file'):
            return
        self.import_file.close()
if __name__ == '__main__':
    # Print every record of test.csv as a dict until the reader runs dry.
    reader_obj = MyCsvReader(import_source='test.csv')
    reader_obj.init_reader()
    for record in reader_obj.reader:
        print(record)
Could anyone help me figure out why I am getting the IOError: [Errno 13] Permission denied error
while processing the file?

Try except not catching IOError from class

I have a class that reads a file of a particular format. These files tend to be greater than 8Gb in size so are usually compressed. When reading the file in I wanted to catch the error of the file not being compressed but neither except IOError: nor except: will do so, for some reason I don't understand.
There are a few classes defined together in the file VCF.py, though the offending class is vcfReader(). The file from which the object is instantiated is below test.py, and lastly the Traceback.
Anyone have any ideas as to why it isn't working?
VCF.py
import gzip
import sys
class Call():
'''
Class to handle the sample genotypes and associated information
'''
def __init__(self,site,sample,format,data):
#do stuff here#
class Variant():
'''
Class for a single row from a VCF file.
'''
def __init__(self, entry, samples):
#do other stuff here
class vcfReader():
    '''
    Iterate over a gzip-compressed VCF file: '##' meta lines are skipped,
    sample names are taken from the '#' header line, and every following
    row is wrapped in a Variant.
    '''

    def __init__(self, file):
        # Only the gzip.open() call is guarded here; the header is read
        # afterwards, outside the try block.
        try:
            self.vcfFile = gzip.open(file, 'rb')
        except IOError:
            print("Not a gzipped file")
            sys.exit()
        self.samples = self.readHeader()

    def readHeader(self):
        '''
        Consume lines up to the single-'#' header line and return its
        tab-separated fields from index 9 onwards (the sample names).
        Returns None if a non-'#' line is reached first.
        '''
        line = self.vcfFile.next()
        while line.startswith('#'):
            if line[1] == '#':
                # '##' meta tag: ignore it and move on to the next line.
                line = self.vcfFile.next()
                continue
            return line.rstrip().rsplit('\t')[9:]

    def __iter__(self):
        return self

    def next(self):
        # Wrap the next raw row together with the sample names.
        return Variant(self.vcfFile.next(), self.samples)
and then test.py
import VCF
from collections import Counter
if __name__ == '__main__':
    # Tally how often each FILTER value occurs across all variants.
    vcfreader = VCF.vcfReader('all_samples.vcf')
    filters = []
    for variant in vcfreader:
        filters.extend(variant.FILTERS)
    filters = Counter(filters)
    for name, total in filters.iteritems():
        print("{0}: {1}".format(name, total))
Here is the traceback:
Traceback (most recent call last):
File "C:\Users\Davy\Documents\Programming\VCF_stuff\src\test.py", line 10, in <module>
vcfreader = VCF.vcfReader('all_samples.vcf')
File "C:\Users\Davy\Documents\Programming\VCF_stuff\src\VCF.py", line 95, in __init__
self.samples = self.readHeader()
File "C:\Users\Davy\Documents\Programming\VCF_stuff\src\VCF.py", line 98, in readHeader
line = self.vcfFile.next()
File "C:\Python27\lib\gzip.py", line 450, in readline
c = self.read(readsize)
File "C:\Python27\lib\gzip.py", line 256, in read
self._read(readsize)
File "C:\Python27\lib\gzip.py", line 291, in _read
self._read_gzip_header()
File "C:\Python27\lib\gzip.py", line 185, in _read_gzip_header
raise IOError, 'Not a gzipped file'
IOError: Not a gzipped file
The reason your except block doesn't catch the exception is that it happens outside the try block:
def __init__(self, file):
try:
self.vcfFile = gzip.open(file, 'rb')
except IOError:
print "Not a gzipped file"
sys.exit()
self.samples = self.readHeader() # <<<<<<<< exception is raised here

Categories

Resources