IOError: [Errno 13] Permission denied while reading csv file in python - python

I have quite a large CSV file, and I am using the csv module to read and process it with the code snippet below, which is part of my project. The file has around 9828075 records. The code worked fine up to the 637922nd record, after which it raised the error below:
ERROR Tue, 14 Apr 2013 09:59:29 Traceback (most recent call last):
File "build\bdist.win32\egg\my_proj\csv_reader.py", line 316, in next
File "E:\MyProject\DataExa\Python26\lib\csv.py", line 104, in next
row = self.reader.next()
File "build\bdist.win32\egg\my_proj\csv_reader.py", line 366, in capture_record_data
IOError: [Errno 13] Permission denied
My code looks like below...
import csv
class MyCsvReader(object):
    """
    Iterator over the rows of a CSV file that also remembers the raw text
    of the line most recently handed to the csv parser (self.raw_data).

    Call init_reader() before iterating and close() when finished.
    """

    def __init__(self, import_source, dialect='excel'):
        """
        import_source -- path of the CSV file to read
        dialect       -- csv dialect name passed to csv.DictReader
        """
        self.import_source = import_source
        self.dialect = dialect
        self.post_init()

    def post_init(self):
        """
        Do any post init logic....
        """
        pass

    def init_reader(self):
        """
        Open the source file and build the DictReader on top of it.
        """
        # open() replaces the Python-2-only file() builtin; the mode is kept.
        # NOTE(review): the 'U' flag is rejected by Python 3.11+; when moving
        # to Python 3 open the file with newline='' instead.
        self.import_file = open(self.import_source, 'rU')
        # create a CSV iterator that returns a dict with each next() call;
        # the file is routed through capture_record_data so the raw line is
        # recorded before the csv module parses it
        self.reader = csv.DictReader(
            self.capture_record_data(self.import_file),
            dialect=self.dialect)

    def next(self):
        """
        Returns a dict containing data read from the CSV file.

        Raises StopIteration when the file is exhausted.
        """
        #todo: do a magic to remove all spaces in data....
        return next(self.reader)

    # Python 3 looks up __next__ for the iterator protocol; keep next() for
    # Python 2 callers and expose the same method under both names.
    __next__ = next

    def __iter__(self):
        "Special method to make an instance of this class into an iterator"
        return self

    def capture_record_data(self, row_iterator):
        """
        Generator for capturing the record data before passing to csv

        Each raw line is stored in self.raw_data and then yielded unchanged.
        """
        for row in row_iterator:
            self.raw_data = row
            yield row

    def close(self):
        """
        Close the underlying file if init_reader() opened one.
        """
        if hasattr(self, 'import_file'):
            self.import_file.close()
if __name__ == '__main__':
    # Demo driver: print every record of test.csv as a dict.
    reader_obj = MyCsvReader(import_source='test.csv')
    reader_obj.init_reader()
    try:
        # A for loop ends cleanly on StopIteration, so no manual
        # while/try/except is needed (and the Python-2-only
        # "except StopIteration, e" syntax goes away).
        for record in reader_obj.reader:
            print(record)
    finally:
        # Release the file handle even if reading fails part-way through.
        reader_obj.close()
Could anyone help me figure out why I am getting the IOError: [Errno 13] Permission denied error while processing the file?

Related

How do I process numerous files using numerous regex conditions?

I want to process the cna and linear_cna files by reading only lines that do not contain either Hugo_Symbol or -01.
import os
import re
class DataProcessing:
    """Load a text file by name and filter its lines.

    data -- path of the file to read
    line -- list of the file's lines without trailing newlines; empty until
            read_data() has been called
    """

    def __init__(self, data):
        # Only remember the file name here. Iterating over `data` directly
        # would walk the characters of the name, not the lines of the file.
        self.data = data
        self.line = []

    def read_data(self):
        """Read the file named by self.data and return its lines.

        Raises IOError/OSError if the file cannot be opened.
        """
        with open(self.data) as handle:
            self.line = [line.rstrip('\n') for line in handle]
        return self.line

    def read_cna(self):
        """Return the lines that do not begin with "Hugo_Symbol" or "-01".

        Reads the file first if read_data() has not loaded any lines yet.
        """
        # In cna and linear_cna files, skip lines that either begin with "Hugo_Symbol" or "-01"
        lines = self.line if self.line else self.read_data()
        # "-01" is matched literally; "[-01]" would be a character class that
        # rejects any line starting with '-', '0' or '1'.
        return [l for l in lines if not re.search(r"^(Hugo_Symbol|-01)", l)]
...continue...
dp_cna = DataProcessing("data_cna.txt")
dp_linear_cna = DataProcessing("data_linear_cna.txt")
dp_cna.read_data()
dp_linear_cna.read_data()
Traceback:
Traceback (most recent call last):
File "C:/Users/User/PycharmProjects/testing/main.py", line 24, in <module>
cna = DataProcessing.read_data("data_cna.txt")
File "C:/Users/User/PycharmProjects/testing/main.py", line 14, in read_data
with open(self.data) as f:
AttributeError: 'str' object has no attribute 'data'
The right way to use your class consists of two steps.
Step 1: Create an instance of DataProcessing by invoking __init__. You do this by declaring dp = DataProcessing("data_cna.txt"). You can replace dp with any name you want.
Now dp is an instance of DataProcessing. Its data field is set to "data_cna.txt". In other words, dp remembers the name of the file.
Step 2: Call read_data on dp. Note that read_data has only one parameter, namely self, which should not be passed as an argument, meaning it takes no arguments. Therefore, the right way to call read_data is just read_data(). To call read_data on dp you do dp.read_data().

Occasional PermissionError when dict-writing to csv

I've written a tkinter app in Python 3.7, which extracts from main CSV, pre-filtered by user, information into smaller reports. My issue is that while writing filtered data from main CSV into report I occasionally get a PermissionError:
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Users\mdiakuly\AppData\Roaming\Python\Python37\lib\tkinter\__init__.py", line 1705, in __call__
return self.func(*args)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 286, in report_generation_prep
self.report_generation(names_list, relevant_params, specific_value, extracted_file)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 344, in report_generation
processing_data(rd,column)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 336, in processing_data
writing_new_report(gathering_row)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 299, in writing_new_report
with open(extracted_file, 'a+', newline='') as write_in:
PermissionError: [Errno 13] Permission denied: 'C:/Users/myusername/Desktop/reporter/Partial_report_14_10_2021T15-13-12.csv'
Once it extracted only one row and threw an error, another time it did the whole extraction with no error, and a few other times it extracted thousands of rows and then threw an error.
CSV file which is being written into was never opened during info extraction.
Has anyone faced the same issue or maybe has an idea how to fix it?
# Append one row (a dict) to the report CSV named by extracted_file.
# NOTE(review): extracted_file, relevant_params and self are not defined in
# this snippet; presumably they come from an enclosing method -- TODO confirm.
def writing_new_report(complete_row):
# The report file is reopened in append mode on every call, i.e. once per
# row written.
with open(extracted_file, 'a+', newline='') as write_in:
# Columns are written ';'-separated in the order given by relevant_params.
wt = csv.DictWriter(write_in, delimiter=';', fieldnames=relevant_params)
if self.debug:
print(complete_row)
wt.writerow(complete_row)
# Scan the rows of r_d and hand every row that passes the filters to
# writing_new_report, reduced to the columns listed in relevant_params.
# NOTE(review): names_list, relevant_params, specific_value and self are not
# defined in this snippet; presumably they come from an enclosing method.
# NOTE(review): the original indentation was lost, so the exact nesting of
# the if/else/break lines below should be confirmed against the real file.
def processing_data(r_d,column='def'):
for idx, row in enumerate(r_d): #looping through major csv
self.progress.update()
if self.debug:
print(f'{idx:}',end=' / ')
# Collects the param -> value pairs of the current row.
gathering_row = {}
# 'def' is the sentinel for "no name column given": no name filtering.
if column != 'def':
# Skip rows whose name is not wanted; a matched name is removed from
# names_list, so it is not matched again by a later row.
if row[column] not in names_list:
continue
else:
names_list.remove(row[column])
pass
else:
pass
# Pair each reported column with the filter value chosen for it.
for param, value in zip(relevant_params,specific_value):
self.progress.update()
if self.debug:
print(f'{row[param]:}',end=' / ')
gathering_row[param] = row[param]
# '---- All ----' means no filtering on this column.
if value == '---- All ----':
pass
# Any other filter value must equal the row's value, else the row is dropped.
elif value != row[param]:
if self.debug:
print(f'{row[param]:} - Skipped')
break
# Reaching the last param without a break means every filter matched.
if param == relevant_params[len(relevant_params)-1]:
if self.debug:
print(f'{row[param]:} - Written')
writing_new_report(gathering_row)

How to chain file objects in Python?

I'm trying to find a simple way to chain file-like objects. I have a single CSV file which is split into a number of segments on disk. I'd like to be able to pass them to csv.DictReader without having to make a concatenated temporary first.
Something like:
files = map(io.open, filenames)
for row in csv.DictReader(io.chain(files)):
print(row[column_name])
But I haven't been able to find anything like io.chain. If I were parsing it myself, I could do something like:
from itertools import chain
def lines(fp):
    """Yield the lines of the open file-like object *fp* one at a time."""
    # Iterate the file object itself instead of fp.readlines(), which would
    # load every line into memory before the first one is yielded.
    for line in fp:
        yield line
a = open('segment-1.dat')
b = open('segment-2.dat')
for line in chain(lines(a), lines(b)):
row = line.strip().split(',')
However DictReader needs something it can call read() on, so this method doesn't work. I can iterate over the files, copying the fieldnames property from the previous reader, but I was hoping for something which let me put all the processing within a single loop body.
An iterable might help
from io import BytesIO
a = BytesIO(b"1st file 1st line \n1st file 2nd line")
b = BytesIO(b"2nd file 1st line \n2nd file 2nd line")
class Reader:
    """Iterator that yields the lines of several file-like objects in turn.

    files       -- the file-like objects, read in the order given
    current_idx -- index of the file currently being read
    """

    def __init__(self, *files):
        self.files = files
        self.current_idx = 0

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line, moving on to the next file when one ends.

        Raises StopIteration once the last file is exhausted, or at once
        when no files were given.
        """
        # `while self.files` guards the no-files case, which would otherwise
        # fail with IndexError on the first lookup.
        while self.files:
            # Taking the first item of the iteration and returning it is how
            # a single line is pulled from the current file.
            for line in self.files[self.current_idx]:
                return line
            # Current file is exhausted; stay on the last file rather than
            # stepping past it.
            if self.current_idx >= len(self.files) - 1:
                break
            self.current_idx += 1
        raise StopIteration("feed me more files")
r = Reader(a, b)
for l in r:
print(l)
Result:
b'1st file 1st line \n'
b'1st file 2nd line'
b'2nd file 1st line \n'
b'2nd file 2nd line'
Edit:
:D then there are standard library goodies.
https://docs.python.org/3.7/library/fileinput.html
with fileinput.input(files=('spam.txt', 'eggs.txt')) as f:
for line in f:
process(line)
You could create a class that's an iterator that returns a string each time its __next__() method is called (quoting the docs).
import csv
class ChainedCSVfiles:
    """Iterator that returns the lines of several files as one stream.

    Each call to __next__() returns one line (a str), so an instance can be
    passed directly to csv.reader / csv.DictReader.

    filenames -- paths of the segment files, read in the order given
    """

    def __init__(self, filenames):
        self.filenames = filenames
        # Created lazily so that no file is opened before the first read.
        self._lines = None

    def _read_lines(self):
        """Generate every line of every file, one file at a time."""
        for filename in self.filenames:
            # newline='' leaves line endings untouched for the csv module.
            with open(filename, 'r', newline='') as csvfile:
                for line in csvfile:
                    yield line

    def __iter__(self):
        # Every iter() call starts again from the first file, so the object
        # can be iterated more than once.
        self._lines = self._read_lines()
        return self

    def __next__(self):
        """Return the next line; raise StopIteration after the last file."""
        if self._lines is None:
            self._lines = self._read_lines()
        return next(self._lines)
filenames = 'segment-1.dat', 'segment-2.dat'
reader = csv.DictReader(ChainedCSVfiles(filenames),
fieldnames=('field1', 'field2', 'field3'))
for row in reader:
print(row)

IOError: [Errno 2] No such file or directory error is presented for the output file

I'm making a Python code to manipulate text files. The code will receive from the command line input file and output file names and a flag -sort, -reverse etc according to manipulation to apply on the input file and finally write the data to output file. I need to do all this job inside a class so the code will be inheritable. So far I have a code like this:
import argparse
import random
# Command-line text-file manipulator: __init__ parses an input file name, an
# output file name and one of the mutually exclusive flags -s/-r/-sh; the
# sort/reverse/shuffle methods each write the non-blank lines of the input
# to the output in a different order.
class Xiv(object):
def __init__(self):
parser = argparse.ArgumentParser()
# Only one of the three manipulation flags may be given per run.
group = parser.add_mutually_exclusive_group()
group.add_argument("-s", "-sort", action="store_true")
group.add_argument("-r", "-reverse", action="store_true")
group.add_argument("-sh", "-shuffle", action="store_true")
# NOTE(review): type=file makes argparse call file(<name>) on each argument,
# i.e. open it in read mode while parsing, so an output file that does not
# exist yet raises IOError here.
parser.add_argument("inputfile", type = file, help="Input file name")
parser.add_argument("outputfile", type = file, help="Output file name")
args = parser.parse_args()
# NOTE(review): source and dist are locals of __init__; they are not stored
# on self, and none of the methods below is called after parsing.
source =args.inputfile
dist = args.outputfile
# Write the non-blank lines of source to dist in sorted order.
# NOTE(review): unlike reverse/shuffle this method takes no source/dist
# parameters, and it sorts/iterates the builtin `list` rather than list1.
def sort(self):
f = open(source, "r")
# Keep only lines that contain something besides whitespace.
list1 = [line for line in f if line.strip()]
f.close()
list.sort()
with open(dist, 'wb') as fl:
for item in list:
fl.write("%s" % item)
# Write the non-blank lines of source to dist in reverse order.
def reverse(self, source, dist):
f = open(source, "r")
list2 = [line for line in f if line.strip()]
f.close()
list2.reverse()
with open(dist, 'wb') as f2:
for item in list2:
f2.write("%s" % item)
# Write the non-blank lines of source to dist in random order.
def shuffle(self, source, dist):
f = open(source, "r")
list3 = [line for line in f if line.strip()]
f.close()
random.shuffle(list3)
with open(dist, 'wb') as f3:
for item in list3:
f3.write("%s" % item)
x = Xiv();
Now when I run it as
python xiv.py -s text.txt out.txt
it presents the following error
IOError: [Errno 2] No such file or directory 'out.txt'
But 'out.txt' is going to be the output file; I expect the code to create it if it does not already exist. And it worked before I put this code inside the class....
In Python 2, when I call file on a nonexistent file I get this error:
In [13]: file('out.txt')
---------------------------------------------------------------------------
IOError Traceback (most recent call last)
<ipython-input-13-d0d554b7d5b3> in <module>()
----> 1 file('out.txt')
IOError: [Errno 2] No such file or directory: 'out.txt'
In Py2, file is equivalent to open. It opens a file, default in r mode, and thus will give this error if the file does not exist.
In argparse, the type=foo means run the function foo using the input string. It does not mean 'interpret the string as an object of this type'.
In your code:
with open(dist, 'wb') as f2:
for item in list2:
f2.write("%s" % item)
That means you expect dist to be a filename, a string. You don't want the parser to open the file first. You are doing that yourself. So don't specify a type parameter.
@tmoreau - Python3 dropped that file function, leaving only open.
The open function opens a file for reading by default. What you want is to open it in write/create mode, which will create it if it doesn't exist and allow you to write to it:
with open(dist, 'w+') as f3:
The second argument specifies the open mode, and it defaults to 'r', meaning read only.
I'm not sure why it worked before you moved it into the class - by all accounts, it should make no difference.
I was getting this error..
jemdoc index
Traceback (most recent call last):
File "/usr/bin/jemdoc", line 1563, in
main()
File "/usr/bin/jemdoc", line 1555, in main
infile = open(inname, 'rUb')
IOError: [Errno 2] No such file or directory: 'index.jemdoc'
then i have changed
infile = open(inname, 'rUb')
to
infile = open(inname, 'w+')
Then the error was solved.

Try except not catching IOError from class

I have a class that reads a file of a particular format. These files tend to be greater than 8Gb in size so are usually compressed. When reading the file in I wanted to catch the error of the file not being compressed but neither except IOError: nor except: will do so, for some reason I don't understand.
There are a few classes defined together in the file VCF.py, though the offending class is vcfReader(). The file from which the object is instantiated is below test.py, and lastly the Traceback.
Anyone have any ideas as to why it isn't working?
VCF.py
import gzip
import sys
class Call():
'''
Class to handle the sample genotypes and associated information
'''
def __init__(self,site,sample,format,data):
#do stuff here#
class Variant():
'''
Class for a single row from a VCF file.
'''
def __init__(self, entry, samples):
#do other stuff here
class vcfReader():
'''
read a compressed vcf file ignoring the meta-information, but parsing the header for sample names
'''
def __init__(self, file):
# Only the gzip.open() call is guarded by this try block.
try:
self.vcfFile = gzip.open(file, 'rb')
except IOError:
print "Not a gzipped file"
sys.exit()
# NOTE(review): readHeader() performs the first actual read and runs outside
# the try block, so an IOError raised while reading is not caught above.
self.samples = self.readHeader()
# Advance through the leading '#' lines and return the sample names taken
# from the tab-separated header line (fields from index 9 onward).
# Returns None implicitly if a line not starting with '#' is reached first.
def readHeader(self):
line = self.vcfFile.next()
while line.startswith('#'):
if line[1]!='#':
#lines that start with ##, i.e. meta tags are ignored. Header line starting with '#', sample names are extracted.
return line.rstrip().rsplit('\t')[9:]
else:
line = self.vcfFile.next()
# The reader is its own iterator (Python 2 protocol: __iter__ plus next).
def __iter__(self):
return self
# Wrap the next raw line of the file in a Variant together with the sample
# names; StopIteration from the file propagates to end the iteration.
def next(self):
row = self.vcfFile.next()
return Variant(row, self.samples)
and then test.py
import VCF
from collections import Counter
if __name__=='__main__':
vcfreader = VCF.vcfReader('all_samples.vcf')
filters = []
for i in vcfreader:
filters.extend(i.FILTERS)
filters = Counter(filters)
for k,v in filters.iteritems():
print "{0}: {1}".format(k,v)
Here is the traceback:
Traceback (most recent call last):
File "C:\Users\Davy\Documents\Programming\VCF_stuff\src\test.py", line 10, in <module>
vcfreader = VCF.vcfReader('all_samples.vcf')
File "C:\Users\Davy\Documents\Programming\VCF_stuff\src\VCF.py", line 95, in __init__
self.samples = self.readHeader()
File "C:\Users\Davy\Documents\Programming\VCF_stuff\src\VCF.py", line 98, in readHeader
line = self.vcfFile.next()
File "C:\Python27\lib\gzip.py", line 450, in readline
c = self.read(readsize)
File "C:\Python27\lib\gzip.py", line 256, in read
self._read(readsize)
File "C:\Python27\lib\gzip.py", line 291, in _read
self._read_gzip_header()
File "C:\Python27\lib\gzip.py", line 185, in _read_gzip_header
raise IOError, 'Not a gzipped file'
IOError: Not a gzipped file
The reason your except block doesn't catch the exception is that it happens outside the try block:
def __init__(self, file):
try:
self.vcfFile = gzip.open(file, 'rb')
except IOError:
print "Not a gzipped file"
sys.exit()
self.samples = self.readHeader() # <<<<<<<< exception is raised here

Categories

Resources