I have a class that reads a file of a particular format. These files tend to be greater than 8 GB in size, so they are usually compressed. When reading the file in, I wanted to catch the error of the file not being compressed, but neither except IOError: nor a bare except: will do so, for some reason I don't understand.
There are a few classes defined together in the file VCF.py, though the offending class is vcfReader(). The file from which the object is instantiated, test.py, is below, and lastly the traceback.
Anyone have any ideas as to why it isn't working?
VCF.py
import gzip
import sys
class Call():
    '''
    Class to handle the sample genotypes and associated information
    '''
    def __init__(self, site, sample, format, data):
        # do stuff here
        pass

class Variant():
    '''
    Class for a single row from a VCF file.
    '''
    def __init__(self, entry, samples):
        # do other stuff here
        pass
class vcfReader():
    '''
    Read a compressed VCF file, ignoring the meta-information lines but
    parsing the header line for sample names.
    '''
    def __init__(self, file):
        try:
            self.vcfFile = gzip.open(file, 'rb')
        except IOError:
            print "Not a gzipped file"
            sys.exit()
        self.samples = self.readHeader()

    def readHeader(self):
        line = self.vcfFile.next()
        while line.startswith('#'):
            if line[1] != '#':
                # header line starting with a single '#': extract the sample names
                return line.rstrip().rsplit('\t')[9:]
            else:
                # meta-information lines starting with '##' are ignored
                line = self.vcfFile.next()

    def __iter__(self):
        return self

    def next(self):
        row = self.vcfFile.next()
        return Variant(row, self.samples)
and then test.py
import VCF
from collections import Counter

if __name__ == '__main__':
    vcfreader = VCF.vcfReader('all_samples.vcf')
    filters = []
    for i in vcfreader:
        filters.extend(i.FILTERS)
    filters = Counter(filters)
    for k, v in filters.iteritems():
        print "{0}: {1}".format(k, v)
Here is the traceback:
Traceback (most recent call last):
File "C:\Users\Davy\Documents\Programming\VCF_stuff\src\test.py", line 10, in <module>
vcfreader = VCF.vcfReader('all_samples.vcf')
File "C:\Users\Davy\Documents\Programming\VCF_stuff\src\VCF.py", line 95, in __init__
self.samples = self.readHeader()
File "C:\Users\Davy\Documents\Programming\VCF_stuff\src\VCF.py", line 98, in readHeader
line = self.vcfFile.next()
File "C:\Python27\lib\gzip.py", line 450, in readline
c = self.read(readsize)
File "C:\Python27\lib\gzip.py", line 256, in read
self._read(readsize)
File "C:\Python27\lib\gzip.py", line 291, in _read
self._read_gzip_header()
File "C:\Python27\lib\gzip.py", line 185, in _read_gzip_header
raise IOError, 'Not a gzipped file'
IOError: Not a gzipped file
The reason your except block doesn't catch the exception is that it happens outside the try block:
def __init__(self, file):
    try:
        self.vcfFile = gzip.open(file, 'rb')
    except IOError:
        print "Not a gzipped file"
        sys.exit()
    self.samples = self.readHeader()  # <<<<<<<< exception is raised here
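gzip.open() itself succeeds on a file that isn't gzipped, because the gzip header is only checked on the first read, not when the file is opened (the traceback shows _read_gzip_header being reached from readline). As a minimal sketch, keeping the question's Python 2 style, moving the first read inside the try block lets the handler fire:

def __init__(self, file):
    try:
        self.vcfFile = gzip.open(file, 'rb')
        # readHeader() performs the first read, so the IOError raised
        # for a non-gzipped file is now caught by the handler below
        self.samples = self.readHeader()
    except IOError:
        print "Not a gzipped file"
        sys.exit()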
My code is as follows:
import json
import pandas as pd
from difflib import SequenceMatcher

description_file = open("description.json", encoding="utf-8")
nodescription_file = open("nodescription.json", encoding="utf-8")
desc = json.load(description_file)
nodesc = json.load(nodescription_file)
stack = []

def get_values():
    data = []
    for pages in nodesc:
        for rows in pages["dataRows"]:
            skip = False
            if skip:
                break
            email = ""
            companyname = rows["columnValues"]["companyName"][0]["name"]
            fullname = ""
            firmName = rows["columnValues"]["firm"][0]["name"]
            pbid = rows["columnValues"]["companyName"][0]["pbId"]
            managementposition = rows["columnValues"]["managementPosition"][0]["value"]
            if len(rows["columnValues"]["email"]):
                email = rows["columnValues"]["email"][0]["value"]
            else:
                email = "No email"
            if len(rows["columnValues"]["fullName"]):
                fullname = rows["columnValues"]["fullName"][0]["name"]
            for desc_rows in desc["dataRows"]:
                pbid_desc = desc_rows["columnValues"]["investorName"][0]["pbId"]
                description = ""
                if len(desc_rows["columnValues"]["description"]):
                    description = desc_rows["columnValues"]["description"][0]["value"]
                else:
                    description = "No description"
                if pbid == pbid_desc:
                    data.append({"Full Name": fullname, "Email": email, "Company Name": companyname, "Position": managementposition, "Description": description})
    save_data(data, "file7.csv")

def similar(a, b):  # Don't use this anymore, pbid was relational
    return SequenceMatcher(None, a, b).ratio()

def instack(string):
    for i in stack:
        if string == i:
            return True
    return False

def save_data(data, name):
    pd.read_json(json.dumps(data, ensure_ascii=False).encode('utf8'), encoding='utf-8').to_csv(name, encoding="utf-8")

get_values()
description_file.close()
nodescription_file.close()
I am getting an error of:
Traceback (most recent call last):
File "/Users/dan/Desktop/Upwork/main.py", line 69, in <module>
get_values()
File "/Users/dan/Desktop/Upwork/main.py", line 53, in get_values
save_data(data, "file7.csv")
File "/Users/dan/Desktop/Upwork/main.py", line 68, in save_data
pd.read_json(json.dumps(data, ensure_ascii=False).encode('utf8'), encoding='utf-8').to_csv(name, encoding="utf-8")
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/util/_decorators.py", line 207, in wrapper
return func(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/util/_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/json/_json.py", line 588, in read_json
json_reader = JsonReader(
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/json/_json.py", line 673, in __init__
data = self._get_data_from_filepath(filepath_or_buffer)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/json/_json.py", line 710, in _get_data_from_filepath
self.handles = get_handle(
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/common.py", line 826, in get_handle
raise TypeError(
TypeError: Expected file path name or file-like object, got <class 'bytes'> type
Please help, I am a total noob. Thank you so much. The above is not code, it is an error; the Stack Overflow autobot is making me write more of a question because it cannot discern the code text from the result text, or I am just a noob. Please help. Thank you so much in advance. I am on a PC using Visual Studio and have already installed pandas with pip; pip install cdifflib is failing for some reason with exit code 1120, a legacy install failure.
The error you get is because read_json expects as a first argument either a path to a file or a file-like object, but you're giving it a bytes string.
I think what should work is to convert your bytes string into a bytes buffer, which is a file-like object, with io.BytesIO (remember to import io):
pd.read_json(io.BytesIO(json.dumps(data, ensure_ascii=False).encode('utf8')), encoding='utf-8').to_csv(name, encoding="utf-8")
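As a side note (an alternative, not part of the original answer): since data is already a list of flat dicts, pandas can build the frame directly, skipping the JSON round-trip entirely:

import pandas as pd

def save_data(data, name):
    # data is a list of dicts, so DataFrame accepts it directly;
    # there is no need to serialize to JSON and parse it back
    pd.DataFrame(data).to_csv(name, encoding="utf-8")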
I've written a tkinter app in Python 3.7 which extracts information from a main CSV, pre-filtered by the user, into smaller reports. My issue is that while writing the filtered data from the main CSV into a report I occasionally get a PermissionError:
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Users\mdiakuly\AppData\Roaming\Python\Python37\lib\tkinter\__init__.py", line 1705, in __call__
return self.func(*args)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 286, in report_generation_prep
self.report_generation(names_list, relevant_params, specific_value, extracted_file)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 344, in report_generation
processing_data(rd,column)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 336, in processing_data
writing_new_report(gathering_row)
File "D:/PycharmProjects/report_extractor_hil_sil/report_extractor.py", line 299, in writing_new_report
with open(extracted_file, 'a+', newline='') as write_in:
PermissionError: [Errno 13] Permission denied: 'C:/Users/myusername/Desktop/reporter/Partial_report_14_10_2021T15-13-12.csv'
Once it extracted only one row and then threw an error; another time it did the whole extraction with no error; and a few other times it extracted thousands of rows before throwing an error.
The CSV file being written into was never opened during the extraction.
Has anyone faced the same issue or maybe has an idea how to fix it?
def writing_new_report(complete_row):
    with open(extracted_file, 'a+', newline='') as write_in:
        wt = csv.DictWriter(write_in, delimiter=';', fieldnames=relevant_params)
        if self.debug:
            print(complete_row)
        wt.writerow(complete_row)

def processing_data(r_d, column='def'):
    for idx, row in enumerate(r_d):  # looping through major csv
        self.progress.update()
        if self.debug:
            print(f'{idx:}', end=' / ')
        gathering_row = {}
        if column != 'def':
            if row[column] not in names_list:
                continue
            else:
                names_list.remove(row[column])
        for param, value in zip(relevant_params, specific_value):
            self.progress.update()
            if self.debug:
                print(f'{row[param]:}', end=' / ')
            gathering_row[param] = row[param]
            if value == '---- All ----':
                pass
            elif value != row[param]:
                if self.debug:
                    print(f'{row[param]:} - Skipped')
                break
            if param == relevant_params[len(relevant_params)-1]:
                if self.debug:
                    print(f'{row[param]:} - Written')
                writing_new_report(gathering_row)
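No fix is recorded in this thread, but one detail stands out (a hypothetical refactor, not a confirmed cure for the PermissionError): writing_new_report reopens the report file for every single row, and rapid open/close cycles on Windows can collide with antivirus or indexing tools that briefly lock the file. A sketch that opens the report once per run and reuses the handle (write_report_rows and rows are hypothetical names):

import csv

def write_report_rows(rows, extracted_file, relevant_params):
    # open the report once and keep the handle for the whole batch,
    # instead of reopening the file for each row
    with open(extracted_file, 'a+', newline='') as write_in:
        wt = csv.DictWriter(write_in, delimiter=';', fieldnames=relevant_params)
        for complete_row in rows:
            wt.writerow(complete_row)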
I was trying to dump and load a dictionary to a JSON file using Python. I can dump the file without a problem. However, when I try to load the file into a temp dictionary, an error occurs. I cannot figure out the issue; can anyone help me with this? Thanks.
import os
import json

def get_stored_birth():
    filename = 'C:/Users/Sam/name.json'
    temp = {}
    with open(filename, 'r+') as f_obj1:
        temp = json.load(f_obj1)
    print(temp.get(name), "is the birthday of", name)

def get_new_birth():
    birth = str(input())
    my_dict[name] = birth
    print("Birthday database updated")
    filename = 'C:/Users/Sam/name.json'
    with open(filename, 'a') as f_obj:
        f_obj.write('\n')
        json.dump(my_dict, f_obj)
    return name

my_dict = {}

def quit():
    """This function quits the program"""
    return quit

while True:
    filename = 'C:/Users/Sam/name.json'
    print("Enter a name:(blank to quit)")
    name = str(input())
    if name == "":
        exit()
    if name in my_dict:
        name = get_stored_birth()
    else:
        print("I dont have info for", name)
        print("What is their birthday")
        name = get_new_birth()
The traceback is as follows:
Traceback (most recent call last):
File "C:\Users\Sam\Desktop\Tianxu_Assignment2\Assignment 2.py", line 45, in <module>
name= get_stored_birth()
File "C:\Users\Sam\Desktop\Tianxu_Assignment2\Assignment 2.py", line 10, in get_stored_birth
temp =json.load(f_obj1)
File "C:\Users\Sam\AppData\Local\Programs\Python\Python36-32\lib\json\__init__.py", line 299, in load
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
File "C:\Users\Sam\AppData\Local\Programs\Python\Python36-32\lib\json\__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "C:\Users\Sam\AppData\Local\Programs\Python\Python36-32\lib\json\decoder.py", line 342, in decode
raise JSONDecodeError("Extra data", s, end)
json.decoder.JSONDecodeError: Extra data: line 3 column 1 (char 12)
Problem solved!!!
1. Replace with open(filename, 'a') as f_obj with with open(filename, 'w') as f_obj.
2. if name in my_dict: should not check my_dict!!! Every time the program starts it uses a fresh dictionary. I moved
filename = 'C:/Users/Sam/name.json'
temp = {}
with open(filename, 'r+') as f_obj1:
    temp = json.load(f_obj1)
into the main loop and check if name in temp: instead.
Thanks guys!!!
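For illustration, a sketch of the reworked main loop described above (assuming name.json already exists and holds a single JSON object at the original path):

filename = 'C:/Users/Sam/name.json'
while True:
    print("Enter a name:(blank to quit)")
    name = str(input())
    if name == "":
        break
    # reload the stored dictionary from disk on every pass, instead of
    # checking my_dict, which starts empty on each run of the program
    with open(filename, 'r') as f_obj1:
        temp = json.load(f_obj1)
    if name in temp:
        name = get_stored_birth()
    else:
        print("I dont have info for", name)
        print("What is their birthday")
        name = get_new_birth()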
You are appending new JSON to the JSON you created on previous runs. Just substitute this line:
with open(filename,'a') as f_obj:
with this one:
with open(filename,'w') as f_obj:
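In context, get_new_birth becomes the following sketch (the stray write of a leading newline is dropped, since the file is rewritten whole each time):

def get_new_birth():
    birth = str(input())
    my_dict[name] = birth
    print("Birthday database updated")
    filename = 'C:/Users/Sam/name.json'
    # 'w' truncates the file, so it always holds exactly one JSON document;
    # 'a' appends a second document, which json.load later rejects as extra data
    with open(filename, 'w') as f_obj:
        json.dump(my_dict, f_obj)
    return name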
I am trying to read in a large JSON file (data.json) in Python. Because the JSON file contains multiple JSON objects, multiple dictionaries will be created in Python (the number of dictionaries is unknown), so I used decoder.raw_decode() and a generator.
The following is the code:
import json
import pprint
import io

def parse():
    with open('data.json', encoding='utf-8') as jfile:
        try:
            while True:
                decoder = json.JSONDecoder()
                obj, idx = decoder.raw_decode(jfile)
                yield obj
        except ValueError as e:
            print(e)
            pass
        else:
            print("aha")

def main():
    imputd = parse()
    if imputd:
        while True:
            try:
                print(str(next(imputd)).readlines())
            except StopIteration as e:
                print(e)
                break

main()
I get the error:
Traceback (most recent call last):
File "H:\Document\Python\j10.py", line 57, in <module>
main()
File "H:\Document\Python\j10.py", line 36, in main
print(str(next(imputd)).readlines())
File "H:\Document\Python\j10.py", line 21, in parse
obj, idx = decoder.raw_decode(jfile)
File "C:\Python34\lib\json\decoder.py", line 360, in raw_decode
obj, end = self.scan_once(s, idx)
TypeError: first argument must be a string, not _io.TextIOWrapper
I edited the code based on Martijn's answer:
import json
import io

file = open('data.json.txt')

def readin():
    return file.read(2000)

def parse():
    decoder = json.JSONDecoder()
    buffer = ''
    for chunk in iter(readin, ''):
        buffer += chunk
        while buffer:
            try:
                result, index = decoder.raw_decode(buffer)
                yield result
                buffer = buffer[index:]
            except ValueError:
                # Not enough data to decode, read more
                break

def main():
    imputd = parse()
    if imputd:
        while True:
            try:
                print(str(next(imputd)).readlines())
            except StopIteration as e:
                print(e)
                break

main()
and I get a UnicodeDecodeError:
Traceback (most recent call last):
File "H:\Document\Python\j11.py", line 35, in <module>
main()
File "H:\Document\Python\j11.py", line 30, in main
print(str(next(imputd)).readlines())
File "H:\Document\Python\j11.py", line 14, in parse
for chunk in iter(readin, ''):
File "H:\Document\Python\j11.py", line 8, in readin
return file.read(2000)
File "C:\Python34\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 4217: character maps to <undefined>
You are passing in the file object, but decoder.raw_decode() only takes text data. You need to do the reading yourself:
obj, idx = decoder.raw_decode(jfile.read())
You are then yielding Python objects created from the JSON data, so your .readlines() call in your main() function loop will also fail.
You are not using raw_decode() correctly, however. You are yourself responsible for feeding it chunks of text, it'll not read that text from the file for you. If you wanted to handle the file in chunks, and there are no clear delimiters between the JSON entries, you'll be forced to read the file in blocks:
from functools import partial

decoder = json.JSONDecoder()
buffer = ''
for chunk in iter(partial(jfile.read, buffersize), ''):
    buffer += chunk
    while buffer:
        try:
            result, index = decoder.raw_decode(buffer)
            yield result
            buffer = buffer[index:]
        except ValueError:
            # Not enough data to decode, read more
            break
This will still yield completely decoded objects; if your file is one long JSON object (like one top-level list or dictionary) then this'll not yield the contents of that object one by one; it'll still read the whole object before yielding.
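A minimal usage sketch of that loop (assumptions: the file holds concatenated JSON objects, the parse() wrapper name and buffersize default are mine, and open() is given an explicit encoding, which would also avoid the cp1252 UnicodeDecodeError from the second traceback):

from functools import partial
import json

def parse(jfile, buffersize=2048):
    decoder = json.JSONDecoder()
    buffer = ''
    for chunk in iter(partial(jfile.read, buffersize), ''):
        buffer += chunk
        while buffer:
            try:
                result, index = decoder.raw_decode(buffer)
                yield result
                # drop the decoded prefix, plus any whitespace between objects
                buffer = buffer[index:].lstrip()
            except ValueError:
                # not enough data for a complete object, read more
                break

with open('data.json', encoding='utf-8') as jfile:
    for obj in parse(jfile):
        print(obj)  # obj is already a Python dict/list, not a string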
I have a quite large CSV file and I am using the csv module to read and process it; the code snippet below is from my project. The file has around 9,828,075 records. The code worked fine until the 637,922nd record, after which it raised the error below:
ERROR Tue, 14 Apr 2013 09:59:29 Traceback (most recent call last):
File "build\bdist.win32\egg\my_proj\csv_reader.py", line 316, in next
File "E:\MyProject\DataExa\Python26\lib\csv.py", line 104, in next
row = self.reader.next()
File "build\bdist.win32\egg\my_proj\csv_reader.py", line 366, in capture_record_data
IOError: [Errno 13] Permission denied
My code looks like this:
import csv

class MyCsvReader(object):
    """
    """
    def __init__(self, import_source, dialect='excel'):
        """
        """
        self.import_source = import_source
        self.dialect = dialect
        self.post_init()

    def post_init(self):
        """
        Do any post init logic....
        """
        pass

    def init_reader(self):
        self.import_file = file(self.import_source, 'rU')
        #_reader = csv.reader(self.capture_record_data(self.import_file),
        #                     dialect=self.dialect)
        # create a CSV iterator that returns a dict with each next() call
        self.reader = csv.DictReader(self.capture_record_data(self.import_file),
                                     dialect=self.dialect)

    def next(self):
        """
        Returns a dict containing data read from the CSV file.
        """
        #todo: do a magic to remove all spaces in data....
        return self.reader.next()

    def __iter__(self):
        "Special method to make an instance of this class into an iterator"
        return self

    def capture_record_data(self, row_iterator):
        """
        Generator for capturing the record data before passing to csv
        """
        for row in row_iterator:
            self.raw_data = row
            yield row

    def close(self):
        if hasattr(self, 'import_file'):
            self.import_file.close()

if __name__ == '__main__':
    reader_obj = MyCsvReader(import_source='test.csv')
    reader_obj.init_reader()
    while True:
        try:
            print reader_obj.reader.next()
        except StopIteration, e:
            break
Could anyone help me figure out why I am getting IOError: [Errno 13] Permission denied while processing the file?