So, this is my code.
def classMaker(csv):
csv = csv.split("/n")
firstLine = csv[0]
csv = csv[1:]
class newClass():
def __init__(self, line):
self.vars = firstLine
for i in range(len(line)):
self[firstLine[i]] = line[i]
return [newClass(line) for line in csv]
The problem is an AttributeError in self[firstLine[i]] = line[i]. It says
AttributeError: newClass instance has no attribute '__setitem__'
I don't know why it is causing this error. My goal is to take in a csv file exported from Excel and auto-generate object names from field names.
Thank you in advance.
You can avoid the newClass all together if you use collections.namedtuple:
CSVRow = namedtuple("CSVRow", firstLine)
return [CSVRow(*line) for line in csv]
This assumes that the CSV headers will be valid Python identifiers (that is, if you have entires like "Some Value" this won't work if you don't process firstLine.
This will let you do things like this:
# Let's assume your CSV has a Name field
# and that it is the first column
csv_data[3].Name == csv_data[3][0]
# True
Also, you should look into the csv module to simplify CSV processing.
If I can infer your intent correctly, you want to replace this line:
self[firstLine[i]] = line[i]
with this:
setattr(self, firstline[i], line[i])
This will create an attribute of your newClass object named after the column in your data.
E.g.:
Name, Date, Weight
Joe, 23-Sep, 99
...
and
data = classMaker('file.csv')
will produce :
data[0].Name == 'Joe'
P.s. I assume that you will add file I/O, parsing the CSV file, and other missing elements.
P.p.s: You can avoid the loop counter i altogether:
for attr, val in zip(firstLine, line):
setattr(self, attr, val)
P.p.s: Here is a complete working sample:
import csv
def classMaker(filename):
class newClass(object):
def __init__(self, line):
for attr, val in zip(firstLine, line):
setattr(self, attr, val)
with open(filename, 'rb') as csvfile:
spamreader = csv.reader(csvfile)
firstLine = spamreader.next()
return [newClass(line) for line in spamreader]
x = classMaker("/tmp/x.csv")
print x[0].Name
Related
I want to open csv file for reading purpose. But I'm facing some exceptions regarding to that.
I'm using Python 2.7.
main.python-
if __name__ == "__main__":
f = open('input.csv','r+b')
m = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
reader = csv.DictReader(iter(m.readline, ""))
for read in reader:
num = read['time']
print num
output-
Traceback (most recent call last):
File "/home/PycharmProjects/time_gap_Task/main.py", line 22, in <module>
for read in reader:
File "/usr/lib/python3.4/csv.py", line 109, in __next__
self.fieldnames
File "/usr/lib/python3.4/csv.py", line 96, in fieldnames
self._fieldnames = next(self.reader)
_csv.Error: iterator should return strings, not bytes (did you open the file in text mode?)
How to resolve this error? and how to open csv file using mmap and csv in good manner so code is working perfect?
I know you asked this a while ago, but I actually created a module for myself that does this, because I do a lot of work with large CSV files, and sometimes I need to convert them into dictionaries, based on a key. Below is the code I've been using. Please feel free to modify as needed.
def MmapCsvFileIntoDict(csvFilePath, skipHeader = True, transform = lambda row: row, keySelector = lambda o: o):
"""
Takes a CSV file path and uses mmap to open the file and return a dictionary of the contents keyed
on the results of the keySelector. The default key is the transformed object itself. Mmap is used because it is
a more efficient way to process large files.
The transform method is used to convert the line (converted into a list) into something else. Hence 'transform'.
If you don't pass it in, the transform returns the list itself.
"""
contents = {}
firstline = False
try:
with open(csvFilePath, "r+b") as f:
# memory-map the file, size 0 means whole file
mm = mmap.mmap(f.fileno(), 0)
for line in iter(mm.readline, b''):
if firstline == False:
firstline = True
if skipHeader == True:
continue
row = ''
line = line.decode('utf-8')
line = line.strip()
row = next(csv.reader([line]), '')
if transform != None and callable(transform):
if row == None or row == '':
continue
value = transform(row)
else:
value = row
if callable(keySelector):
key = keySelector(value)
else:
key = keySelector
contents[key] = value
except IOError as ie:
PrintWithTs('Error decomposing the companies: {0}'.format(ie))
return {}
except:
raise
return contents
When you call this method, you have some options.
Assume you have a file that looks like:
Id, Name, PhoneNumber
1, Joe, 7175551212
2, Mary, 4125551212
3, Vince, 2155551212
4, Jane, 8145551212
The easiest way to call it is like this:
dict = MmapCsvFileIntoDict('/path/to/file.csv', keySelector = lambda row: row[0])
What you get back is a dict looking like this:
{ '1' : ['1', 'Joe', '7175551212'], '2' : ['2', 'Mary', '4125551212'] ...
One thing I like to do is create a class or a namedtuple to represent my data:
class CsvData:
def __init__(self, row):
self.Id = int(row[0])
self.Name = row[1].upper()
self.Phone = int(row[2])
And then when I call the method, I pass in a second lambda to transform each row in the file to an object I can work with:
dict = MmapCsvFileIntoDict('/path/to/file.csv', transform = lambda row: CsvData(row), keySelector = lambda o: o.Id)
What I get back that time looks like:
{ 1 : <object instance>, 2 : <object instance>...
I hope this helps! Best of luck
When open a file with the flag b like this:
f = open('input.csv','r+b')
You read the file as bytes and not as string.
So, try to change the flags to r:
f = open('input.csv','r')
if you just want to read data with specific columnes from csv file, just try:
import csv
with open('input.csv') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
print row['time']
First I'd like to mention that I am completely new to Python and I've found it a bit difficult to transition from C++. I apologize if my question comes off as elementary.
I have a class for 'songs' which I have initialized as following. It takes in data from a file that contains a song's ID, name, genre etc. all separated by ::.
def __init__(self):
self.song_names = dict()
self.song_genres = dict()
def load_songs(self, song_id):
f = open(song_id)
for line in f:
line = line.rstrip()
component = line.split("::")
sid = components[0]
same= components[1]
sgenre=components[2]
self.song_names[mid] = sname
self.song_genres[mid] = sgenre
f.close()
The program also takes in data from a file with 'users' information, separated as
UserID::Gender::Age::Occupation::Zip etc. and a file with 'ratings'.
I would I implement a function like def set_song(sid, list((title,genres)))
and something like delete_song(sid) ?
I'm going to have to wind up doing a ton more other functions, but if someone could help me with those two - at least to have a better idea of structure and syntax - handling the others should be easier.
Why not just inherit from dict and use its interface? That way you can use Python's standard mapping operations instead of rolling your own:
class Songs(dict):
def load(self, song_id):
with open(song_id, 'r') as f:
for line in f:
sid, name, genre = line.rstrip().split('::')[:3]
self[sid] = [name, genre]
mysongs = Songs()
mysongs.load('barnes_and_barnes__fish_heads')
mysongs['barnes_and_barnes__fish_heads'] = ['roly poly', 'strange'] # set
del mysongs['barnes_and_barnes__fish_heads'] # delete
I am making a flashcard program in which I take a text file that contains several columns, such as english word, french equivalent, gender, type of word, etc. My idea was to create a loop that read each line of the text file, separating by tabs, and makes an instance of a user-defined Word object for each line.
In the following block code I import the text file, process it into a list, then attempt to create an instance of a previously defined object: Word. I would like the object to have the second item on the list for it's name so that it is easily searchable, but it's not letting me do this, please can somebody help me with the code:
file = (open('dictionary.txt', 'r')).readline()
import re
line_list = re.split(r'\t', file.rstrip('\n'))
line_list[1] = Word(line_list[0], line_list[1], line_list[2], line_list[3])
Create a dict of instances and use the second item of the lists as key. It's a bad idea to create dynamic variables.
import re
instance_dict = {}
with open('dictionary.txt') as f:
for line in f:
line_list = re.split(r'\t', line.rstrip('\n'))
instance_dict[line_list[1]] = Word(*line_list[:4])
Why the with statement?
It is good practice to use the with keyword when dealing with file
objects. This has the advantage that the file is properly closed after
its suite finishes, even if an exception is raised on the way.
You can also use the csv module:
import csv
instances = {}
with open('dictionary.txt', 'rb') as f:
reader = csv.reader(f, delimiter='\t')
instances = {line[1]: Word(*line) for line in reader}
Here's a cleaner solution using a namedtuple. You'll end up with a dict called "words" which you use to lookup each by name.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pprint
from collections import namedtuple
Word = namedtuple('Word', ['name', 'french', 'gender', 'type_'])
words = {}
with open('dictionary.txt', 'rU') as fin:
for word in (Word(*r.rstrip('\n').split('\t')) for r in fin):
words[word.name] = word
pprint.pprint(words)
Firstly, it's better to use with, as statements to get input from files, as the closing procedures are automatically taken care of. Secondly, to read ALL of the lines from a file, you must use readlines() rather than readline(). Try something like this :
with open('dictionary.txt','r') as file :
line_list = file.readlines()
splitLineList = []
for lines in line_list :
splitLineList.append(re.split(r'\t',lines.strip('\n'))
You may have an appropriate solution depending on few clarification on your requirements
"My idea was to create a loop that read each line of the text file,
separating by tabs, and"
If the text file is already pre-validated or reliable to ignore error-handling (e.g. not evenly separated by single tabs).
with open('dictionary.txt', 'r') as f:
[line.strip().split("\t")
for line in f.read().split("\n")
if line.strip()]
will get you the (comprehensive) list required to create Word object instances, without using re
"then attempt to create an instance of a previously defined object:
Word."
with open('dictionary.txt', 'r') as f:
[Word(line.strip().split("\t"))
for line in f.read().split("\n")
if line.strip()]
"I would like the object to have the second item on the list for it's
name so that it is easily searchable,"
Can you rewrite this with an example?
but it's not letting me do this,
line_list[1] = Word(line_list[0], line_list[1], line_list[2], line_list[3])
Sorry I am loosing you here, why are using line_list[1] to refer newly created Word instances where line_list[1] itself is an argument ?
With your clarification, I would have something like this
Reworked Code:
from pprint import pprint
My assumption on your Class definition:
class Word():
def __init__(self, **kwargs):
self.set_attrs(**kwargs)
def __call__(self):
return self.get_attr("swedish_word")
def set_attrs(self, **kwargs):
for k, v in kwargs.iteritems():
setattr(self, k, v)
def get_attr(self, attr):
return getattr(self, attr)
def get_attrs(self):
return ({attr.upper():getattr(self, attr) for attr in self.__dict__.keys()})
def print_attrs(self):
pprint(self.get_attrs())
if __name__ == '__main__':
# sample entries in dictionary.txt
# swedish_word english_word article word_type
# hund dog ett noun
# katt cat ett noun
# sova sleep ett verb
with open('dictionary.txt', 'r') as f:
header = f.readline().strip().split("\t")
instances = [Word(**dict(zip(header, line.strip().split("\t"))))
for line in f.read().split("\n")
if line.strip()]
# for line in f.read().split("\n"):
# data = dict(zip(header, line.strip().split("\t")))
# w = Word(**data)
You can get instance properties for a given swedish_word like this
def print_swedish_word_properties(swedish_word):
for instance in instances:
if instance() == swedish_word:
print "Properties for Swedish Word:", swedish_word
instance.print_attrs()
print_swedish_word_properties("hund")
to have output like this
Properties for Swedish Word: hund
{'ARTICLE': 'ett',
'ENGLISH_WORD': 'dog',
'SWEDISH_WORD': 'hund',
'WORD_TYPE': 'noun'}
or you can use any other class methods to search instances on various attributes
First time writing a class here and I need a little help.
I've been trying to write a class in which the first takes a tab-delimited csv file and outputs a list of dictionaries. Each of the keys in the dictionary is a column title in the csv.
So far, this is what my class looks like:
import csv
class consolidate(object):
def __init__(self, file):
self.file = file
def create_master_list(self):
with(open(self,'rU')) as f:
f_d = csv.DictReader(f, delimiter = '\t')
m_l = []
for d in f_d:
m_l.append(d)
return m_l
When I try to pass it a file, as follows:
c = consolidate()
a = c.create_master_list('Abilities.txt')
I get the following error:
TypeError: __init__() takes exactly 2 arguments (1 given)
I know that what I want to pass a file argument to the create_master_list function, but I'm unsure what the right syntax to do this is.
I've tried self.file and file as arguments, and both do not work as well.
Thanks!
Problem
You did not supply second argument for __init__():
class consolidate(object):
def __init__(self, file):
self.file = file
# rest of the code
while you are instantiating it like this:
c = consolidate()
Solution
This should work. Change class definition to this:
import csv
class consolidate(object):
def __init__(self, filename):
self.filename = filename
def create_master_list(self):
with open(self.filename, 'rU') as f:
f_d = csv.DictReader(f, delimiter='\t')
m_l = []
for d in f_d:
m_l.append(d)
return m_l
and then use it like this:
c = consolidate('Abilities.txt')
a = c.create_master_list()
This is one way of achieving the fix.
Note: I also changed the naming (self.file suggested it is file object, while it actually is a file name, thus self.filename). Also keep in mind that the path is relative to from where you execute the script.
You should pass the file as a parameter to __init__.
c = consolidate ('abilities.txt')
Then inside create_master_list you should open self.file.
with (open (self.file, 'rU') ) as f:
Now you can call
a = c.create_master_list ()
That's because your __init__ method of consolidate needs an argument for file:
def __init__(self, file):
but you don't give it anything:
c = consolidate()
To fix this problem, change your class like so:
import csv
# I capitalized the name of this class because that is convention
class Consolidate(object):
def __init__(self, file):
self.file = file
def create_master_list(self):
# 'self' is the instance of 'Consolidate'
# you want to open 'self.file' instead, which is the file
with(open(self.file,'rU')) as f:
f_d = csv.DictReader(f, delimiter = '\t')
m_l = []
for d in f_d:
m_l.append(d)
return m_l
and then use it like this:
c = Consolidate('Abilities.txt')
a = c.create_master_list()
I have a function I am using to read in files of a particular format. My function looks likes this:
import csv
from collections import namedtuple
def read_file(f, name, header=True):
with open(f, mode="r") as infile:
reader = csv.reader(infile, delimiter="\t")
if header is True:
next(reader)
gene_data = namedtuple("Data", 'id, name, q, start, end, sym')
for row in reader:
row = data(*row)
yield row
I also have another type of file that I would like to read in with this function. However, the other file type needs a few slight parsing steps before I can use the read_file function. For example, trailing periods need to be striped from column q and the characters atr need to be appended to the id column. Obviously, I could create a new function, or add some optional arguments to the existing function, but is there a simple way to modify this function so that it can be used to read in an additional file type(s)? I was thinking of something along the lines of a decorator?
IMHO, the most Pythonic way would be converting the function to a base class, split file operations into methods and overriding these methods in new classes based on your base class.
Having such a monolithic function that takes a filename instead of an open file is by itself not very Pythonic. You are trying to implement a stream processor here (file stream -> line stream -> CSV record stream -> [transformator ->] data stream), so using a generator is actually a good idea. I'd slightly refactor this to be a bit more modular:
import csv
from collections import namedtuple
def csv_rows(infile, header):
reader = csv.reader(infile, delimiter="\t")
if header: next(reader)
return reader
def data_sets(infile, header):
gene_data = namedtuple("Data", 'id, name, q, start, end, sym')
for row in csv_rows(infile, header):
yield gene_data(*row)
def read_file_type1(infile, header=True):
# for this file type, we only need to pass the caller the raw
# data objects
return data_sets(infile, header)
def read_file_type2(infile, header=True):
# for this file type, we have to pre-process the data sets
# before yielding them. A good way to express this is using a
# generator expression (we could also add a filtering condition here)
return (transform_data_set(x) for x in data_sets(infile, header))
# Usage sample:
with open("...", "r") as f:
for obj in read_file_type1(f):
print obj
As you can see, we have to pass the header argument all the way through the function chain. This is a strong hint that an object-oriented approach would be appropriate here. The fact that we obviously face a hierarchical type structure here (basic data file, type1, type2) supports this.
I suggest you to create some row iterator like following:
with MyFile('f') as f:
for entry in f:
foo(entry)
You can do this by implementing a class for your own files with the following traits:
with ( http://docs.python.org/reference/compound_stmts.html#the-with-statement )
container ( http://docs.python.org/reference/datamodel.html#emulating-container-types )
Next to it you may create some function open_my_file(filename) that determines the file type and returns propriate file object to work with. This might be slightly enterprise way, but it worth to implement if you're dealing with multiple file types.
The object-oriented way would be this:
class GeneDataReader:
_GeneData = namedtuple('GeneData', 'id, name, q, start, end, sym')
def __init__(self, filename, has_header=True):
self._ignore_1st_row = has_header
self._filename = filename
def __iter__():
for row in self._tsv_by_row():
yield self._GeneData(*self.preprocess_row(row))
def _tsv_by_row(self):
with open(self._filename, 'r') as f:
reader = csv.reader(f, delimiter='\t')
if self._ignore_1st_row:
next(reader)
for row in reader:
yield row
def preprocess_row(self, row):
# does nothing. override in derived classes
return row
class SpecializedGeneDataReader(GeneDataReader):
def preprocess_row(self, row):
row[0] += 'atr'
row[2] = row[2].rstrip('.')
return row
The simplest way would be to modify your currently working code with an extra argument.
def read_file(name, is_special=False, has_header=True):
with open(name,'r') as infile:
reader = csv.reader(infile, delimiter='\t')
if has_header:
next(reader)
Data = namedtuple("Data", 'id, name, q, start, end, sym')
for row in reader:
if is_special:
row[0] += 'atr'
row[2] = row[2].rstrip('.')
row = Data(*row)
yield row
If you are looking for something less nested but still procedure based:
def tsv_by_row(name, has_header=True):
with open(f, 'r') as infile: #
reader = csv.reader(infile, delimiter='\t')
if has_header: next(reader)
for row in reader:
yield row
def gene_data_from_vanilla_file(name, has_header=True):
for row in tsv_by_row(name, has_header):
yield gene_data(*row)
def gene_data_from_special_file(name, has_header=True):
for row in tsv_by_row(name, has_header):
row[0] += 'atr'
row[2] = row[2].rstrip('.')
yield GeneData(*row)
How about passing a callback function to read_file()
In the spirit of Niklas B.'s answer:
import csv, functools
from collections import namedtuple
def consumer(func):
#functools.wraps(func)
def start(*args, **kwargs):
g = func(*args, **kwargs)
g.next()
return g
return start
def csv_rows(infile, header, dest):
reader = csv.reader(infile, delimter='\t')
if header: next(reader)
for line in reader:
dest.send(line)
#consumer
def data_sets(dest):
gene_data = namedtuple("Data", 'id, name, q, start, end, sym')
while 1:
row = (yield)
dest.send(gene_data(*row))
def read_file_1(fn, header=True):
results, sink = getsink()
csv_rows(fn, header, data_sets(sink))
return results
def getsink():
r = []
#consumer
def _sink():
while 1:
x = (yield)
r.append(x)
return (r, _sink())
#consumer
def transform_data_sets(dest):
while True:
data = (yield)
dest.send(data[::-1]) # or whatever
def read_file_2(fn, header=True):
results, sink = getsink()
csv_rows(fn, header, data_sets(transform_data_sets(sink)))
return results