Passing a file to a class - python

First time writing a class here and I need a little help.
I've been trying to write a class in which the first method takes a tab-delimited csv file and outputs a list of dictionaries. Each of the keys in the dictionary is a column title in the csv.
So far, this is what my class looks like:
import csv
class consolidate(object):
def __init__(self, file):
self.file = file
def create_master_list(self):
with(open(self,'rU')) as f:
f_d = csv.DictReader(f, delimiter = '\t')
m_l = []
for d in f_d:
m_l.append(d)
return m_l
When I try to pass it a file, as follows:
c = consolidate()
a = c.create_master_list('Abilities.txt')
I get the following error:
TypeError: __init__() takes exactly 2 arguments (1 given)
I know that I want to pass a file argument to the create_master_list function, but I'm unsure what the right syntax for this is.
I've tried self.file and file as arguments, and both do not work as well.
Thanks!

Problem
You did not supply second argument for __init__():
class consolidate(object):
def __init__(self, file):
self.file = file
# rest of the code
while you are instantiating it like this:
c = consolidate()
Solution
This should work. Change class definition to this:
import csv
class consolidate(object):
    """Load a tab-delimited file into a list of row dictionaries.

    The first line of the file supplies the column titles, which become
    the keys of every row dictionary.
    """

    def __init__(self, filename):
        # Only the path is stored; the file is opened on demand.
        self.filename = filename

    def create_master_list(self):
        """Return every data row of the file as a dict keyed by column title."""
        # newline='' is how the csv module asks for files to be opened; the
        # old 'rU' mode was removed in Python 3.11 and raises ValueError.
        with open(self.filename, newline='') as f:
            return list(csv.DictReader(f, delimiter='\t'))
and then use it like this:
c = consolidate('Abilities.txt')
a = c.create_master_list()
This is one way of achieving the fix.
Note: I also changed the naming (self.file suggested it is a file object, while it actually is a file name, thus self.filename). Also keep in mind that the path is relative to the directory from which you execute the script.

You should pass the file as a parameter to __init__.
c = consolidate ('abilities.txt')
Then inside create_master_list you should open self.file.
with (open (self.file, 'rU') ) as f:
Now you can call
a = c.create_master_list ()

That's because your __init__ method of consolidate needs an argument for file:
def __init__(self, file):
but you don't give it anything:
c = consolidate()
To fix this problem, change your class like so:
import csv
# I capitalized the name of this class because that is convention
class Consolidate(object):
    """Read a tab-delimited file into a list of per-row dictionaries."""

    def __init__(self, file):
        # 'file' is the path of the file to read, not an open file object.
        self.file = file

    def create_master_list(self):
        """Return each data row as a dict keyed by the header-row titles."""
        # 'self' is the instance of 'Consolidate'; the thing to open is
        # 'self.file', which holds the path.
        # newline='' replaces the 'rU' mode, which Python 3.11 removed.
        with open(self.file, newline='') as f:
            return list(csv.DictReader(f, delimiter='\t'))
and then use it like this:
c = Consolidate('Abilities.txt')
a = c.create_master_list()

Related

How to set value of parent argument to child method?

I have a Paragraph class:
from googletrans import Translator
class Paragraph:
def __init__(self, text, origin_lang='en'):
self.text = text
self.origin_lang = origin_lang
def translate(self, dest_lang='ne'):
translator = Translator()
translation = translator.translate(text = self.text,
dest=dest_lang)
return translation.text
I made a subclass out of it:
class FileParagraph(Paragraph):
def __init__(self, filepath):
super().__init__(text=self.get_from_file())
self.filepath = filepath
def get_from_file(self):
with open(self.filepath) as file:
return file.read()
While Paragraph got the text directly as argument, the subclass generates the text from the get_from_file method.
However, I cannot seem to call the inherited translate method:
fp = FileParagraph("sample.txt")
print(fp.translate(dest_lang='de'))
That throws an error:
Traceback (most recent call last):
File "C:/main.py", line 66, in <module>
fp = FileParagraph("sample.txt")
File "C:/main.py", line 20, in __init__
super().__init__(text=self.get_from_file())
File "C:/main.py", line 25, in get_from_file
with open(self.filepath) as file:
AttributeError: 'FileParagraph' object has no attribute 'filepath'
One solution is to change the subclass init to:
def __init__(self, filepath):
self.filepath = filepath
self.text = self.get_from_file()
However, that means removing the initialization of super(). Is there another solution without having to remove super().__init__?
Or is this not even the case to make use of inheritance?
The error comes from calling the get_from_file method, which relies on self.filepath, before self.filepath is set. Simply changing the order of the two lines in __init__ fixes this
class FileParagraph(Paragraph):
    """Paragraph whose text is read from a file on disk."""

    def __init__(self, filepath):
        # The path has to exist on the instance before get_from_file()
        # runs, so it is assigned ahead of the parent initialiser.
        self.filepath = filepath
        contents = self.get_from_file()
        super().__init__(text=contents)

    def get_from_file(self):
        """Return the entire contents of the file at self.filepath."""
        with open(self.filepath) as handle:
            contents = handle.read()
        return contents
i think that you should also give a value for the filepath while creating the object here
fp = FileParagraph("sample.txt")
you should also input a value for the filepath along with text
eg
fp = FileParagraph(text = "sample.txt", filepath = " ")

How to chain file objects in Python?

I'm trying to find a simple way to chain file-like objects. I have a single CSV file which is split into a number of segments on disk. I'd like to be able to pass them to csv.DictReader without having to make a concatenated temporary first.
Something like:
files = map(io.open, filenames)
for row in csv.DictReader(io.chain(files)):
print(row[column_name])
But I haven't been able to find anything like io.chain. If I were parsing it myself, I could do something like:
from itertools import chain
def lines(fp):
    """Yield the lines of the open file-like object *fp* one at a time."""
    # Iterating the file directly streams it; readlines() would build a
    # list of every line in memory before the first one is yielded.
    for line in fp:
        yield line
a = open('segment-1.dat')
b = open('segment-2.dat')
for line in chain(lines(a), lines(b)):
row = line.strip().split(',')
However DictReader needs something it can call read() on, so this method doesn't work. I can iterate over the files, copying the fieldnames property from the previous reader, but I was hoping for something which let me put all the processing within a single loop body.
An iterable might help
from io import BytesIO
a = BytesIO(b"1st file 1st line \n1st file 2nd line")
b = BytesIO(b"2nd file 1st line \n2nd file 2nd line")
class Reader:
    """Iterate over the lines of several file-like objects as one stream.

    The files are consumed in the order given; when one runs out, iteration
    continues with the next one.
    """

    def __init__(self, *files):
        self.files = files
        # Index into self.files of the file currently being read.
        self.current_idx = 0

    def __iter__(self):
        return self

    def __next__(self):
        # A loop instead of recursion: a long run of empty files cannot
        # exhaust the call stack, and being given no files at all simply
        # ends the iteration instead of raising IndexError.
        while self.current_idx < len(self.files):
            # The for/return pair fetches at most one line from the file.
            for line in self.files[self.current_idx]:
                return line
            if self.current_idx == len(self.files) - 1:
                break
            self.current_idx += 1
        raise StopIteration("feed me more files")
r = Reader(a, b)
for l in r:
print(l)
Result:
b'1st file 1st line \n'
b'1st file 2nd line'
b'2nd file 1st line \n'
b'2nd file 2nd line'
Edit:
:D then there are standard library goodies.
https://docs.python.org/3.7/library/fileinput.html
with fileinput.input(files=('spam.txt', 'eggs.txt')) as f:
for line in f:
process(line)
You could create a class that's an iterator that returns a string each time its __next__() method is called (quoting the docs).
import csv
class ChainedCSVfiles:
    """Iterator that returns the lines of several CSV files back to back.

    Each call to ``__next__`` returns one line as a string, which is what
    ``csv.reader`` / ``csv.DictReader`` expect from their input. The
    iterator is single-pass: once every file has been read it stays
    exhausted.
    """

    def __init__(self, filenames):
        self.filenames = filenames
        # Created on first use so no file is opened until a line is needed.
        self._lines = None

    def __iter__(self):
        return self

    def __next__(self):
        # Delegate to one long-lived generator. Writing __next__ itself as
        # a generator function would make next(obj) return a generator
        # object instead of a line.
        if self._lines is None:
            self._lines = self._read_lines()
        return next(self._lines)

    def _read_lines(self):
        """Yield every line of every file, in the order the names were given."""
        for filename in self.filenames:
            with open(filename, 'r', newline='') as csvfile:
                for line in csvfile:
                    yield line
filenames = 'segment-1.dat', 'segment-2.dat'
reader = csv.DictReader(ChainedCSVfiles(filenames),
fieldnames=('field1', 'field2', 'field3'))
for row in reader:
print(row)

Python class - Set & Delete methods?

First I'd like to mention that I am completely new to Python and I've found it a bit difficult to transition from C++. I apologize if my question comes off as elementary.
I have a class for 'songs' which I have initialized as following. It takes in data from a file that contains a song's ID, name, genre etc. all separated by ::.
def __init__(self):
self.song_names = dict()
self.song_genres = dict()
def load_songs(self, song_id):
    """Fill the name and genre tables from a '::'-separated file.

    Args:
        song_id: Path of the file to read. Each line must hold at least
            ``id::name::genre``; fields after the third are ignored.
    """
    # The with-block closes the file even if a malformed line raises.
    with open(song_id) as f:
        for line in f:
            components = line.rstrip().split("::")
            sid, sname, sgenre = components[:3]
            # Both tables are keyed by the same song id.
            self.song_names[sid] = sname
            self.song_genres[sid] = sgenre
The program also takes in data from a file with 'users' information, separated as
UserID::Gender::Age::Occupation::Zip etc. and a file with 'ratings'.
How would I implement a function like def set_song(sid, list((title,genres)))
and something like delete_song(sid) ?
I'm going to have to wind up doing a ton more other functions, but if someone could help me with those two - at least to have a better idea of structure and syntax - handling the others should be easier.
Why not just inherit from dict and use its interface? That way you can use Python's standard mapping operations instead of rolling your own:
class Songs(dict):
    """Dictionary of songs mapping a song id to ``[name, genre]``."""

    def load(self, song_id):
        """Add one entry per line of the '::'-separated file at *song_id*."""
        with open(song_id, 'r') as source:
            for raw in source:
                fields = raw.rstrip().split('::')
                # Only the first three fields are used.
                key, title, style = fields[:3]
                self[key] = [title, style]
mysongs = Songs()
mysongs.load('barnes_and_barnes__fish_heads')
mysongs['barnes_and_barnes__fish_heads'] = ['roly poly', 'strange'] # set
del mysongs['barnes_and_barnes__fish_heads'] # delete

New Class doesn't have a setItem Method

So, this is my code.
def classMaker(csv):
csv = csv.split("/n")
firstLine = csv[0]
csv = csv[1:]
class newClass():
def __init__(self, line):
self.vars = firstLine
for i in range(len(line)):
self[firstLine[i]] = line[i]
return [newClass(line) for line in csv]
The problem is an AttributeError in self[firstLine[i]] = line[i]. It says
AttributeError: newClass instance has no attribute '__setitem__'
I don't know why it is causing this error. My goal is to take in a csv file exported from Excel and auto-generate object names from field names.
Thank you in advance.
You can avoid the newClass all together if you use collections.namedtuple:
CSVRow = namedtuple("CSVRow", firstLine)
return [CSVRow(*line) for line in csv]
This assumes that the CSV headers will be valid Python identifiers (that is, if you have entries like "Some Value", this won't work unless you process firstLine first).
This will let you do things like this:
# Let's assume your CSV has a Name field
# and that it is the first column
csv_data[3].Name == csv_data[3][0]
# True
Also, you should look into the csv module to simplify CSV processing.
If I can infer your intent correctly, you want to replace this line:
self[firstLine[i]] = line[i]
with this:
setattr(self, firstline[i], line[i])
This will create an attribute of your newClass object named after the column in your data.
E.g.:
Name, Date, Weight
Joe, 23-Sep, 99
...
and
data = classMaker('file.csv')
will produce :
data[0].Name == 'Joe'
P.s. I assume that you will add file I/O, parsing the CSV file, and other missing elements.
P.p.s: You can avoid the loop counter i altogether:
for attr, val in zip(firstLine, line):
setattr(self, attr, val)
P.p.s: Here is a complete working sample:
import csv
def classMaker(filename):
    """Turn each data row of a CSV file into an object with named attributes.

    The first row of the file supplies the attribute names; every later row
    becomes one object whose attributes hold that row's values.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one object per data row; empty if the file is empty.
    """
    class newClass(object):
        def __init__(self, line):
            # zip() pairs each header with its value and stops at the
            # shorter of the two, so a short row just sets fewer attributes.
            for attr, val in zip(firstLine, line):
                setattr(self, attr, val)

    # Text mode with newline='' is how the csv module expects files to be
    # opened on Python 3; binary mode ('rb') makes csv.reader fail there.
    with open(filename, newline='') as csvfile:
        spamreader = csv.reader(csvfile)
        # The next() built-in replaces the Python 2-only reader.next(); the
        # None default turns an empty file into an empty result.
        firstLine = next(spamreader, None)
        if firstLine is None:
            return []
        return [newClass(line) for line in spamreader]
x = classMaker("/tmp/x.csv")
print x[0].Name

What is the most Pythonic way to modify the function of a function?

I have a function I am using to read in files of a particular format. My function looks like this:
import csv
from collections import namedtuple
def read_file(f, name, header=True):
    """Yield one ``Data`` namedtuple per row of a tab-delimited file.

    Args:
        f: Path of the file to read.
        name: Not used by this function; kept so existing calls still work.
        header: When ``True``, the first row is skipped.

    Yields:
        ``Data(id, name, q, start, end, sym)`` built from each row's fields.
    """
    gene_data = namedtuple("Data", 'id, name, q, start, end, sym')
    with open(f, mode="r") as infile:
        reader = csv.reader(infile, delimiter="\t")
        if header is True:
            # The None default matters: a StopIteration escaping a generator
            # body is turned into RuntimeError (PEP 479), so an empty file
            # would otherwise crash the caller.
            next(reader, None)
        for row in reader:
            # The record type is bound to 'gene_data'; calling an undefined
            # 'data' here raised NameError on the first row.
            yield gene_data(*row)
I also have another type of file that I would like to read in with this function. However, the other file type needs a few slight parsing steps before I can use the read_file function. For example, trailing periods need to be stripped from column q and the characters atr need to be appended to the id column. Obviously, I could create a new function, or add some optional arguments to the existing function, but is there a simple way to modify this function so that it can be used to read in additional file types? I was thinking of something along the lines of a decorator.
IMHO, the most Pythonic way would be converting the function to a base class, split file operations into methods and overriding these methods in new classes based on your base class.
Having such a monolithic function that takes a filename instead of an open file is by itself not very Pythonic. You are trying to implement a stream processor here (file stream -> line stream -> CSV record stream -> [transformator ->] data stream), so using a generator is actually a good idea. I'd slightly refactor this to be a bit more modular:
import csv
from collections import namedtuple
def csv_rows(infile, header):
    """Return a tab-delimited csv reader over *infile*, past any header row.

    Args:
        infile: An open text file (or any iterable of lines).
        header: When true, the first row is consumed and discarded.
    """
    reader = csv.reader(infile, delimiter="\t")
    if header:
        # The None default keeps empty input from raising StopIteration.
        next(reader, None)
    return reader
def data_sets(infile, header):
    """Yield each row of *infile* wrapped in a ``Data`` namedtuple."""
    record_type = namedtuple("Data", 'id, name, q, start, end, sym')
    for fields in csv_rows(infile, header):
        record = record_type(*fields)
        yield record
def read_file_type1(infile, header=True):
# for this file type, we only need to pass the caller the raw
# data objects
return data_sets(infile, header)
def read_file_type2(infile, header=True):
# for this file type, we have to pre-process the data sets
# before yielding them. A good way to express this is using a
# generator expression (we could also add a filtering condition here)
return (transform_data_set(x) for x in data_sets(infile, header))
# Usage sample:
with open("...", "r") as f:
for obj in read_file_type1(f):
print obj
As you can see, we have to pass the header argument all the way through the function chain. This is a strong hint that an object-oriented approach would be appropriate here. The fact that we obviously face a hierarchical type structure here (basic data file, type1, type2) supports this.
I suggest you create a row iterator like the following:
with MyFile('f') as f:
for entry in f:
foo(entry)
You can do this by implementing a class for your own files with the following traits:
with ( http://docs.python.org/reference/compound_stmts.html#the-with-statement )
container ( http://docs.python.org/reference/datamodel.html#emulating-container-types )
Alongside it, you may create a function open_my_file(filename) that determines the file type and returns the appropriate file object to work with. This might be a slightly enterprise-style approach, but it is worth implementing if you're dealing with multiple file types.
The object-oriented way would be this:
class GeneDataReader:
    """Iterable over the rows of a tab-delimited gene data file.

    Iterating an instance yields one ``GeneData`` namedtuple per data row.
    Subclasses customise parsing by overriding ``preprocess_row``.
    """

    _GeneData = namedtuple('GeneData', 'id, name, q, start, end, sym')

    def __init__(self, filename, has_header=True):
        self._ignore_1st_row = has_header
        self._filename = filename

    def __iter__(self):
        # 'self' was missing from the signature, which made iter(reader)
        # fail with TypeError.
        for row in self._tsv_by_row():
            yield self._GeneData(*self.preprocess_row(row))

    def _tsv_by_row(self):
        """Yield each raw row as a list of strings, skipping any header."""
        with open(self._filename, 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            if self._ignore_1st_row:
                # The None default matters: StopIteration escaping a
                # generator becomes RuntimeError (PEP 479) on an empty file.
                next(reader, None)
            for row in reader:
                yield row

    def preprocess_row(self, row):
        # does nothing. override in derived classes
        return row
class SpecializedGeneDataReader(GeneDataReader):
    """Reader for the file variant whose rows need cleaning up first."""

    def preprocess_row(self, row):
        """Append 'atr' to column 0 and strip trailing periods from column 2."""
        row[0] = row[0] + 'atr'
        cleaned_q = row[2].rstrip('.')
        row[2] = cleaned_q
        return row
The simplest way would be to modify your currently working code with an extra argument.
def read_file(name, is_special=False, has_header=True):
    """Yield one ``Data`` namedtuple per row of a tab-delimited file.

    Args:
        name: Path of the file to read.
        is_special: When true, each row is cleaned up first: 'atr' is
            appended to the id column and trailing periods are stripped
            from the q column.
        has_header: When true, the first row is skipped.
    """
    Data = namedtuple("Data", 'id, name, q, start, end, sym')
    with open(name, 'r') as infile:
        reader = csv.reader(infile, delimiter='\t')
        if has_header:
            # The None default matters: StopIteration escaping a generator
            # becomes RuntimeError (PEP 479) when the file is empty.
            next(reader, None)
        for row in reader:
            if is_special:
                row[0] += 'atr'
                row[2] = row[2].rstrip('.')
            yield Data(*row)
If you are looking for something less nested but still procedure based:
# Record type shared by the two gene_data_from_* generators below.
GeneData = namedtuple("GeneData", 'id, name, q, start, end, sym')


def tsv_by_row(name, has_header=True):
    """Yield each row of the tab-delimited file *name* as a list of strings.

    Args:
        name: Path of the file to read.
        has_header: When true, the first row is skipped.
    """
    # Open the 'name' parameter; the undefined 'f' raised NameError.
    with open(name, 'r') as infile:
        reader = csv.reader(infile, delimiter='\t')
        if has_header:
            # The None default matters: StopIteration escaping a generator
            # becomes RuntimeError (PEP 479) when the file is empty.
            next(reader, None)
        for row in reader:
            yield row


def gene_data_from_vanilla_file(name, has_header=True):
    """Yield a ``GeneData`` record for every row, with the fields unchanged."""
    for row in tsv_by_row(name, has_header):
        yield GeneData(*row)


def gene_data_from_special_file(name, has_header=True):
    """Yield a ``GeneData`` record for every row after cleaning it up.

    'atr' is appended to the id column and trailing periods are stripped
    from the q column.
    """
    for row in tsv_by_row(name, has_header):
        row[0] += 'atr'
        row[2] = row[2].rstrip('.')
        yield GeneData(*row)
How about passing a callback function to read_file()
In the spirit of Niklas B.'s answer:
import csv, functools
from collections import namedtuple
def consumer(func):
    """Decorator that primes a generator-based coroutine.

    Calling the decorated function creates the generator and advances it
    to its first ``yield``, so the caller can use ``send()`` straight away.
    """
    @functools.wraps(func)
    def start(*args, **kwargs):
        g = func(*args, **kwargs)
        # The next() built-in works on Python 2.6+ and Python 3; the
        # g.next() method exists only on Python 2.
        next(g)
        return g
    return start
def csv_rows(infile, header, dest):
    """Push every row of a tab-delimited file into the coroutine *dest*.

    Args:
        infile: An open text file (or any iterable of lines).
        header: When true, the first row is skipped.
        dest: Object with a ``send(row)`` method; it receives each row as a
            list of strings.
    """
    # The keyword is 'delimiter'; the misspelt 'delimter' raised TypeError.
    reader = csv.reader(infile, delimiter='\t')
    if header:
        # The None default keeps empty input from raising StopIteration.
        next(reader, None)
    for line in reader:
        dest.send(line)
# Without this decorator the generator is never advanced to its first
# yield, and the first send() raises TypeError.
@consumer
def data_sets(dest):
    """Coroutine: wrap each received row in a ``Data`` namedtuple.

    Every row sent in is converted and forwarded to *dest* via ``send()``.
    """
    gene_data = namedtuple("Data", 'id, name, q, start, end, sym')
    while True:
        row = (yield)
        dest.send(gene_data(*row))
def read_file_1(fn, header=True):
    """Feed *fn* through the plain pipeline and return the collected items."""
    collected, sink = getsink()
    pipeline = data_sets(sink)
    csv_rows(fn, header, pipeline)
    return collected
def getsink():
    """Create a coroutine that collects everything sent to it.

    Returns:
        A ``(results, sink)`` pair: each value sent to *sink* is appended
        to the *results* list.
    """
    r = []

    # Without this decorator the generator is never advanced to its first
    # yield, and the first send() raises TypeError.
    @consumer
    def _sink():
        while True:
            x = (yield)
            r.append(x)

    return (r, _sink())
# Without this decorator the generator is never advanced to its first
# yield, and the first send() raises TypeError.
@consumer
def transform_data_sets(dest):
    """Coroutine: transform each received item and forward it to *dest*."""
    while True:
        data = (yield)
        dest.send(data[::-1])  # or whatever
def read_file_2(fn, header=True):
    """Feed *fn* through the transforming pipeline and return the results."""
    collected, sink = getsink()
    transformer = transform_data_sets(sink)
    pipeline = data_sets(transformer)
    csv_rows(fn, header, pipeline)
    return collected

Categories

Resources