Iterator example from Dive Into Python 3 - python

I'm learning Python as my 1st language from http://www.diveintopython3.net/. On Chp 7, http://www.diveintopython3.net/iterators.html, there is an example of how to use an iterator.
import re
def build_match_and_apply_functions(pattern, search, replace):
    """Build the pair of closures that implements one pluralization rule.

    Args:
        pattern: Regular expression deciding whether the rule applies.
        search: Regular expression selecting the text to rewrite.
        replace: Replacement text substituted for ``search``.

    Returns:
        A two-item list ``[matches_rule, apply_rule]``.
    """
    def matches_rule(word):
        # A match object (truthy) when the rule applies, otherwise None.
        return re.search(pattern, word)

    def apply_rule(word):
        return re.sub(search, replace, word)

    return [matches_rule, apply_rule]
class LazyRules:
    """Iterator over pluralization rules that reads the rules file lazily.

    Each rule is read from ``rules_filename`` the first time it is needed and
    then kept in ``self.cache``, so later iterations replay the cached rules
    before touching the file again.
    """
    rules_filename = 'plural6-rules.txt'

    def __init__(self):
        # The file stays open until __next__ reaches its end.
        self.pattern_file = open(self.rules_filename, encoding='utf-8')
        self.cache = []

    def __iter__(self):
        # Restart from the first cached rule on every new ``for`` loop.
        self.cache_index = 0
        return self

    def __next__(self):
        self.cache_index += 1
        # Serve from the cache while it still has an entry for this position.
        if len(self.cache) >= self.cache_index:
            return self.cache[self.cache_index - 1]

        # Cache exhausted and the file was already read to the end.
        if self.pattern_file.closed:
            raise StopIteration

        line = self.pattern_file.readline()
        if not line:
            self.pattern_file.close()
            raise StopIteration

        # maxsplit=3 (not 2) so that trailing whitespace/newline after the
        # third field is discarded instead of being glued onto ``replace``.
        pattern, search, replace = line.split(None, 3)
        funcs = build_match_and_apply_functions(pattern, search, replace)
        self.cache.append(funcs)
        return funcs
rules = LazyRules()


def plural(noun):
    """Return the plural of ``noun`` according to the first matching rule.

    Falls off the end (returning None) when no rule in ``rules`` matches.
    """
    for matches_rule, apply_rule in rules:
        if matches_rule(noun):
            # ``return`` leaves the function, and therefore the loop, at once;
            # no StopIteration is needed to stop iterating early.
            return apply_rule(noun)
if __name__ == '__main__':
    # Command-line entry point: pluralize the first argument, or print the
    # module docstring when no argument is given.
    import sys
    if sys.argv[1:]:
        print(plural(sys.argv[1]))
    else:
        print(__doc__)
My question is: how does the 'for matches_rule, apply_rule in rules:' loop in the plural(noun) function know when to exit after fulfilling the if condition? There are no StopIteration commands for that condition. I would expect the for loop to continue until the rules.cache is iterated completely.
Thank you for the help!

The return statement ends the function at that point, returning a value to the caller. This can be relied upon in almost any situation (if you have a try..except..else..finally structure, even a return statement won't prevent the finally block from being executed).

Related

Python decorator to time recursive functions

I have a simple decorator to track the runtime of a function call:
def timed(f):
    """Decorate ``f`` so that each call returns ``(result, elapsed_seconds)``.

    Args:
        f: The callable to time.

    Returns:
        A wrapper that forwards its arguments to ``f`` and returns a tuple of
        ``f``'s result and the wall-clock time the call took.
    """
    import functools
    import time

    @functools.wraps(f)  # keep f's __name__/__doc__ on the wrapper
    def caller(*args, **kwargs):
        start = time.time()
        res = f(*args, **kwargs)
        end = time.time()
        return res, end - start

    return caller
This can be used as follows, and returns a tuple of the function result and the execution time.
@timed
def test(n):
    """Loop ``n`` times doing nothing and return 0 (sample workload)."""
    for _ in range(n):
        pass
    return 0


print(test(900))  # prints (0, 2.69e-05)
Simple enough. But now I want to apply this to recursive functions. Applying the above wrapper to a recursive function results in nested tuples with the times of each recursive call, as is expected.
@timed
def rec(n):
    """Recurse ``n`` levels deep and return 0.

    Every recursive call goes through the ``timed`` wrapper as well, which is
    why the printed result is a nested tuple.
    """
    if n:
        return rec(n - 1)
    else:
        return 0


print(rec(3))  # Prints ((((0, 1.90e-06), 8.10e-06), 1.28e-05), 1.90e-05)
What's an elegant way to write the decorator so that it handles recursion properly? Obviously, you could wrap the call in a timed function:
@timed
def wrapper():
    """Time a single top-level call of ``rec(3)``."""
    return rec(3)
This will give a tuple of the result and the time, but I want all of it to be handled by the decorator so that the caller does not need to worry about defining a new function for every call. Ideas?
The problem here isn't really the decorator. The problem is that rec needs rec to be a function that behaves one way, but you want rec to be a function that behaves differently. There's no clean way to reconcile that with a single rec function.
The cleanest option is to stop requiring rec to be two things at once. Instead of using decorator notation, assign timed(rec) to a different name:
def rec(n):
    ...


# Keep the plain recursive function under its own name and expose the timed
# variant under a second name, so recursion never goes through the wrapper.
timed_rec = timed(rec)
If you don't want two names, then rec needs to be written to understand the actual value that the decorated rec will return. For example,
@timed
def rec(n):
    """Recursive function written to cope with its own decorated return value."""
    if n:
        # The decorated ``rec`` returns (value, runtime); only the value is
        # passed back up so the result does not nest.
        val, _runtime = rec(n - 1)
        return val
    else:
        return 0
I prefer the other answers so far (particularly user2357112's answer), but you can also make a class-based decorator that detects whether the function has been activated, and if so, bypasses the timing:
import time
class fancy_timed(object):
    """Class-based timing decorator that only times the outermost call.

    While a timed call is in progress (``self.active`` is True), nested calls
    are forwarded to the wrapped function untimed, so recursion yields a
    single ``(result, elapsed_seconds)`` tuple.
    """

    def __init__(self, f):
        self.f = f
        self.active = False

    def __call__(self, *args):
        # Re-entrant (recursive) call: bypass the timing.
        if self.active:
            return self.f(*args)
        start = time.time()
        self.active = True
        # NOTE: if self.f raises, ``active`` stays True; this simple version
        # does not guard against that.
        res = self.f(*args)
        end = time.time()
        self.active = False
        return res, end - start
@fancy_timed
def rec(n):
    """Recurse ``n`` levels deep, sleeping 10 ms per level, and return 0."""
    if n:
        time.sleep(0.01)
        return rec(n - 1)
    else:
        return 0


print(rec(3))
(class written with (object) so that this is compatible with py2k and py3k).
Note that to really work properly, the outermost call should use try and finally. Here's the fancied up fancy version of __call__:
def __call__(self, *args):
    """Time the outermost call; forward nested calls untimed.

    Returns ``(result, elapsed_seconds)`` for the outermost call and the bare
    result for nested ones. ``self.active`` is reset even if the wrapped
    function raises.
    """
    if self.active:
        return self.f(*args)
    start = time.time()
    self.active = True
    try:
        res = self.f(*args)
        end = time.time()
        return res, end - start
    finally:
        # Always clear the flag so a failed call does not disable timing.
        self.active = False
You could structure your timer in a different way by *ahem* abusing the contextmanager and function attribute a little...
from contextlib import contextmanager
import time
@contextmanager
def timed(func):
    """Context manager that times the whole ``with`` block.

    Yields ``func`` unchanged. The start time and, on exit, the elapsed time
    are stored as attributes on ``timed`` itself (``timed.start`` and
    ``timed.duration``).
    """
    timed.start = time.time()
    try:
        yield func
    finally:
        # Recorded even when the body raises.
        timed.duration = time.time() - timed.start
def test(n):
    """Loop ``n`` times doing nothing and return ``n`` (non-recursive workload)."""
    for _ in range(n):
        pass
    return n


def rec(n):
    """Recurse ``n`` levels deep, sleeping 50 ms per level, and return 0."""
    if n:
        time.sleep(0.05)  # extra delay to notice the difference
        return rec(n - 1)
    else:
        return n


# Recursive case: call through ``r``, the name bound by this ``with``
# statement (``t`` is not defined until the second block below).
with timed(rec) as r:
    print(r(10))
    print(r(20))
print(timed.duration)  # set when the ``with`` block exits

# Non-recursive case.
with timed(test) as t:
    print(t(555555))
    print(t(666666))
print(timed.duration)
Results:
# recursive
0
0
1.5130000114440918
# non-recursive
555555
666666
0.053999900817871094
If this is deemed a bad hack I'll gladly accept your criticism.
Although it is not an overall solution to the problem of integrating recursion with decorators, for the problem of timing only, I have verified that the last element of the tuple of the times is the overall run time, as this is the time from the upper-most recursive call. Thus if you had
@timed
def rec():
    ...
to get the overall runtime given the original function definitions you could simply do
rec()[1]
Getting the result of the call, on the other hand, would then require recursing through the nested tuple:
def get(tup):
    """Return the innermost first element of a nested ``(value, time)`` tuple.

    Follows ``tup[0]`` until the value is no longer a tuple and returns it.
    """
    if isinstance(tup, tuple):
        return get(tup[0])
    else:
        return tup
This might be too complicated to simply get the result of your function.
I encountered the same issue when trying to profile a simple quicksort implementation.
The main issue is that decorators are executed on each function call, and we need something that can keep state so that we can sum the timings of all calls at the end. Decorators are not the right tool for the job.
However, one idea is to abuse the fact that functions are objects and can have attributes. This is explored below with a simple decorator. Something that must be understood is that, when the decorator's syntactic sugar (@) is used, the function will always be accumulating its timings.
from typing import Any, Callable
from time import perf_counter
class timeit:
    """Callable wrapper that records the duration of every call.

    Each call to the wrapped function appends its elapsed time (from
    ``perf_counter``) to ``self.timed`` and returns the function's own result.
    """

    def __init__(self, func: Callable) -> None:
        self.func = func
        self.timed = []  # one entry per call, in completion order

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        start = perf_counter()
        res = self.func(*args, **kwds)
        end = perf_counter()
        self.timed.append(end - start)
        return res
# usage
@timeit
def rec(n):
    ...


if __name__ == "__main__":
    result = rec(4)  # rec result
    # ``rec.timed`` is a list of per-call durations, so it has to be summed
    # before it can be formatted with ``.2f``.
    print(f"Took {sum(rec.timed):.2f} seconds")
    # Out: Took 3.39 seconds
    result = rec(4)  # rec result
    # timings between calls are accumulated
    # Out: Took 6.78 seconds
Which brings us to a solution inspired by @r.ook. Below is a simple context manager that stores the timing of each run and prints their sum at the end (__exit__). Notice that, because each timing requires its own with statement, this will not accumulate different runs.
from typing import Any, Callable
from time import perf_counter
class timeit:
    """Timing wrapper that also works as a context manager.

    Calls made through the instance append their elapsed time to
    ``self.timed``; leaving the ``with`` block prints the sum of those times.
    """

    def __init__(self, func: Callable) -> None:
        self.func = func
        self.timed = []  # one entry per call, in completion order

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        start = perf_counter()
        res = self.func(*args, **kwds)
        end = perf_counter()
        self.timed.append(end - start)
        return res

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # TODO: report `exc_*` if an exception gets raised
        # Returns None, so an exception raised in the block still propagates.
        print(f"Took {sum(self.timed):.2f} seconds")
# usage
def rec(n):
    ...


if __name__ == "__main__":
    # Each ``with`` block creates a fresh timer, so runs do not accumulate.
    with timeit(rec) as f:
        result = f(4)  # rec result
    # Out: Took 3.39 seconds

Python defining function under condition

Is this code correct in python?
def foo(flag):
    """Define ``bar`` differently depending on ``flag``, then call it."""
    if flag:
        def bar():
            # Something
            pass  # a body is required; a comment alone is a syntax error
    else:
        def bar():
            # Something else
            pass
    bar()


foo(True)
foo(False)
If not, what is the recommended way to set the behavior of some function (bar) depending on a condition?
OK The real code is following
# Building replaceFunc based on the ignore_case and use_regexp flags
if not ignore_case:
    if not use_regexp:
        def replaceFunc(string, search, replace):
            """Case-sensitive literal replacement."""
            return string.replace(search, replace)
    else:
        def replaceFunc(string, search, replace):
            """Case-sensitive regular-expression replacement."""
            pattern = re.compile(search)
            return pattern.sub(replace, string)
else:
    if not use_regexp:
        # There is no standard Python function for replacing a string while ignoring case
        def replaceFunc(string, search, replace):
            # implementation from http://stackoverflow.com/questions/919056/case-insensitive-replace
            # (elided in the post; this placeholder returns the input unchanged)
            return string
    else:
        def replaceFunc(string, search, replace):
            """Case-insensitive regular-expression replacement."""
            pattern = re.compile(search, re.IGNORECASE)
            return pattern.sub(replace, string)
Here's one reasonable way to achieve what you want:
def bar1():
    """Behaviour used when the flag is false."""
    return 'b1'


def bar2():
    """Behaviour used when the flag is true."""
    return 'b2'


def foo(flag):
    """Pick ``bar2`` or ``bar1`` according to ``flag`` and return its result."""
    bar = bar2 if flag else bar1
    return bar()


print(foo(False))
print(foo(True))
One benefit of defining the functions bar1() and bar2() outside of foo() is that they can be unit tested.

Trying to implement a Python Decorator, stuck in a few places

I am currently learning about decorators, this example is supposed to be a basic decorator that saves the result of a recursive fibonacci function, but there are several questions I have. first of all "fn", is that just the name of a variable? or is it a part of the python language. Same question for 'KeyError'. I also don't understand why in the body of the function, args sometimes has a * in front and other times doesn't (shouldn't it always have a *)
def memoize(fn):
    """Return a version of ``fn`` that caches results by positional arguments.

    ``fn`` is just the parameter name for the function being decorated. The
    arguments must be hashable because the ``args`` tuple is the cache key.
    """
    import functools

    stored_results = {}

    @functools.wraps(fn)  # keep fn's __name__/__doc__ on the wrapper
    def memoized(*args):
        # ``*args`` in the signature packs the arguments into a tuple;
        # ``fn(*args)`` unpacks that tuple again for the real call.
        try:
            return stored_results[args]
        except KeyError:
            # First call with these arguments: compute, store, return.
            result = stored_results[args] = fn(*args)
            return result

    return memoized
def fibonacci(n):
    """Return the ``n``-th Fibonacci number (0, 1, 1, 2, ...) recursively."""
    if n == 0 or n == 1:
        return n
    else:
        return (fibonacci(n-1)+fibonacci(n-2))
#memoize
fibonacci(5)
Indent correctly.
def memoize(fn):
    """Return a version of ``fn`` that caches results by positional arguments.

    The arguments must be hashable because the ``args`` tuple is the cache key.
    """
    import functools

    stored_results = {}

    @functools.wraps(fn)  # keep fn's __name__/__doc__ on the wrapper
    def memoized(*args):
        try:
            return stored_results[args]
        except KeyError:
            # Not cached yet: compute, store, and return the value.
            result = stored_results[args] = fn(*args)
            return result

    return memoized
#^^^
#memoize should be before the def ... line.
@memoize  # <-- should be here
def fibonacci(n):
    """Return the ``n``-th Fibonacci number; recursive calls hit the cache."""
    if n == 0 or n == 1:
        return n
    else:
        return (fibonacci(n-1)+fibonacci(n-2))
Just call fibonacci without @memoize
fibonacci(5)
first of all "fn", is that just the name of a variable?
A function object. Here, it is the fibonacci function.
Same question for 'KeyError'
If there's no previously memoized value for the argument, stored_results[args] raises KeyError. (TypeError if you pass unhashable value as argument).
I also don't understand why in the body of the function, args sometimes has a * in front
So that the memoize decorator also works with other functions that may take multiple arguments.

Python Custom Iterator: Close a file on StopIteration

I have written an iterator class that opens a file in its __init__.
def __init__(self, path):
self.file = open(path, "r")
How do I close that file automatically when the iteration is finished?
Complete class:
class Parse(object):
"""A generator that iterates through a CC-CEDICT formatted file, returning
a tuple of parsed results (Traditional, Simplified, Pinyin, English)"""
def __init__(self, path):
self.file = open(path, "r")
def __iter__(self):
return self
def __is_comment(self, line):
return line.startswith("#")
def next(self):
#This block ignores comments.
line = self.file.readline()
while line and self.__is_comment(line):
line = self.file.readline()
if line:
working = line.rstrip().split(" ")
trad, simp = working[0], working[1]
working = " ".join(working[2:]).split("]")
pinyin = working[0][1:]
english = working[1][1:]
return trad, simp, pinyin, english
else:
raise StopIteration()
A better way to write the whole thing would be to keep the opening and the iteration in one place:
class Parse(object):
    """A generator that iterates through a CC-CEDICT formatted file, returning
    a tuple of parsed results (Traditional, Simplified, Pinyin, English)"""

    def __init__(self, path):
        # Only remember the path; the file is opened when iteration starts.
        self.path = path

    def __is_comment(self, line):
        return line.startswith("#")

    def __iter__(self):
        # The ``with`` block closes the file once the generator finishes.
        with open(self.path) as f:
            for line in f:
                if self.__is_comment(line):
                    continue
                # Line layout: "<trad> <simp> [<pinyin>] <english>"
                working = line.rstrip().split(" ")
                trad, simp = working[0], working[1]
                working = " ".join(working[2:]).split("]")
                pinyin = working[0][1:]   # drop the leading "["
                english = working[1][1:]  # drop the space after "]"
                yield trad, simp, pinyin, english
This will wait to open the file until you really need it, and will automatically close it when done. It's also less code.
If you really want to get into the "generators are awesome!" mindset:
def skip_comments(f):
    """Yield every line of the iterable ``f`` that does not start with ``#``."""
    for line in f:
        if not line.startswith('#'):
            yield line
...
def __iter__(self):
with open(self.path) as f:
for line in skip_comments(f):
working = ....
You need to explicitly close it as soon as StopIteration is raised. In this case, simply call .close() when you raise StopIteration yourself.
def next(self):
    """Return the next parsed entry; close the file when it is exhausted."""
    # This block ignores comments.
    line = self.file.readline()
    while line and self.__is_comment(line):
        line = self.file.readline()

    if line:
        # Line layout: "<trad> <simp> [<pinyin>] <english>"
        working = line.rstrip().split(" ")
        trad, simp = working[0], working[1]
        working = " ".join(working[2:]).split("]")
        pinyin = working[0][1:]
        english = working[1][1:]
        return trad, simp, pinyin, english
    else:
        # End of file: release the handle before signalling exhaustion.
        self.file.close()
        raise StopIteration()
Since no other code in your .next() method could trigger a StopIteration this suffices.
If you did use next() on another iterator inside your own .next() you'd have to catch StopIteration with an except StopIteration: handler and reraise the exception.
This only handles the StopIteration case. If you want to handle other situations (not exhausting the iterator) you'll need to handle that situation separately. Making your class a Context Manager as well could help with that. Users of your iterator would then use the object in a with statement before iterating over it, and when the with suite is exited the file could be closed regardless. You may want to mark your iterator as 'done' as well in that case:
# Class-level default; set to True on the instance once the file is closed.
_closed = False

def next(self):
    """Return the next parsed entry, or raise StopIteration once closed."""
    # Never touch the file again after it has been closed.
    if self._closed:
        raise StopIteration

    line = self.file.readline()
    while line and self.__is_comment(line):
        line = self.file.readline()

    if line:
        working = line.rstrip().split(" ")
        trad, simp = working[0], working[1]
        working = " ".join(working[2:]).split("]")
        pinyin = working[0][1:]
        english = working[1][1:]
        return trad, simp, pinyin, english
    else:
        self.file.close()
        self._closed = True
        raise StopIteration()

def __enter__(self):
    return self

def __exit__(self, type_, value, tb):
    # Close on leaving the ``with`` block even if iteration did not finish.
    self.file.close()  # multiple calls to .close() are fine
    self._closed = True

loop fails when increasing string from elements in list

I have some problem with sequence generator. I have a file where each line contain one fragment (8 letters). I load it from file in to list, where each element is one fragment. It is DNA so it should go that way:
1. Takes first 8-letter element
2. Check for element in which first 7 letters is the same as last 7 letters in first.
3. Add 8th letter from second element in to sequence.
It should look like this:
ATTGCCAT
TTGCCATA
TGCAATAC
So sequence: ATTGCCATAC
Unfortunately it only adds one element. :( The first element is given (we know it): it is the first one in the file (the first line).
Here is the code:
from os import sys
import random
def frag_get(seqfile):
    """Read ``seqfile`` and return its lines, stripped, as a list of fragments.

    The file is opened in a ``with`` block so it is closed even if reading fails.
    """
    with open(seqfile, "r") as f_in:
        return [line.strip() for line in f_in]
def frag_list_shuffle(frags):
    """Shuffle ``frags`` in place and return the same list object."""
    random.shuffle(frags)
    return frags
def seq_build(first, frags):
    """Extend ``first`` with the last letter of each overlapping fragment.

    Makes a single pass over ``frags`` in the order given: a fragment is used
    only if its first 7 letters equal the current last 7 letters of the
    sequence at the moment it is visited, so the result depends on the order
    of ``frags``.
    """
    seq = first
    for f in frags:
        if seq[-7:] == f[:7]:
            seq += f[-1:]
    return seq
def errors():
    """Placeholder; currently does nothing."""
    pass
if __name__ == "__main__":
    # argv[1]: fragment file; argv[2] and argv[3] are passed on to the
    # check/spectrum helpers, which were omitted from the post.
    frags = frag_get(sys.argv[1])
    first = frags[0]        # the known starting fragment is the first line
    frags.remove(first)
    frags = frag_list_shuffle(frags)
    seq = seq_build(first, frags)
    check(sys.argv[2], seq)
    spectrum(sys.argv[2], sys.argv[3])
I have deleted check and spectrum functions because it's simple calculations e.g. length comparison, so it is not what cause a problem as I think.
I would be very thankful for any help!
Regards,
Mateusz
Because your fragments are shuffled, your algorithm needs to take that into account; currently, you're just looping through the fragments once, which is unlikely to include more than a few fragments if they're not in the right order. For example, say you have 5 fragments, which I'm going to refer to by their order in your sequence. Now the fragments are slightly out of order:
1 - 3 - 2 - 4 - 5
Your algorithm will start with 1, skip 3, then match on 2, adding a base at the end. Then it'll check against 4 and 5, and then finish, never reaching fragment 3.
You could easily fix this by starting your loop again each time you add a base, however, this will scale very badly for a large number of bases. Instead, I'd recommend loading your fragments into a trie, and then searching the trie for the next fragment each time you add a base, until you've added one base for each fragment or you can no longer find a matching fragment.
works for me:
>>> seq = "ATTGCCAT"
>>> frags = ["TTGCCATA", "TGCCATAC"]
>>> for f in frags:
... if seq[-7:] == f[:7]:
... seq += f[-1:]
...
>>> seq
'ATTGCCATAC'
You have a spelling error in your example, TGCAATAC should be TGCCATAC. But fixing that it works.
For fun and interest, I've rewritten the problem using OO. See what you think:
import collections
import sys
import random
usage = """
Usage:
sequence fname expected
Where
fname: name of file containing fragments
expected: result-string which should be obtained by chaining from first fragment.
"""
class Frag(str):
    """A sequence fragment: a ``str`` whose surrounding whitespace is stripped."""

    MATCHLEN = 7  # number of letters two fragments must share to chain

    def __new__(cls, s=''):
        return str.__new__(cls, s.strip())

    def head(self):
        """Return the first ``MATCHLEN`` letters."""
        return Frag(self[:Frag.MATCHLEN])

    def tail(self):
        """Return everything after the first ``MATCHLEN`` letters."""
        return Frag(self[Frag.MATCHLEN:])

    def nexthead(self):
        """Return the last ``MATCHLEN`` letters (the head a successor must have)."""
        return Frag(self[-Frag.MATCHLEN:])

    def check(self, s):
        """Return True when this fragment equals ``s``."""
        return self == s

    def __add__(self, s):
        # Keep concatenation results typed as Frag instead of plain str.
        return Frag(str(self) + s)
class Fraglist(list):
    """A list of ``Frag`` objects with file-loading and shuffling helpers."""

    @classmethod
    def fromFile(cls, fname):
        """Build a Fraglist with one ``Frag`` per line of the file ``fname``."""
        with open(fname, "r") as inf:
            lst = [Frag(ln) for ln in inf]
        return cls(lst)

    def shuffle(self):
        """Shuffle the list in place."""
        random.shuffle(self)
class Sequencer(object):
    """Chains fragments by looking up successors by their head."""

    def __init__(self, seq=None):
        super(Sequencer, self).__init__()
        # Maps a fragment head to the tails of all fragments with that head.
        self.sequences = collections.defaultdict(list)
        if seq is not None:
            for frag in seq:
                self.sequences[frag.head()].append(frag.tail())

    def build(self, frag):
        """Extend ``frag`` for as long as a matching successor is known.

        Where several successors share a head, one is picked at random.
        NOTE(review): used tails are never removed, so this appears able to
        loop forever on cyclic input; confirm against expected data.
        """
        res = [frag]
        match = frag.nexthead()
        while match in self.sequences:
            nxt = random.choice(self.sequences[match])
            res.append(nxt)
            match = (match + nxt).nexthead()
        return Frag(''.join(res))
def main():
    """Command-line entry point: build a sequence and compare it to the expected one.

    Expects exactly two arguments (fragment file name and expected result);
    otherwise prints the usage text and exits with status -1.
    """
    if len(sys.argv) != 3:
        print(usage)
        sys.exit(-1)
    else:
        fname = sys.argv[1]
        expected = sys.argv[2]
        frags = Fraglist.fromFile(fname)
        frag1 = frags.pop(0)  # the first line is the known starting fragment
        frags.shuffle()
        seq = Sequencer(frags)
        result = seq.build(frag1)
        if result.check(expected):
            print("Match!")
        else:
            print("No match")


if __name__ == "__main__":
    main()

Categories

Resources