How to write foldr (right fold) generator in Python?

Python's reduce is a left-fold, which means it is tail-recursive and its uses can be neatly rewritten as a loop. However, Python does not have a built-in function for doing right folds. Since right-folds are most naturally written with recursion (and Python doesn't like recursion as much as functional languages), I'm interested in writing a right fold (foldr) in terms of a generator.
How can this be done? And very specifically, how can it be done in Python 2.7?
EDIT: I should have mentioned that one of the benefits to foldr is that you can sometimes fold on infinite lists without risk of eating your stack alive. I would like to see answers that preserve this property.
For example, Haskell's foldr is lazy on both input and output and can allow for short-circuiting "step" functions to work on long/infinite inputs:
foldr (&&) True (repeat False) -- gives False
Any Python variant that uses list/reversed/etc. on the input will hang if given itertools.repeat(some_value).
Note that Python's reduce chokes in the same example because of strictness:
reduce(lambda x, y: x and y, itertools.repeat(False), True) # hangs
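To make the goal concrete, here is a rough sketch (the foldr_lazy name is just for illustration, not a full solution) of the kind of laziness I mean: pass the rest of the fold as a thunk (a zero-argument callable) that the step function may choose not to force.
import itertools

def foldr_lazy(step, init, iterable):
    """step receives (element, thunk); it calls thunk() only if it
    really needs the fold of the rest of the input."""
    it = iter(iterable)
    def go():
        try:
            x = next(it)
        except StopIteration:
            return init
        return step(x, go)   # recursion happens only if step forces go
    return go()

# mirrors Haskell's foldr (&&) True (repeat False)
print(foldr_lazy(lambda x, rest: x and rest(), True, itertools.repeat(False)))  # False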

So a simple generator in python (without appropriate error checking):
def foldr(op, lst):
    l, x = reversed(list(lst)), None
    for i in l:
        if not x:
            x = i
            continue
        x = op(x, i)
        yield x
e.g.:
>>> from operator import mul
>>> for i in foldr(mul, [1,2,3,4]):
... print i
24
24
12
Almost identical to the 'roughly equivalent' implementation of reduce in the documentation:
def foldr(function, iterable, initializer=None):
    it = reversed(list(iterable))
    if initializer is None:
        try:
            initializer = next(it)
        except StopIteration:
            raise TypeError('foldr() of empty sequence with no initial value')
    accum_value = initializer
    for x in it:
        accum_value = function(accum_value, x)
        yield accum_value
[Edit]
So, purely as an exercise of the mind and with very little practical value, it is possible to defer the evaluation as long as there is some cooperation from the function that you are folding with, e.g.:
class Defer(object):
    def __init__(self, func, *args):
        self.func = func
        self.args = args
    def __bool__(self):
        return self.func(*self.args)
    def __int__(self):
        return self.func(*self.args)

def foldr(function, iterable, initializer):
    it = iter(iterable)
    try:
        return function(next(it), Defer(foldr, function, it, initializer))
    except StopIteration:
        return initializer
Then, as long as the function converts its arguments to the right type, you can defer the calculation. However, this will not work with native operators, so I'm not sure how useful it really is:
>>> print(foldr(lambda a, b: int(a)*int(b), [1,2,3,4], 1))
24
Defining a forever generator:
from itertools import repeat
def forever():
    yield False
    yield True
    for i in repeat(False):
        yield i
Folding with or across an infinite list returns as soon as it finds a True:
>>> print(foldr(lambda a, b: bool(a) or bool(b), forever(), False))
True

You will have to catch appropriate exceptions, but this should give you an idea of how to do it iteratively:
from collections import Iterator

def foldr(a, b, l):
    if isinstance(l, Iterator):
        it = reversed(list(l))
    else:
        it = reversed(l)
    try:
        nxt = next(it)
    except StopIteration:
        return
    c = a(nxt, b)
    stop = object()
    while nxt is not stop:
        yield c
        nxt = next(it, stop)
        c = a(nxt, c) if nxt is not stop else c

from operator import truediv
for c in foldr(truediv, 1, [1, 2, 3, 4, 5, 6, 7, 8]):
    print(c)

If you are going to define a function using generators, why not use the following?
def foldr(op, lst):
    return reduce(op, reversed(lst))
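One caveat: reduce passes the accumulator as the first argument, while foldr conventionally passes the element first, so for a non-commutative step function you may want to flip the arguments. A small sketch of that variant:
def foldr(op, lst, init):
    # flip the arguments so op sees (element, accumulator), as foldr does
    # (on Python 3, reduce needs: from functools import reduce)
    return reduce(lambda acc, x: op(x, acc), reversed(lst), init)

print(foldr(lambda x, acc: [x] + acc, [1, 2, 3], []))  # [1, 2, 3]
Note that this still calls list/reversed on the input, so it does not help with the infinite-input case from the question.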

I think something like this is what you want:
def foldr(fn, seq, init):
    it = iter(seq)
    try:
        x = next(it)
    except StopIteration:
        try:
            for elem in init:
                yield elem
        except TypeError:
            yield init
    else:
        try:
            for elem in fn(x, foldr(fn, it, init)):
                yield elem
        except TypeError:
            yield fn(x, foldr(fn, it, init))
It's not exactly production-ready since it will hit the Python stack limit pretty quickly and it will be surprising in the presence of side-effecting functions due to the double call to fn, but it should be enough to give you an idea.
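For instance, the infinite-input example from the question can be checked against it (a quick sanity check, not production code):
>>> import itertools
>>> # analogous to Haskell's foldr (&&) True (repeat False)
>>> next(foldr(lambda x, acc: x and next(acc), itertools.repeat(False), True))
False
Here the step function receives the rest of the fold as a generator and only pulls from it with next(acc) when it actually needs the value, which is what lets the infinite input terminate.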

Related

Mixing yield and return. `yield [cand]; return` vs `return [[cand]]`. Why do they lead to different output? [duplicate]

Why does
yield [cand]
return
lead to different output/behavior than
return [[cand]]
Minimal viable example
uses recursion
the output of the version using yield [1]; return is different than the output of the version using return [[1]]
def foo(i):
    if i != 1:
        yield [1]
        return
    yield from foo(i-1)

def bar(i):
    if i != 1:
        return [[1]]
    yield from bar(i-1)

print(list(foo(1))) # [[1]]
print(list(bar(1))) # []
Minimal viable counter-example
does not use recursion
the output of the version using yield [1]; return is the same as the output of the version using return [[1]]
def foo():
    yield [1]
    return

def foofoo():
    yield from foo()

def bar():
    return [[1]]

def barbar():
    yield from bar()

print(list(foofoo())) # [[1]]
print(list(barbar())) # [[1]]
Full context
I'm solving Leetcode #39: Combination Sum and was wondering why one solution works, but not the other:
Working solution
from functools import cache # requires Python 3.9+

class Solution:
    def combinationSum(self, candidates: List[int], target: int) -> List[List[int]]:
        @cache
        def helper(targ, i=0):
            if i == N or targ < (cand := candidates[i]):
                return
            if targ == cand:
                yield [cand]
                return
            for comb in helper(targ - cand, i):
                yield comb + [cand]
            yield from helper(targ, i+1)
        N = len(candidates)
        candidates.sort()
        yield from helper(target)
Non-working solution
from functools import cache # requires Python 3.9+

class Solution:
    def combinationSum(self, candidates: List[int], target: int) -> List[List[int]]:
        @cache
        def helper(targ, i=0):
            if i == N or targ < (cand := candidates[i]):
                return
            if targ == cand:
                return [[cand]]
            for comb in helper(targ - cand, i):
                yield comb + [cand]
            yield from helper(targ, i+1)
        N = len(candidates)
        candidates.sort()
        yield from helper(target)
Output
On the following input
candidates = [2,3,6,7]
target = 7
print(Solution().combinationSum(candidates, target))
the working solution correctly prints
[[3,2,2],[7]]
while the non-working solution prints
[]
I'm wondering why yield [cand]; return works, but return [[cand]] doesn't.
In a generator function, return just defines the value associated with the StopIteration exception implicitly raised to indicate an iterator is exhausted. It's not produced during iteration, and most iterating constructs (e.g. for loops) intentionally ignore the StopIteration exception (it means the loop is over, you don't care if someone attached random garbage to a message that just means "we're done").
For example, try:
>>> def foo():
...     yield 'onlyvalue' # Existence of yield keyword makes this a generator
...     return 'returnvalue'
...
>>> f = foo() # Makes a generator object, stores it in f
>>> next(f) # Pull one value from generator
'onlyvalue'
>>> next(f) # There is no other yielded value, so this hits the return; iteration over
--------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
...
StopIteration: 'returnvalue'
As you can see, your return value does get "returned" in a sense (it's not completely discarded), but it's never seen by anything iterating normally, so it's largely useless. Outside of rare cases involving using generators as coroutines (where you're using .send() and .throw() on instances of the generator and manually advancing it with next(genobj)), the return value of a generator won't be seen.
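The one context where that return value is convenient to get at is yield from, which evaluates to the inner generator's return value; a small illustration:
def inner():
    yield 'onlyvalue'
    return 'returnvalue'

def outer():
    result = yield from inner()   # result is 'returnvalue' once inner is exhausted
    print('inner returned:', result)

print(list(outer()))  # prints "inner returned: returnvalue", then ['onlyvalue']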
In short, you have to pick one:
Use yield anywhere in a function, and it's a generator (whether or not the code path of a particular call ever reaches a yield) and return just ends generation (while maybe hiding some data in the StopIteration exception). No matter what you do, calling the generator function "returns" a new generator object (which you can loop over until exhausted), it can never return a raw value computed inside the generator function (which doesn't even begin running until you loop over it at least once).
Don't use yield, and return works as expected (because it's not a generator function).
As an example to explain what happens to the return value in normal looping constructs, this is what for x in gen(): effectively expands to a C optimized version of:
__unnamed_iterator = iter(gen())
while True:
    try:
        x = next(__unnamed_iterator)
    except StopIteration: # StopIteration caught here without inspecting it
        break             # Loop ends, StopIteration exception cleaned even from sys.exc_info() to avoid possible reference cycles
    # body of loop goes here
# Outside of loop, there is no StopIteration object left
As you can see, the expanded form of the for loop has to look for a StopIteration to indicate the loop is over, but it doesn't use it. And for anything that's not a generator, the StopIteration never has any associated values; the for loop has no way to report them even if it did (it has to end the loop when it's told iteration is over, and the arguments to StopIteration are explicitly not part of the values iterated anyway). Anything else that consumes the generator (e.g. calling list on it) is doing roughly the same thing as the for loop, ignoring the StopIteration in the same way; nothing except code that specifically expects generators (as opposed to more generalized iterables and iterators) will ever bother to inspect the StopIteration object (at the C layer, there are optimizations that StopIteration objects aren't even produced by most iterators; they return NULL and leave the set exception empty, which all iterator protocol using things know is equivalent to returning NULL and setting a StopIteration object, so for anything but a generator, there isn't even an exception to inspect much of the time).

Pythonic way to get next iterable value without incrementing iterator [duplicate]

I can't figure out how to look ahead one element in a Python generator. As soon as I look it's gone.
Here is what I mean:
gen = iter([1,2,3])
next_value = gen.next() # okay, I looked forward and see that next_value = 1
# but now:
list(gen) # is [2, 3] -- the first value is gone!
Here is a more real example:
gen = element_generator()
if gen.next_value() == 'STOP':
    quit_application()
else:
    process(gen.next())
Can anyone help me write a generator that you can look one element forward?
See also: Resetting generator object in Python
For sake of completeness, the more-itertools package (which should probably be part of any Python programmer's toolbox) includes a peekable wrapper that implements this behavior. As the code example in the documentation shows:
>>> p = peekable(['a', 'b'])
>>> p.peek()
'a'
>>> next(p)
'a'
However, it's often possible to rewrite code that would use this functionality so that it doesn't actually need it. For example, your realistic code sample from the question could be written like this:
gen = element_generator()
command = gen.next_value()
if command == 'STOP':
    quit_application()
else:
    process(command)
(reader's note: I've preserved the syntax in the example from the question as of when I'm writing this, even though it refers to an outdated version of Python)
The Python generator API is one way: You can't push back elements you've read. But you can create a new iterator using the itertools module and prepend the element:
import itertools
gen = iter([1,2,3])
peek = gen.next()
print list(itertools.chain([peek], gen))
Ok - two years too late - but I came across this question, and did not find any of the answers to my satisfaction. Came up with this meta generator:
class Peekorator(object):

    def __init__(self, generator):
        self.empty = False
        self.peek = None
        self.generator = generator
        try:
            self.peek = self.generator.next()
        except StopIteration:
            self.empty = True

    def __iter__(self):
        return self

    def next(self):
        """
        Return the self.peek element, or raise StopIteration
        if empty
        """
        if self.empty:
            raise StopIteration()
        to_return = self.peek
        try:
            self.peek = self.generator.next()
        except StopIteration:
            self.peek = None
            self.empty = True
        return to_return

def simple_iterator():
    for x in range(10):
        yield x*3

pkr = Peekorator(simple_iterator())
for i in pkr:
    print i, pkr.peek, pkr.empty
results in:
0 3 False
3 6 False
6 9 False
9 12 False
...
24 27 False
27 None False
i.e. you have at any moment during iteration access to the next item in the list.
Using itertools.tee will produce a lightweight copy of the generator; then peeking ahead at one copy will not affect the second copy. Thus:
import itertools
def process(seq):
    peeker, items = itertools.tee(seq)
    # initial peek ahead
    # so that peeker is one ahead of items
    if next(peeker) == 'STOP':
        return
    for item in items:
        # peek ahead
        if next(peeker) == "STOP":
            return
        # process items
        print(item)
The items generator is unaffected by modifications to peeker. However, modifying seq after the call to tee may cause problems.
That said: any algorithm that requires looking an item ahead in a generator could instead be written to use the current generator item and the previous item. This will result in simpler code - see my other answer to this question.
An iterator that allows peeking at the next element and also further ahead. It reads ahead as needed and remembers the values in a deque.
from collections import deque
class PeekIterator:

    def __init__(self, iterable):
        self.iterator = iter(iterable)
        self.peeked = deque()

    def __iter__(self):
        return self

    def __next__(self):
        if self.peeked:
            return self.peeked.popleft()
        return next(self.iterator)

    def peek(self, ahead=0):
        while len(self.peeked) <= ahead:
            self.peeked.append(next(self.iterator))
        return self.peeked[ahead]
Demo:
>>> it = PeekIterator(range(10))
>>> it.peek()
0
>>> it.peek(5)
5
>>> it.peek(13)
Traceback (most recent call last):
File "<pyshell#68>", line 1, in <module>
it.peek(13)
File "[...]", line 15, in peek
self.peeked.append(next(self.iterator))
StopIteration
>>> it.peek(2)
2
>>> next(it)
0
>>> it.peek(2)
3
>>> list(it)
[1, 2, 3, 4, 5, 6, 7, 8, 9]
>>>
>>> gen = iter(range(10))
>>> peek = next(gen)
>>> peek
0
>>> gen = (value for g in ([peek], gen) for value in g)
>>> list(gen)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Just for fun, I created an implementation of a lookahead class based on the suggestion by
Aaron:
import itertools
class lookahead_chain(object):

    def __init__(self, it):
        self._it = iter(it)

    def __iter__(self):
        return self

    def next(self):
        return next(self._it)

    def peek(self, default=None, _chain=itertools.chain):
        it = self._it
        try:
            v = self._it.next()
            self._it = _chain((v,), it)
            return v
        except StopIteration:
            return default

lookahead = lookahead_chain
With this, the following will work:
>>> t = lookahead(xrange(8))
>>> list(itertools.islice(t, 3))
[0, 1, 2]
>>> t.peek()
3
>>> list(itertools.islice(t, 3))
[3, 4, 5]
With this implementation it is a bad idea to call peek many times in a row...
While looking at the CPython source code I just found a better way which is both shorter and more efficient:
class lookahead_tee(object):

    def __init__(self, it):
        self._it, = itertools.tee(it, 1)

    def __iter__(self):
        return self._it

    def peek(self, default=None):
        try:
            return self._it.__copy__().next()
        except StopIteration:
            return default

lookahead = lookahead_tee
Usage is the same as above but you won't pay a price here to use peek many times in a row. With a few more lines you can also look ahead more than one item in the iterator (up to available RAM).
A simple solution is to use a function like this:
def peek(it):
    first = next(it)
    return first, itertools.chain([first], it)
Then you can do:
>>> it = iter(range(10))
>>> x, it = peek(it)
>>> x
0
>>> next(it)
0
>>> next(it)
1
In case anybody is interested (and please correct me if I am wrong), I believe it is pretty easy to add some push-back functionality to any iterator.
class Back_pushable_iterator:
    """Class whose constructor takes an iterator as its only parameter, and
    returns an iterator that behaves in the same way, with added push back
    functionality.
    The idea is to be able to push back elements that need to be retrieved once
    more with the iterator semantics. This is particularly useful to implement
    LL(k) parsers that need k tokens of lookahead. Lookahead or push back is
    really a matter of perspective. The pushing back strategy allows a clean
    parser implementation based on recursive parser functions.
    The invoker of this class takes care of storing the elements that should be
    pushed back. A consequence of this is that any elements can be "pushed
    back", even elements that have never been retrieved from the iterator.
    The elements that are pushed back are then retrieved through the iterator
    interface in a LIFO-manner (as should logically be expected).
    This class works for any iterator but is especially meaningful for a
    generator iterator, which offers no obvious push back ability.
    In the LL(k) case mentioned above, the tokenizer can be implemented by a
    standard generator function (clean and simple), that is completed by this
    class for the needs of the actual parser.
    """
    def __init__(self, iterator):
        self.iterator = iterator
        self.pushed_back = []

    def __iter__(self):
        return self

    def __next__(self):
        if self.pushed_back:
            return self.pushed_back.pop()
        else:
            return next(self.iterator)

    def push_back(self, element):
        self.pushed_back.append(element)

it = Back_pushable_iterator(x for x in range(10))

x = next(it)  # 0
print(x)
it.push_back(x)
x = next(it)  # 0
print(x)
x = next(it)  # 1
print(x)
x = next(it)  # 2
y = next(it)  # 3
print(x)
print(y)
it.push_back(y)
it.push_back(x)
x = next(it)  # 2
y = next(it)  # 3
print(x)
print(y)
for x in it:
    print(x)  # 4-9
This will work -- it buffers an item and calls a function with each item and the next item in the sequence.
Your requirements are murky on what happens at the end of the sequence. What does "look ahead" mean when you're at the last one?
def process_with_lookahead(iterable, aFunction):
    prev = iterable.next()
    for item in iterable:
        aFunction(prev, item)
        prev = item
    aFunction(item, None)

def someLookaheadFunction(item, next_item):
    print item, next_item
Instead of using items (i, i+1), where 'i' is the current item and i+1 is the 'peek ahead' version, you should be using (i-1, i), where 'i-1' is the previous version from the generator.
Tweaking your algorithm this way will produce something that is identical to what you currently have, apart from the extra needless complexity of trying to 'peek ahead'.
Peeking ahead is a mistake, and you should not be doing it.
Although itertools.chain() is the natural tool for the job here, beware of loops like this:
for elem in gen:
    ...
    peek = next(gen)
    gen = itertools.chain([peek], gen)
...Because this will consume a linearly growing amount of memory, and eventually grind to a halt. (This code essentially seems to create a linked list, one node per chain() call.) I know this not because I inspected the libs but because this just resulted in a major slowdown of my program - getting rid of the gen = itertools.chain([peek], gen) line sped it up again. (Python 3.3)
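A constant-memory alternative (just a sketch, in the spirit of the deque-based PeekIterator answer above; the with_peek name is only for illustration) is to keep a small explicit buffer instead of re-wrapping the generator on every pass:
import itertools
from collections import deque

def with_peek(gen):
    """Yield (item, peek) pairs without rebuilding a chain each iteration;
    peek is None after the last item."""
    gen = iter(gen)
    buf = deque(itertools.islice(gen, 1))      # prime with the first element, if any
    while buf:
        item = buf.popleft()
        buf.extend(itertools.islice(gen, 1))   # pull at most one more element
        yield item, (buf[0] if buf else None)

for elem, peek in with_peek(range(4)):
    print(elem, peek)   # 0 1 / 1 2 / 2 3 / 3 None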
Python 3 snippet for Jonathan Hartley's answer:
def peek(iterator, eoi=None):
    iterator = iter(iterator)
    try:
        prev = next(iterator)
    except StopIteration:
        return iterator
    for elm in iterator:
        yield prev, elm
        prev = elm
    yield prev, eoi

for curr, nxt in peek(range(10)):
    print((curr, nxt))
# (0, 1)
# (1, 2)
# (2, 3)
# (3, 4)
# (4, 5)
# (5, 6)
# (6, 7)
# (7, 8)
# (8, 9)
# (9, None)
It'd be straightforward to create a class that does this on __iter__ and yields just the prev item and put the elm in some attribute.
With respect to David Z's post, the newer seekable tool can reset a wrapped iterator to a prior position.
>>> s = mit.seekable(range(3))
>>> s.next()
# 0
>>> s.seek(0) # reset iterator
>>> s.next()
# 0
>>> s.next()
# 1
>>> s.seek(1)
>>> s.next()
# 1
>>> next(s)
# 2
cytoolz has a peek function.
>> from cytoolz import peek
>> gen = iter([1,2,3])
>> first, continuation = peek(gen)
>> first
1
>> list(continuation)
[1, 2, 3]
In my case, I need a generator where I can queue back data I have just got via a next() call.
The way I handle this problem is to create a queue. In the implementation of the generator, I first check the queue: if the queue is not empty, yield the values from the queue; otherwise yield values in the normal way.
import queue
def gen1(n, q):
    i = 0
    while True:
        if not q.empty():
            yield q.get()
        else:
            yield i
            i = i + 1
            if i >= n:
                if not q.empty():
                    yield q.get()
                break
q = queue.Queue()
f = gen1(2, q)
i = next(f)
print(i)
i = next(f)
print(i)
q.put(i) # put back the value I have just got for following 'next' call
i = next(f)
print(i)
running
python3 gen_test.py
0
1
1
This concept was very useful when I was writing a parser that needs to look at a file line by line: if a line appears to belong to the next phase of parsing, I can just queue it back to the generator so that the next phase of code can parse it correctly without having to handle complex state.
For those of you who embrace frugality and one-liners, I present to you a one-liner that allows one to look ahead in an iterable (this only works in Python 3.8 and above):
>>> import itertools as it
>>> peek = lambda iterable, n=1: it.islice(zip(it.chain((t := it.tee(iterable))[0], [None] * n), it.chain([None] * n, t[1])), n, None)
>>> for lookahead, element in peek(range(10)):
... print(lookahead, element)
1 0
2 1
3 2
4 3
5 4
6 5
7 6
8 7
9 8
None 9
>>> for lookahead, element in peek(range(10), 2):
... print(lookahead, element)
2 0
3 1
4 2
5 3
6 4
7 5
8 6
9 7
None 8
None 9
This method is space-efficient by avoiding copying the iterator multiple times. It is also fast due to how it lazily generates elements. Finally, as a cherry on top, you can look ahead an arbitrary number of elements.
An algorithm that works by "peeking" at the next element in a generator could equivalently be one that works by remembering the previous element, treating that element as the one to operate upon, and treating the "current" element as simply "peeked at".
Either way, what is really happening is that the algorithm considers overlapping pairs from the generator. The itertools.tee recipe will work fine - and it is not hard to see that it is essentially a refactored version of Jonathan Hartley's approach:
from itertools import tee
# From https://docs.python.org/3/library/itertools.html#itertools.pairwise
# In 3.10 and up, this is directly supplied by the `itertools` module.
def pairwise(iterable):
    # pairwise('ABCDEFG') --> AB BC CD DE EF FG
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

def process(seq):
    for to_process, lookahead in pairwise(seq):
        # peek ahead
        if lookahead == "STOP":
            return
        # process items
        print(to_process)

Recursively calling an object method that returns an iterator of itself

I'm currently writing a project that requires third party code that uses a method that returns an iterator of itself, an example of how this would look in my code:
def generate():
    for x in obj.children():
        for y in x.children():
            for z in y.children():
                yield z.thing
Currently this simply clutters my code, and becomes hard to read after 3 levels. Ideally I'd get it to do something like this:
x = recursive(obj, method="children", repeat=3).thing
Is there a built in way to do this in Python?
Starting from Python 3.3, you can use the yield from syntax to yield from an entire generator.
So, you can modify your function a bit, to take a couple of parameters:
def generate(obj, n):
    if n == 1:
        for x in obj.children():
            yield x.thing
    else:
        for x in obj.children():
            yield from generate(x, n - 1)
The yield from expression yields every value from the generator produced by the recursive call.
Call your function like this:
x = generate(obj, 3)
Note that this returns you a generator of x.things.
Based on your particular requirement, here's a more generic version using getattr that works with arbitrary attributes.
def generate(obj, iterable_attr, attr_to_yield, n):
    if n == 1:
        for x in getattr(obj, iterable_attr)():   # call the named method to get the iterable
            yield getattr(x, attr_to_yield)
    else:
        for x in getattr(obj, iterable_attr)():
            yield from generate(x, iterable_attr, attr_to_yield, n - 1)
And now, call your function as:
x = generate(obj, 'children', 'thing', 3)
If using Python 2.7 you need to keep your own stack of iterables and do the looping:
from operator import methodcaller
def recursive(obj, iterater, yielder, depth):
    iterate = methodcaller(iterater)
    xs = [iterate(obj)]
    while xs:
        try:
            x = xs[-1].next()
            if len(xs) != depth:
                xs.append(iterate(x))
            else:
                yield getattr(x, yielder)
        except StopIteration:
            xs.pop()
This is a specialized case of a more general recursive chain-from-iterable function:
import collections

def recursive_ichain(iterable_tree):
    xs = [iter(iterable_tree)]
    while xs:
        try:
            x = xs[-1].next()
            if isinstance(x, collections.Iterable):
                xs.append(iter(x))
            else:
                yield x
        except StopIteration:
            xs.pop()
And some test objects:
class Thing(object):
    def __init__(self, thing):
        self.thing = thing

class Parent(object):
    def __init__(self, *kids):
        self.kids = kids
    def children(self):
        return iter(self.kids)

test_obj = Parent(
    Parent(
        Parent(Thing('one'), Thing('two'), Thing('three')),
        Parent(Thing('four')),
        Parent(Thing('five'), Thing('six')),
    ),
    Parent(
        Parent(Thing('seven'), Thing('eight')),
        Parent(),
        Parent(Thing('nine'), Thing('ten')),
    )
)
And testing it:
>>>for t in recursive(test_obj, 'children', 'thing', 3):
>>> print t
one
two
three
four
five
six
seven
eight
nine
ten
Personally, I'd be inclined to change the yield getattr(x, yielder) to yield x, to access the leaf objects themselves and explicitly access the thing, i.e.:
for leaf in recursive(test_obj, 'children', 3):
    print leaf.thing
The yield from example above is good, but I seriously doubt the level/depth param is needed. A simpler / more generic solution that works for any tree:
class Node(object):
    def __init__(self, thing, children=None):
        self.thing = thing
        self._children = children
    def children(self):
        return self._children if self._children else []

def generate(node):
    if node.thing:
        yield node.thing
    for child in node.children():
        yield from generate(child)

node = Node('mr.', [Node('derek', [Node('curtis')]), Node('anderson')])
print(list(generate(node)))
Returns:
$ python3 test.py
['mr.', 'derek', 'curtis', 'anderson']
Note this will return the current node's thing before any of its children's. (I.e. it expresses itself on the way down the walk.) If you'd prefer it to express itself on the way back up the walk, swap the if and the for statements (pre-order vs post-order traversal; see the sketch below). But that likely doesn't matter in your case (where I suspect a node has either a thing or children, never both).
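For reference, a minimal sketch of the swapped (children-first, post-order) version, using the same Node class and node value as above:
def generate_postorder(node):
    for child in node.children():
        yield from generate_postorder(child)
    if node.thing:
        yield node.thing

print(list(generate_postorder(node)))  # ['curtis', 'derek', 'anderson', 'mr.']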

Short-circuit evaluation like Python's "and" while storing results of checks

I have multiple expensive functions that return results. I want to return a tuple of the results of all the checks if all the checks succeed. However, if one check fails I don't want to call the later checks, like the short-circuiting behavior of and. I could nest if statements, but that will get out of hand if there are a lot of checks. How can I get the short-circuit behavior of and while also storing the results for later use?
def check_a():
    # do something and return the result,
    # for simplicity, just make it "A"
    return "A"

def check_b():
    # do something and return the result,
    # for simplicity, just make it "B"
    return "B"
...
This doesn't short-circuit:
a = check_a()
b = check_b()
c = check_c()
if a and b and c:
    return a, b, c
This is messy if there are many checks:
if a:
    b = check_b()
    if b:
        c = check_c()
        if c:
            return a, b, c
Is there a shorter way to do this?
Just use a plain old for loop:
results = {}
for function in [check_a, check_b, ...]:
    results[function.__name__] = result = function()
    if not result:
        break
The results will be a mapping of the function name to their return values, and you can do what you want with the values after the loop breaks.
Use an else clause on the for loop if you want special handling for the case where all of the functions have returned truthy results.
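For example, a sketch of that for/else variant (assuming, as in the question, that this runs inside a function):
results = {}
for function in [check_a, check_b, check_c]:
    results[function.__name__] = result = function()
    if not result:
        break
else:
    # only runs if the loop never hit break, i.e. every check passed
    return tuple(results.values())
return None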
Write a function that takes an iterable of functions to run. Call each one and append the result to a list, or return None if the result is False. Either the function will stop calling further checks after one fails, or it will return the results of all the checks.
def all_or_none(checks, *args, **kwargs):
    out = []
    for check in checks:
        rv = check(*args, **kwargs)
        if not rv:
            return None
        out.append(rv)
    return out

rv = all_or_none((check_a, check_b, check_c))
# rv is a list if all checks passed, otherwise None
if rv is not None:
    return rv

def check_a(obj):
    ...

def check_b(obj):
    ...

# pass arguments to each check, useful for writing reusable checks
rv = all_or_none((check_a, check_b), obj=my_object)
In other languages that did have assignments as expressions you would be able to use
if (a = check_a()) and (b = check_b()) and (c = check_c()):
but Python is no such language. Still, we can circumvent the restriction and emulate that behaviour:
result = []
def put(value):
    result.append(value)
    return value

if put(check_a()) and put(check_b()) and put(check_c()):
    # if you need them as variables, you could do
    # (a, b, c) = result
    # but you just want
    return tuple(result)
This might loosen the connection between the variables and function calls a bit too much, so if you want to do lots of separate things with the variables, instead of using the result elements in the order they were put in the list, I would rather avoid this approach. Still, it might be quicker and shorter than some loop.
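For what it's worth, since Python 3.8 the walrus operator provides assignment expressions directly, so on newer versions the original idea works as written (again assuming this sits inside a function, as in the question):
# Python 3.8+ only
if (a := check_a()) and (b := check_b()) and (c := check_c()):
    return a, b, c
return None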
You could use either a list or an OrderedDict, using a for loop would serve the purpose of emulating short circuiting.
from collections import OrderedDict
def check_a():
    return "A"

def check_b():
    return "B"

def check_c():
    return "C"

def check_d():
    return False

def method1(*args):
    results = []
    for i, f in enumerate(args):
        value = f()
        results.append(value)
        if not value:
            return None
    return results

def method2(*args):
    results = OrderedDict()
    for f in args:
        results[f.__name__] = result = f()
        if not result:
            return None
    return results

# Case 1, it should return check_a, check_b, check_c
for m in [method1, method2]:
    print(m(check_a, check_b, check_c))

# Case 2, it should return None
for m in [method1, method2]:
    print(m(check_a, check_b, check_d, check_c))
There are lots of ways to do this! Here's another.
You can use a generator expression to defer the execution of the functions. Then you can use itertools.takewhile to implement the short-circuiting logic by consuming items from the generator until one of them is false.
from itertools import takewhile
functions = (check_a, check_b, check_c)
generator = (f() for f in functions)
results = tuple(takewhile(bool, generator))
if len(results) == len(functions):
    return results
Another way to tackle this is using a generator, since generators use lazy evaluation. First put all checks into a generator:
def checks():
    yield check_a()
    yield check_b()
    yield check_c()
Now you could force evaluation of everything by converting it to a list:
list(checks())
But the standard all function does proper short cut evaluation on the iterator returned from checks(), and returns whether all elements are truthy:
all(checks())
Last, if you want the results of succeeding checks up to the failure you can use itertools.takewhile to take the first run of truthy values only. Since the result of takewhile is lazy itself you'll need to convert it to a list to see the result in a REPL:
from itertools import takewhile
takewhile(lambda x: x, checks())
list(takewhile(lambda x: x, checks()))
main logic:
results = list(takewhile(lambda x: x, map(lambda x: x(), function_list)))
if len(results) == len(function_list):
    return results
You can learn a lot about collection transformations by looking at all the methods of an API like http://www.scala-lang.org/api/2.11.7/#scala.collection.immutable.List and implementing the Python equivalents.
logic with setup and alternatives:
import sys
if sys.version_info.major == 2:
    from itertools import imap
    map = imap

def test(bool):
    def inner():
        print(bool)
        return bool
    return inner

def function_for_return():
    function_list = [test(True), test(True), test(False), test(True)]
    from itertools import takewhile
    print("results:")
    results = list(takewhile(lambda x: x, map(lambda x: x(), function_list)))
    if len(results) == len(function_list):
        return results
    print(results)

    # personally i prefer another syntax:
    class Iterator(object):
        def __init__(self, iterable):
            self.iterator = iter(iterable)
        def __next__(self):
            return next(self.iterator)
        def __iter__(self):
            return self
        def map(self, f):
            return Iterator(map(f, self.iterator))
        def takewhile(self, f):
            return Iterator(takewhile(f, self.iterator))

    print("results2:")
    results2 = list(
        Iterator(function_list)
        .map(lambda x: x())
        .takewhile(lambda x: x)
    )
    print(results2)

    print("with additional information")
    function_list2 = [(test(True), "a"), (test(True), "b"), (test(False), "c"), (test(True), "d")]
    results3 = list(
        Iterator(function_list2)
        .map(lambda x: (x[0](), x[1]))
        .takewhile(lambda x: x[0])
    )
    print(results3)

function_for_return()
If you don't need to take an arbitrary number of expressions at runtime (possibly wrapped in lambdas), you can expand your code directly into this pattern:
def f():
    try:
        return (<a> or jump(),
                <b> or jump(),
                <c> or jump())
    except NonLocalExit:
        return None
Where those definitions apply:
class NonLocalExit(Exception):
    pass

def jump():
    raise NonLocalExit()
Flexible short circuiting is really best done with Exceptions. For a very simple prototype you could even just assert each check result:
try:
    a = check_a()
    assert a
    b = check_b()
    assert b
    c = check_c()
    assert c
    return a, b, c
except AssertionError as e:
    return None
You should probably raise a custom Exception instead. You could change your check_X functions to raise Exceptions themself, in an arbitrary nested way. Or you could wrap or decorate your check_X functions to raise errors on falsy return values.
In short, exception handling is very flexible and exactly what you are looking for, don't be afraid to use it. If you learned somewhere that exception handling is not to be used for your own flow control, this does not apply to python. Liberal use of exception handling is considered pythonic, as in EAFP.
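For example, a minimal sketch of wrapping the checks so that a falsy result raises a custom exception (the CheckFailed and raise_on_falsy names are just for illustration; assume this sits inside a function, as in the question's snippets):
class CheckFailed(Exception):
    pass

def raise_on_falsy(func):
    """Wrap a check so that a falsy result raises instead of returning."""
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        if not result:
            raise CheckFailed(func.__name__)
        return result
    return wrapper

try:
    return raise_on_falsy(check_a)(), raise_on_falsy(check_b)(), raise_on_falsy(check_c)()
except CheckFailed:
    return None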
You mentioned 'short-circuiting' in your question, which can be done with the or operator. The top answer basically does the same thing, but in case someone wants to know more about this behaviour, you could do this:
class Container(object):
    def __init__(self):
        self.values = []

    def check_and_cache(self, value, checking_function):
        value_true = checking_function(value)
        if value_true:
            self.values.append(value)
            return True

c = Container()
if not c.check_and_cache(a, check_a) or not c.check_and_cache(b, check_b) or not c.check_and_cache(c, check_c):
    print 'done'
return tuple(c.values)
The 'not .. or' setup of the if statements will result in a 'True' if the check fails, so the overall if statement passes without evaluating the remaining values.
Since I cannot comment on wim's answer as a guest, I'll just add an extra answer.
Since you want a tuple, you should collect the results in a list and then convert it to a tuple.
def short_eval(*checks):
    result = []
    for check in checks:
        checked = check()
        if not checked:
            break
        result.append(checked)
    return tuple(result)
# Example
wished = short_eval(check_a, check_b, check_c)
You can try the @lazy_function decorator from the lazy_python package. Example of usage:
from lazy import lazy_function, strict

@lazy_function
def check(a, b):
    strict(print('Call: {} {}'.format(a, b)))
    if a + b > a * b:
        return '{}, {}'.format(a, b)

a = check(-1, -2)
b = check(1, 2)
c = check(-1, 2)

print('First condition')
if c and a and b: print('Ok: {}'.format((a, b)))
print('Second condition')
if c and b: print('Ok: {}'.format((c, b)))

# Output:
# First condition
# Call: -1 2
# Call: -1 -2
# Second condition
# Call: 1 2
# Ok: ('-1, 2', '1, 2')
This is similar to Bergi's answer but I think that answer misses the point of wanting separate functions (check_a, check_b, check_c):
list1 = []

def check_a():
    condition = True
    a = 1
    if condition:
        list1.append(a)
        print("checking a")
        return True
    else:
        return False

def check_b():
    condition = False
    b = 2
    if condition:
        list1.append(b)
        print("checking b")
        return True
    else:
        return False

def check_c():
    condition = True
    c = 3
    if condition:
        list1.append(c)
        print("checking c")
        return True
    else:
        return False

if check_a() and check_b() and check_c():
    pass  # won't get here

tuple1 = tuple(list1)
print(tuple1)
# output is:
# checking a
# (1,)
Or, if you don't want to use the global list, pass a reference of a local list to each of the functions.
If the main objection is
This is messy if there are many checks:
if a:
    b = check_b()
    if b:
        c = check_c()
        if c:
            return a, b, c
A fairly nice pattern is to reverse the condition and return early
if not a:
    return  # None, or some value, or however you want to handle this
b = check_b()
if not b:
    return
c = check_c()
if not c:
    return
# ok, they were all truthy
return a, b, c

Resetting generator object in Python

I have a generator object returned by multiple yields. The preparation to call this generator is a rather time-consuming operation. That is why I want to reuse the generator several times.
y = FunctionWithYield()
for x in y: print(x)
#here must be something to reset 'y'
for x in y: print(x)
Of course, I'm keeping in mind that I could copy the content into a simple list. Is there a way to reset my generator?
See also: How to look ahead one element (peek) in a Python generator?
Generators can't be rewound. You have the following options:
Run the generator function again, restarting the generation:
y = FunctionWithYield()
for x in y: print(x)
y = FunctionWithYield()
for x in y: print(x)
Store the generator results in a data structure on memory or disk which you can iterate over again:
y = list(FunctionWithYield())
for x in y: print(x)
# can iterate again:
for x in y: print(x)
The downside of option 1 is that it computes the values again. If that's CPU-intensive, you end up calculating twice. On the other hand, the downside of 2 is the storage. The entire list of values will be stored in memory. If there are too many values, that can be impractical.
So you have the classic memory vs. processing tradeoff. I can't imagine a way of rewinding the generator without either storing the values or calculating them again.
You could also use tee as suggested by other answers, however that would still store the entire list in memory in your case, so it would be the same results and similar performance to option 2.
Another option is to use the itertools.tee() function to create a second version of your generator:
import itertools
y = FunctionWithYield()
y, y_backup = itertools.tee(y)
for x in y:
    print(x)
for x in y_backup:
    print(x)
This could be beneficial from memory usage point of view if the original iteration might not process all the items.
>>> def gen():
...     def init():
...         return 0
...     i = init()
...     while True:
...         val = (yield i)
...         if val == 'restart':
...             i = init()
...         else:
...             i += 1
>>> g = gen()
>>> g.next()
0
>>> g.next()
1
>>> g.next()
2
>>> g.next()
3
>>> g.send('restart')
0
>>> g.next()
1
>>> g.next()
2
Probably the most simple solution is to wrap the expensive part in an object and pass that to the generator:
data = ExpensiveSetup()
for x in FunctionWithYield(data): pass
for x in FunctionWithYield(data): pass
This way, you can cache the expensive calculations.
If you can keep all results in RAM at the same time, then use list() to materialize the results of the generator in a plain list and work with that.
I want to offer a different solution to an old problem
class IterableAdapter:
    def __init__(self, iterator_factory):
        self.iterator_factory = iterator_factory

    def __iter__(self):
        return self.iterator_factory()
squares = IterableAdapter(lambda: (x * x for x in range(5)))
for x in squares: print(x)
for x in squares: print(x)
The benefit of this when compared to something like list(iterator) is that this is O(1) space complexity and list(iterator) is O(n). The disadvantage is that, if you only have access to the iterator, but not the function that produced the iterator, then you cannot use this method. For example, it might seem reasonable to do the following, but it will not work.
g = (x * x for x in range(5))
squares = IterableAdapter(lambda: g)
for x in squares: print(x)
for x in squares: print(x)
Using a wrapper function to handle StopIteration
You could write a simple wrapper function to your generator-generating function that tracks when the generator is exhausted. It will do so using the StopIteration exception a generator throws when it reaches end of iteration.
import types
def generator_wrapper(function=None, **kwargs):
    assert function is not None, "Please supply a function"
    def inner_func(function=function, **kwargs):
        generator = function(**kwargs)
        assert isinstance(generator, types.GeneratorType), "Invalid function"
        try:
            yield next(generator)
        except StopIteration:
            generator = function(**kwargs)
            yield next(generator)
    return inner_func
As you can spot above, when our wrapper function catches a StopIteration exception, it simply re-initializes the generator object (using another instance of the function call).
And then, assuming you define your generator-supplying function somewhere as below, you could use the Python function decorator syntax to wrap it implicitly:
@generator_wrapper
def generator_generating_function(**kwargs):
    for item in ["a value", "another value"]:
        yield item
If GrzegorzOledzki's answer won't suffice, you could probably use send() to accomplish your goal. See PEP-0342 for more details on enhanced generators and yield expressions.
UPDATE: Also see itertools.tee(). It involves some of that memory vs. processing tradeoff mentioned above, but it might save some memory over just storing the generator results in a list; it depends on how you're using the generator.
If your generator is pure in the sense that its output only depends on the passed arguments and the step number, and you want the resulting generator to be restartable, here's a short snippet that might be handy:
import copy
def generator(i):
    yield from range(i)

g = generator(10)
print(list(g))
print(list(g))

class GeneratorRestartHandler(object):
    def __init__(self, gen_func, argv, kwargv):
        self.gen_func = gen_func
        self.argv = copy.copy(argv)
        self.kwargv = copy.copy(kwargv)
        self.local_copy = iter(self)

    def __iter__(self):
        return self.gen_func(*self.argv, **self.kwargv)

    def __next__(self):
        return next(self.local_copy)

def restartable(g_func: callable) -> callable:
    def tmp(*argv, **kwargv):
        return GeneratorRestartHandler(g_func, argv, kwargv)
    return tmp

@restartable
def generator2(i):
    yield from range(i)

g = generator2(10)
print(next(g))
print(list(g))
print(list(g))
print(next(g))
outputs:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[]
0
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
1
From official documentation of tee:
In general, if one iterator uses most or all of the data before
another iterator starts, it is faster to use list() instead of tee().
So it's best to use list(iterable) instead in your case.
You can define a function that returns your generator
def f():
    def FunctionWithYield(generator_args):
        ...  # generator body goes here
    return FunctionWithYield
Now you can just do as many times as you like:
for x in f()(generator_args): print(x)
for x in f()(generator_args): print(x)
I'm not sure what you meant by expensive preparation, but I guess you actually have
data = ... # Expensive computation
y = FunctionWithYield(data)
for x in y: print(x)
#here must be something to reset 'y'
# this is expensive - data = ... # Expensive computation
# y = FunctionWithYield(data)
for x in y: print(x)
If that's the case, why not reuse data?
There is no option to reset an iterator. An iterator pops items out as you iterate through it with the next() function. The only way is to take a backup before you iterate over the iterator object. Check below.
Creating an iterator object with items 0 to 9:
i = iter(range(10))
Iterating through the next() function, which pops an item out:
print(next(i))
Converting the iterator object to a list:
L = list(i)
print(L)
output: [1, 2, 3, 4, 5, 6, 7, 8, 9]
So item 0 is already popped out. Also, all the remaining items are popped as we converted the iterator to a list.
next(i)
Traceback (most recent call last):
  File "<pyshell#129>", line 1, in <module>
    next(i)
StopIteration
So you need to convert the iterator to a list for backup before you start iterating.
A list can be converted back to an iterator with iter(<list-object>).
You can now use more_itertools.seekable (a third-party tool) which enables resetting iterators.
Install via > pip install more_itertools
import more_itertools as mit
y = mit.seekable(FunctionWithYield())
for x in y:
    print(x)

y.seek(0)  # reset iterator
for x in y:
    print(x)
Note: memory consumption grows while advancing the iterator, so be wary of large iterables.
You can do that by using itertools.cycle()
you can create an iterator with this method and then execute a for loop over the iterator which will loop over its values.
For example:
from itertools import cycle

def generator():
    for j in cycle([i for i in range(5)]):
        yield j

gen = generator()
for i in range(20):
    print(next(gen))
will generate 20 numbers, 0 to 4 repeatedly.
A note from the docs:
Note, this member of the toolkit may require significant auxiliary storage (depending on the length of the iterable).
Here is how it works for me:
csv_rows = my_generator()
for _ in range(10):
    for row in csv_rows:
        print(row)
    csv_rows = my_generator()
Ok, you say you want to call a generator multiple times, but initialization is expensive... What about something like this?
class InitializedFunctionWithYield(object):
    def __init__(self):
        # do expensive initialization
        self.start = 5

    def __call__(self, *args, **kwargs):
        # do cheap iteration
        for i in xrange(5):
            yield self.start + i

y = InitializedFunctionWithYield()
for x in y():
    print x
for x in y():
    print x
Alternatively, you could just make your own class that follows the iterator protocol and defines some sort of 'reset' function.
class MyIterator(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.i = 5

    def __iter__(self):
        return self

    def next(self):
        i = self.i
        if i > 0:
            self.i -= 1
            return i
        else:
            raise StopIteration()

my_iterator = MyIterator()
for x in my_iterator:
    print x
print 'resetting...'
my_iterator.reset()
for x in my_iterator:
    print x
https://docs.python.org/2/library/stdtypes.html#iterator-types
http://anandology.com/python-practice-book/iterators.html
My answer solves a slightly different problem: the generator is expensive to initialize and each generated object is expensive to generate, but we need to consume the generator multiple times in multiple functions. In order to call the generator and produce each generated object exactly once, we can use threads and run each of the consuming methods in a different thread. We may not achieve true parallelism due to the GIL, but we will achieve our goal.
This approach did a good job in the following case: a deep learning model processes a lot of images, and the result is a lot of masks for a lot of objects in each image. Each mask consumes memory. We have around 10 methods which compute different statistics and metrics, but they take all the images at once, and all the images cannot fit in memory. The methods can easily be rewritten to accept an iterator.
import threading
from typing import List

class GeneratorSplitter:
    '''
    Split a generator object into multiple generators which will be synchronised.
    Each call to each of the sub-generators causes only one call to the input
    generator. This way multiple methods on threads can iterate the input
    generator, and the generator will be cycled only once.
    '''
    def __init__(self, gen):
        self.gen = gen
        self.consumers: List["GeneratorSplitter.InnerGen"] = []
        self.thread: threading.Thread = None
        self.value = None
        self.finished = False
        self.exception = None

    def GetConsumer(self):
        # Returns a generator object.
        cons = self.InnerGen(self)
        self.consumers.append(cons)
        return cons

    def _Work(self):
        try:
            for d in self.gen:
                for cons in self.consumers:
                    cons.consumed.wait()
                    cons.consumed.clear()
                self.value = d
                for cons in self.consumers:
                    cons.readyToRead.set()
            for cons in self.consumers:
                cons.consumed.wait()
            self.finished = True
            for cons in self.consumers:
                cons.readyToRead.set()
        except Exception as ex:
            self.exception = ex
            for cons in self.consumers:
                cons.readyToRead.set()

    def Start(self):
        self.thread = threading.Thread(target=self._Work)
        self.thread.start()

    class InnerGen:
        def __init__(self, parent: "GeneratorSplitter"):
            self.parent: "GeneratorSplitter" = parent
            self.readyToRead: threading.Event = threading.Event()
            self.consumed: threading.Event = threading.Event()
            self.consumed.set()

        def __iter__(self):
            return self

        def __next__(self):
            self.readyToRead.wait()
            self.readyToRead.clear()
            if self.parent.finished:
                raise StopIteration()
            if self.parent.exception:
                raise self.parent.exception
            val = self.parent.value
            self.consumed.set()
            return val
Usage:
genSplitter = GeneratorSplitter(expensiveGenerator)
metrics={}
executor = ThreadPoolExecutor(max_workers=3)
f1 = executor.submit(mean,genSplitter.GetConsumer())
f2 = executor.submit(max,genSplitter.GetConsumer())
f3 = executor.submit(someFancyMetric,genSplitter.GetConsumer())
genSplitter.Start()
metrics.update(f1.result())
metrics.update(f2.result())
metrics.update(f3.result())
If you want to reuse this generator multiple times with a predefined set of arguments, you can use functools.partial.
from functools import partial
func_with_yield = partial(FunctionWithYield, arg0, arg1)
for i in range(100):
    for x in func_with_yield():
        print(x)
This will wrap the generator function in another function, so that each call to func_with_yield() creates a fresh generator with the same predefined arguments.
It can be done with a code object. Here is an example.
code_str="y=(a for a in [1,2,3,4])"
code1=compile(code_str,'<string>','single')
exec(code1)
for i in y: print i
1
2
3
4
for i in y: print i
exec(code1)
for i in y: print i
1
2
3
4
