Pythonic way to get next iterable value without incrementing iterator [duplicate]

I can't figure out how to look ahead one element in a Python generator. As soon as I look it's gone.
Here is what I mean:
gen = iter([1,2,3])
next_value = gen.next() # okay, I looked forward and see that next_value = 1
# but now:
list(gen) # is [2, 3] -- the first value is gone!
Here is a more real example:
gen = element_generator()
if gen.next_value() == 'STOP':
    quit_application()
else:
    process(gen.next())
Can anyone help me write a generator that you can look one element forward?
See also: Resetting generator object in Python

For sake of completeness, the more-itertools package (which should probably be part of any Python programmer's toolbox) includes a peekable wrapper that implements this behavior. As the code example in the documentation shows:
>>> p = peekable(['a', 'b'])
>>> p.peek()
'a'
>>> next(p)
'a'
However, it's often possible to rewrite code that would use this functionality so that it doesn't actually need it. For example, your realistic code sample from the question could be written like this:
gen = element_generator()
command = gen.next_value()
if command == 'STOP':
    quit_application()
else:
    process(command)
(reader's note: I've preserved the syntax in the example from the question as of when I'm writing this, even though it refers to an outdated version of Python)

The Python generator API is one-way: you can't push back elements you've read. But you can create a new iterator using the itertools module and prepend the element:
import itertools
gen = iter([1,2,3])
peek = gen.next()
print list(itertools.chain([peek], gen))

OK, two years too late, but I came across this question and did not find any of the answers to my satisfaction. I came up with this meta-generator:
class Peekorator(object):
    def __init__(self, generator):
        self.empty = False
        self.peek = None
        self.generator = generator
        try:
            self.peek = self.generator.next()
        except StopIteration:
            self.empty = True

    def __iter__(self):
        return self

    def next(self):
        """
        Return the self.peek element, or raise StopIteration
        if empty
        """
        if self.empty:
            raise StopIteration()
        to_return = self.peek
        try:
            self.peek = self.generator.next()
        except StopIteration:
            self.peek = None
            self.empty = True
        return to_return

def simple_iterator():
    for x in range(10):
        yield x*3

pkr = Peekorator(simple_iterator())
for i in pkr:
    print i, pkr.peek, pkr.empty
results in:
0 3 False
3 6 False
6 9 False
9 12 False
...
24 27 False
27 None False
i.e. you have at any moment during iteration access to the next item in the list.

Using itertools.tee will produce a lightweight copy of the generator; then peeking ahead at one copy will not affect the second copy. Thus:
import itertools
def process(seq):
    peeker, items = itertools.tee(seq)
    # initial peek ahead
    # so that peeker is one ahead of items
    if next(peeker) == 'STOP':
        return
    for item in items:
        # peek ahead
        if next(peeker) == "STOP":
            return
        # process items
        print(item)
The items generator is unaffected by modifications to peeker. However, modifying seq after the call to tee may cause problems.
That said: any algorithm that requires looking an item ahead in a generator could instead be written to use the current generator item and the previous item. This will result in simpler code - see my other answer to this question.

An iterator that allows peeking at the next element and also further ahead. It reads ahead as needed and remembers the values in a deque.
from collections import deque
class PeekIterator:
    def __init__(self, iterable):
        self.iterator = iter(iterable)
        self.peeked = deque()

    def __iter__(self):
        return self

    def __next__(self):
        if self.peeked:
            return self.peeked.popleft()
        return next(self.iterator)

    def peek(self, ahead=0):
        while len(self.peeked) <= ahead:
            self.peeked.append(next(self.iterator))
        return self.peeked[ahead]
Demo:
>>> it = PeekIterator(range(10))
>>> it.peek()
0
>>> it.peek(5)
5
>>> it.peek(13)
Traceback (most recent call last):
  File "<pyshell#68>", line 1, in <module>
    it.peek(13)
  File "[...]", line 15, in peek
    self.peeked.append(next(self.iterator))
StopIteration
>>> it.peek(2)
2
>>> next(it)
0
>>> it.peek(2)
3
>>> list(it)
[1, 2, 3, 4, 5, 6, 7, 8, 9]
>>>

>>> gen = iter(range(10))
>>> peek = next(gen)
>>> peek
0
>>> gen = (value for g in ([peek], gen) for value in g)
>>> list(gen)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

Just for fun, I created an implementation of a lookahead class based on the suggestion by Aaron:
import itertools
class lookahead_chain(object):
    def __init__(self, it):
        self._it = iter(it)

    def __iter__(self):
        return self

    def next(self):
        return next(self._it)

    def peek(self, default=None, _chain=itertools.chain):
        it = self._it
        try:
            v = self._it.next()
            self._it = _chain((v,), it)
            return v
        except StopIteration:
            return default

lookahead = lookahead_chain
With this, the following will work:
>>> t = lookahead(xrange(8))
>>> list(itertools.islice(t, 3))
[0, 1, 2]
>>> t.peek()
3
>>> list(itertools.islice(t, 3))
[3, 4, 5]
With this implementation it is a bad idea to call peek many times in a row...
While looking at the CPython source code I just found a better way which is both shorter and more efficient:
class lookahead_tee(object):
    def __init__(self, it):
        self._it, = itertools.tee(it, 1)

    def __iter__(self):
        return self._it

    def peek(self, default=None):
        try:
            return self._it.__copy__().next()
        except StopIteration:
            return default

lookahead = lookahead_tee
Usage is the same as above but you won't pay a price here to use peek many times in a row. With a few more lines you can also look ahead more than one item in the iterator (up to available RAM).
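For example, here is a minimal sketch of that multi-item extension (my own addition, not part of the original answer; it subclasses the class above, uses Python 3's next(), and the ahead parameter is a name I made up):
import itertools

class lookahead_tee_n(lookahead_tee):
    def peek(self, default=None, ahead=0):
        # Advancing a copy of the tee iterator leaves self._it untouched.
        copy = self._it.__copy__()
        return next(itertools.islice(copy, ahead, ahead + 1), default)

# t = lookahead_tee_n(range(8)); t.peek(ahead=3)  -> 3, without consuming anything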

A simple solution is to use a function like this:
def peek(it):
    first = next(it)
    return first, itertools.chain([first], it)
Then you can do:
>>> it = iter(range(10))
>>> x, it = peek(it)
>>> x
0
>>> next(it)
0
>>> next(it)
1

If anybody is interested (and please correct me if I am wrong), I believe it is pretty easy to add some push-back functionality to any iterator.
class Back_pushable_iterator:
    """Class whose constructor takes an iterator as its only parameter, and
    returns an iterator that behaves in the same way, with added push back
    functionality.

    The idea is to be able to push back elements that need to be retrieved once
    more with the iterator semantics. This is particularly useful to implement
    LL(k) parsers that need k tokens of lookahead. Lookahead or push back is
    really a matter of perspective. The pushing back strategy allows a clean
    parser implementation based on recursive parser functions.

    The invoker of this class takes care of storing the elements that should be
    pushed back. A consequence of this is that any elements can be "pushed
    back", even elements that have never been retrieved from the iterator.
    The elements that are pushed back are then retrieved through the iterator
    interface in a LIFO-manner (as should logically be expected).

    This class works for any iterator but is especially meaningful for a
    generator iterator, which offers no obvious push back ability.

    In the LL(k) case mentioned above, the tokenizer can be implemented by a
    standard generator function (clean and simple), that is completed by this
    class for the needs of the actual parser.
    """
    def __init__(self, iterator):
        self.iterator = iterator
        self.pushed_back = []

    def __iter__(self):
        return self

    def __next__(self):
        if self.pushed_back:
            return self.pushed_back.pop()
        else:
            return next(self.iterator)

    def push_back(self, element):
        self.pushed_back.append(element)
it = Back_pushable_iterator(x for x in range(10))
x = next(it) # 0
print(x)
it.push_back(x)
x = next(it) # 0
print(x)
x = next(it) # 1
print(x)
x = next(it) # 2
y = next(it) # 3
print(x)
print(y)
it.push_back(y)
it.push_back(x)
x = next(it) # 2
y = next(it) # 3
print(x)
print(y)
for x in it:
    print(x)  # 4-9

This will work -- it buffers an item and calls a function with each item and the next item in the sequence.
Your requirements are murky on what happens at the end of the sequence. What does "look ahead" mean when you're at the last one?
def process_with_lookahead(iterable, aFunction):
    prev = iterable.next()
    for item in iterable:
        aFunction(prev, item)
        prev = item
    aFunction(item, None)

def someLookaheadFunction(item, next_item):
    print item, next_item

Instead of using items (i, i+1), where 'i' is the current item and i+1 is the 'peek ahead' version, you should be using (i-1, i), where 'i-1' is the previous version from the generator.
Tweaking your algorithm this way will produce something that is identical to what you currently have, apart from the extra needless complexity of trying to 'peek ahead'.
Peeking ahead is a mistake, and you should not be doing it.
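A minimal sketch of that (previous, current) pattern (my own illustration; gen is the generator in question and handle() is a hypothetical stand-in for whatever processing is needed):
prev = _sentinel = object()
for item in gen:
    if prev is not _sentinel:
        handle(prev)   # work on the previous item; `item` plays the "peeked" role
    prev = item
if prev is not _sentinel:
    handle(prev)       # the final item still needs handling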

Although itertools.chain() is the natural tool for the job here, beware of loops like this:
for elem in gen:
    ...
    peek = next(gen)
    gen = itertools.chain([peek], gen)
...Because this will consume a linearly growing amount of memory, and eventually grind to a halt. (This code essentially seems to create a linked list, one node per chain() call.) I know this not because I inspected the libs but because this just resulted in a major slowdown of my program - getting rid of the gen = itertools.chain([peek], gen) line sped it up again. (Python 3.3)

Python 3 snippet for @jonathan-hartley's answer:
def peek(iterator, eoi=None):
    iterator = iter(iterator)
    try:
        prev = next(iterator)
    except StopIteration:
        return iterator
    for elm in iterator:
        yield prev, elm
        prev = elm
    yield prev, eoi

for curr, nxt in peek(range(10)):
    print((curr, nxt))
# (0, 1)
# (1, 2)
# (2, 3)
# (3, 4)
# (4, 5)
# (5, 6)
# (6, 7)
# (7, 8)
# (8, 9)
# (9, None)
It'd be straightforward to create a class that does this in __iter__ and yields just the prev item, putting the elm in some attribute.
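A rough sketch of that idea (my own addition, reusing the peek() generator defined above; the class and attribute names are made up):
class Peeker:
    def __init__(self, iterable, eoi=None):
        self._pairs = peek(iterable, eoi)   # the peek() generator defined above

    def __iter__(self):
        for current, upcoming in self._pairs:
            self.next_item = upcoming       # stash the peeked element
            yield current

p = Peeker(range(3))
for x in p:
    print(x, p.next_item)   # -> 0 1, then 1 2, then 2 None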

With respect to @David Z's post, the newer seekable tool (also from more-itertools) can reset a wrapped iterator to a prior position.
>>> s = mit.seekable(range(3))
>>> s.next()
# 0
>>> s.seek(0) # reset iterator
>>> s.next()
# 0
>>> s.next()
# 1
>>> s.seek(1)
>>> s.next()
# 1
>>> next(s)
# 2

cytoolz has a peek function.
>> from cytoolz import peek
>> gen = iter([1,2,3])
>> first, continuation = peek(gen)
>> first
1
>> list(continuation)
[1, 2, 3]

In my case, I needed a generator to which I could queue back the data I had just got via a next() call.
The way I handle this problem is to create a queue. In the implementation of the generator, I first check the queue: if the queue is not empty, "yield" returns the values from the queue; otherwise it yields the values in the normal way.
import queue
def gen1(n, q):
    i = 0
    while True:
        if not q.empty():
            yield q.get()
        else:
            yield i
            i = i + 1
            if i >= n:
                if not q.empty():
                    yield q.get()
                break
q = queue.Queue()
f = gen1(2, q)
i = next(f)
print(i)
i = next(f)
print(i)
q.put(i) # put back the value I have just got for following 'next' call
i = next(f)
print(i)
running
python3 gen_test.py
0
1
1
This concept was very useful when I was writing a parser that needs to look at a file line by line: if a line appears to belong to the next phase of parsing, I can just queue it back to the generator so that the next phase of the code can parse it correctly, without having to handle complex state.

For those of you who embrace frugality and one-liners, I present to you a one-liner that allows one to look ahead in an iterable (this only works in Python 3.8 and above):
>>> import itertools as it
>>> peek = lambda iterable, n=1: it.islice(zip(it.chain((t := it.tee(iterable))[0], [None] * n), it.chain([None] * n, t[1])), n, None)
>>> for lookahead, element in peek(range(10)):
... print(lookahead, element)
1 0
2 1
3 2
4 3
5 4
6 5
7 6
8 7
9 8
None 9
>>> for lookahead, element in peek(range(10), 2):
... print(lookahead, element)
2 0
3 1
4 2
5 3
6 4
7 5
8 6
9 7
None 8
None 9
This method is space-efficient by avoiding copying the iterator multiple times. It is also fast due to how it lazily generates elements. Finally, as a cherry on top, you can look ahead an arbitrary number of elements.
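If the one-liner is hard to follow, here is a sketch of an expanded equivalent (my own rewrite, not part of the original answer); it produces the same (lookahead, element) pairs:
import itertools as it

def peek(iterable, n=1):
    # Pair every element with the item n positions ahead (None past the end).
    a, b = it.tee(iterable)
    lookaheads = it.chain(it.islice(a, n, None), it.repeat(None, n))
    return zip(lookaheads, b)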

An algorithm that works by "peeking" at the next element in a generator could equivalently be one that works by remembering the previous element, treating that element as the one to operate upon, and treating the "current" element as simply "peeked at".
Either way, what is really happening is that the algorithm considers overlapping pairs from the generator. The itertools.tee recipe will work fine - and it is not hard to see that it is essentially a refactored version of Jonathan Hartley's approach:
from itertools import tee
# From https://docs.python.org/3/library/itertools.html#itertools.pairwise
# In 3.10 and up, this is directly supplied by the `itertools` module.
def pairwise(iterable):
    # pairwise('ABCDEFG') --> AB BC CD DE EF FG
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

def process(seq):
    for to_process, lookahead in pairwise(seq):
        # peek ahead
        if lookahead == "STOP":
            return
        # process items
        print(to_process)

Related

How not to miss the next element after itertools.takewhile()

Say we wish to process an iterator and want to handle it by chunks.
The logic per chunk depends on previously-calculated chunks, so groupby() does not help.
Our friend in this case is itertools.takewhile():
while True:
    chunk = itertools.takewhile(getNewChunkLogic(), myIterator)
    process(chunk)
The problem is that takewhile() needs to go past the last element that meets the new chunk logic, thus 'eating' the first element for the next chunk.
There are various solutions to that, including wrapping it or doing something à la C's ungetc(), etc.
My question is: is there an elegant solution?
takewhile() indeed needs to look at the next element to determine when to toggle behaviour.
You could use a wrapper that tracks the last seen element, and that can be 'reset' to back up one element:
_sentinel = object()
class OneStepBuffered(object):
    def __init__(self, it):
        self._it = iter(it)
        self._last = _sentinel
        self._next = _sentinel

    def __iter__(self):
        return self

    def __next__(self):
        if self._next is not _sentinel:
            next_val, self._next = self._next, _sentinel
            return next_val
        try:
            self._last = next(self._it)
            return self._last
        except StopIteration:
            self._last = self._next = _sentinel
            raise

    next = __next__  # Python 2 compatibility

    def step_back(self):
        if self._last is _sentinel:
            raise ValueError("Can't back up a step")
        self._next, self._last = self._last, _sentinel
Wrap your iterator in this one before using it with takewhile():
myIterator = OneStepBuffered(myIterator)
while True:
    chunk = itertools.takewhile(getNewChunkLogic(), myIterator)
    process(chunk)
    myIterator.step_back()
Demo:
>>> from itertools import takewhile
>>> test_list = range(10)
>>> iterator = OneStepBuffered(test_list)
>>> list(takewhile(lambda i: i < 5, iterator))
[0, 1, 2, 3, 4]
>>> iterator.step_back()
>>> list(iterator)
[5, 6, 7, 8, 9]
I had the same problem. You might wish to use itertools.tee or itertools.pairwise (new in Python 3.10) to deal with this, but I didn't think those solutions were very elegant.
The best I found is to just rewrite takewhile. Based heavily on the documentation:
def takewhile_inclusive(predicate, it):
    for x in it:
        if predicate(x):
            yield x
        else:
            yield x
            break
In your loop you can elegantly treat the final element separately using unpacking:
*chunk,lastPiece = takewhile_inclusive(getNewChunkLogic(), myIterator)
You can then chain the last piece:
lastPiece = None
while True:
    *chunk, lastPiece = takewhile_inclusive(getNewChunkLogic(), myIterator)
    if lastPiece is not None:
        myIterator = itertools.chain([lastPiece], myIterator)
Given that the callable GetNewChunkLogic() will report True along the first chunk and False afterward, the following snippet solves the 'additional next step' problem of takewhile and is elegant because you don't have to implement the back-one-step logic:
from itertools import filterfalse, tee

def partition(pred, iterable):
    'Use a predicate to partition entries into true entries and false entries'
    # partition(is_odd, range(10)) --> 1 3 5 7 9 and 0 2 4 6 8
    t1, t2 = tee(iterable)
    return filter(pred, t1), filterfalse(pred, t2)

while True:
    head, tail = partition(GetNewChunkLogic(), myIterator)
    process(head)
    myIterator = tail
However, the most elegant way is to modify your GetNewChunkLogic into a generator and remove the while loop.
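One way to read that suggestion (a sketch of my own, not the answerer's actual code) is to fold the chunking into a single generator that takes the predicate factory and yields whole chunks, so there is no outer while loop and no element is lost to takewhile():
def chunked(iterable, new_predicate):
    # new_predicate() returns a fresh per-chunk predicate, in the same spirit
    # as GetNewChunkLogic() above; an element that fails the current predicate
    # is assumed to start the next chunk.
    chunk, pred = [], new_predicate()
    for item in iterable:
        if not pred(item):
            yield chunk
            chunk, pred = [], new_predicate()
        chunk.append(item)
    if chunk:
        yield chunk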
Here's another way you can do it. Yield a value (sentinel) when the predicate fails, but before yielding the value itself. Then group by values which aren't the sentinel.
Here, group_by_predicate requires a function that returns a predicate (predicate_gen). The predicate is recreated every time it fails:
from itertools import groupby
def group_by_predicate(predicate_gen, _iter):
    sentinel = object()

    def _group_with_sentinel():
        pred = predicate_gen()
        for n in _iter:
            while not pred(n):
                yield sentinel
                pred = predicate_gen()
            yield n

    g = _group_with_sentinel()
    for k, g in groupby(g, lambda s: s != sentinel):
        if k:
            yield g
This can then be used like:
def less_than_gen(maxn):
    """Return a predicate that returns true while the sum of inputs is < maxn"""
    def pred(i):
        pred.count += i
        return pred.count < maxn
    pred.count = 0
    return pred

data = iter(list(range(9)) * 3)
for g in group_by_predicate(lambda: less_than_gen(15), data):
    print(list(g))
Which outputs groups of numbers whose sum is all less than 15:
[0, 1, 2, 3, 4]
[5, 6]
[7]
[8, 0, 1, 2, 3]
[4, 5]
[6, 7]
[8, 0, 1, 2, 3]
[4, 5]
[6, 7]
[8]

How to write foldr (right fold) generator in Python?

Python's reduce is a left-fold, which means it is tail-recursive and its uses can be neatly rewritten as a loop. However, Python does not have a built-in function for doing right folds. Since right-folds are most naturally written with recursion (and Python doesn't like recursion as much as functional languages), I'm interested in writing a right fold (foldr) in terms of a generator.
How can this be done? And very specifically, how can it be done in Python 2.7?
EDIT: I should have mentioned that one of the benefits to foldr is that you can sometimes fold on infinite lists without risk of eating your stack alive. I would like to see answers that preserve this property.
For example, Haskell's foldr is lazy on both input and output and can allow for short-circuiting "step" functions to work on long/infinite inputs:
foldr (&&) True (repeat False) -- gives False
Any Python variant that uses list/reversed/etc. on the input will hang if given itertools.repeat(some_value).
Note that Python's reduce chokes in the same example because of strictness:
reduce(lambda x, y: x and y, itertools.repeat(False), True) # hangs
So a simple generator in python (without appropriate error checking):
def foldr(op, lst):
    l, x = reversed(list(lst)), None
    for i in l:
        if not x:
            x = i
            continue
        x = op(x, i)
        yield x
e.g.:
>>> from operator import mul
>>> for i in foldr(mul, [1,2,3,4]):
... print i
24
24
12
Almost identical to the 'roughly equivalent' implementation of reduce in the documentation:
def foldr(function, iterable, initializer=None):
    it = reversed(list(iterable))
    if initializer is None:
        try:
            initializer = next(it)
        except StopIteration:
            raise TypeError('foldr() of empty sequence with no initial value')
    accum_value = initializer
    for x in it:
        accum_value = function(accum_value, x)
        yield accum_value
[Edit]
So, purely as an exercise of the mind and with very little practical value, it is possible to defer the evaluation as long as there is some cooperation from the function that you are folding over, e.g.:
class Defer(object):
    def __init__(self, func, *args):
        self.func = func
        self.args = args

    def __bool__(self):
        return self.func(*self.args)

    def __int__(self):
        return self.func(*self.args)

def foldr(function, iterable, initializer):
    it = iter(iterable)
    try:
        return function(next(it), Defer(foldr, function, it, initializer))
    except StopIteration:
        return initializer
Then as long as the function converts to the right type you can defer the calculation, however this will not work with native operators, so not sure how useful this really is:
>>> print(foldr(lambda a, b: int(a)*int(b), [1,2,3,4], 1))
24
Defining a forever generator:
from itertools import repeat
def forever():
    yield False
    yield True
    for i in repeat(False):
        yield i
Folding or across an infinite list, returns when it finds a True
>>> print(foldr(lambda a, b: bool(a) or bool(b), forever(), False))
True
You will have to catch appropriate exceptions but should be an idea of how to do it iteratively:
from collections.abc import Iterator

def foldr(a, b, l):
    if isinstance(l, Iterator):
        it = reversed(list(l))
    else:
        it = reversed(l)
    try:
        nxt = next(it)
    except StopIteration:
        return
    c = a(nxt, b)
    stop = object()
    while nxt is not stop:
        yield c
        nxt = next(it, stop)
        c = a(nxt, c) if nxt is not stop else c

from operator import truediv
for c in foldr(truediv, 1, [1, 2, 3, 4, 5, 6, 7, 8]):
    print(c)
If you are going to define a function using generators, why not use the following?
def foldr(op, lst):
    return reduce(op, reversed(lst))
I think something like this is what you want:
def foldr(fn, seq, init):
    it = iter(seq)
    try:
        x = next(it)
    except StopIteration:
        try:
            for elem in init:
                yield elem
        except TypeError:
            yield init
    else:
        try:
            for elem in fn(x, foldr(fn, it, init)):
                yield elem
        except TypeError:
            yield fn(x, foldr(fn, it, init))
It's not exactly production-ready since it will hit the Python stack limit pretty quickly and it will be surprising in the presence of side-effecting functions due to the double call to fn, but it should be enough to give you an idea.

How to look ahead one element (peek) in a Python generator?


How to make a repeating generator in Python

How do you make a repeating generator, like xrange, in Python? For instance, if I do:
>>> m = xrange(5)
>>> print list(m)
>>> print list(m)
I get the same result both times — the numbers 0..4. However, if I try the same with yield:
>>> def myxrange(n):
...     i = 0
...     while i < n:
...         yield i
...         i += 1
>>> m = myxrange(5)
>>> print list(m)
>>> print list(m)
The second time I try to iterate over m, I get nothing back — an empty list.
Is there a simple way to create a repeating generator like xrange with yield, or generator comprehensions? I found a workaround on a Python tracker issue, which uses a decorator to transform a generator into an iterator. This restarts every time you start using it, even if you didn't use all the values last time through, just like xrange. I also came up with my own decorator, based on the same idea, which actually returns a generator, but one which can restart after throwing a StopIteration exception:
@decorator.decorator
def eternal(genfunc, *args, **kwargs):
    class _iterable:
        iter = None
        def __iter__(self): return self
        def next(self, *nargs, **nkwargs):
            self.iter = self.iter or genfunc(*args, **kwargs)
            try:
                return self.iter.next(*nargs, **nkwargs)
            except StopIteration:
                self.iter = None
                raise
    return _iterable()
Is there a better way to solve the problem, using only yield and/or generator comprehensions? Or something built into Python? So I don't need to roll my own classes and decorators?
Update
The comment by u0b34a0f6ae nailed the source of my misunderstanding:
xrange(5) does not return an iterator, it creates an xrange object. xrange objects can be iterated, just like dictionaries, more than once.
My "eternal" function was barking up the wrong tree entirely, by acting like an iterator/generator (__iter__ returns self) rather than like a collection/xrange (__iter__ returns a new iterator).
Not directly. Part of the flexibility that allows generators to be used for implementing co-routines, resource management, etc, is that they are always one-shot. Once run, a generator cannot be re-run. You would have to create a new generator object.
However, you can create your own class which overrides __iter__(). It will act like a reusable generator:
def multigen(gen_func):
    class _multigen(object):
        def __init__(self, *args, **kwargs):
            self.__args = args
            self.__kwargs = kwargs
        def __iter__(self):
            return gen_func(*self.__args, **self.__kwargs)
    return _multigen

@multigen
def myxrange(n):
    i = 0
    while i < n:
        yield i
        i += 1

m = myxrange(5)
print list(m)
print list(m)
Using itertools it's super easy.
import itertools

alist = [1,2,3]
repeatingGenerator = itertools.cycle(alist)

print(next(repeatingGenerator)) #=> yields 1
print(next(repeatingGenerator)) #=> yields 2
print(next(repeatingGenerator)) #=> yields 3
print(next(repeatingGenerator)) #=> yields 1 again!
If you write a lot of these, John Millikin's answer is the cleanest it gets.
But if you don't mind adding 3 lines and some indentation, you can do it without a custom decorator. This composes 2 tricks:
1. (Generally useful) You can easily make a class iterable without implementing .next() - just use a generator for __iter__(self)!
2. Instead of bothering with a constructor, you can define a one-off class inside a function.
Putting the two together:
def myxrange(n):
    class Iterable(object):
        def __iter__(self):
            i = 0
            while i < n:
                yield i
                i += 1
    return Iterable()
Small print: I didn't test performance, spawning classes like this might be wasteful. But awesome ;-)
I think the answer to that is "No". I'm possibly wrong. It may be that some of the funky new things you can do with generators in 2.6, involving arguments and exception handling, would allow something like what you want. But those features are mostly intended for implementing semi-continuations.
Why do you want to not have your own classes or decorators? And why did you want to create a decorator that returned a generator instead of a class instance?
You can reset iterators with more_itertools.seekable, a third-party tool.
Install via > pip install more_itertools.
import more_itertools as mit
def myxrange(n):
    """Yield integers."""
    i = 0
    while i < n:
        yield i
        i += 1

m = mit.seekable(myxrange(5))
print(list(m))
m.seek(0)  # reset iterator
print(list(m))
# [0, 1, 2, 3, 4]
# [0, 1, 2, 3, 4]
Note: memory consumption grows while advancing an iterator, so be wary wrapping large iterables.
Use this solution:
>>> myxrange_ = lambda x: myxrange(x)
>>> print list(myxrange_(5))
[0, 1, 2, 3, 4]
>>> print list(myxrange_(5))
[0, 1, 2, 3, 4]
>>> for number in myxrange_(5):
...     print number
...
0
1
2
3
4
and with a decorator:
>>> def decorator(generator):
...     return lambda x: generator(x)
...
>>> @decorator
... def myxrange(n):
...     i = 0
...     while i < n:
...         yield i
...         i += 1
...
>>> print list(myxrange(5))
[0, 1, 2, 3, 4]
>>> print list(myxrange(5))
[0, 1, 2, 3, 4]
Simple.

Resetting generator object in Python

I have a generator object returned by a function with multiple yields. The preparation needed to call this generator is a rather time-consuming operation. That is why I want to reuse the generator several times.
y = FunctionWithYield()
for x in y: print(x)
#here must be something to reset 'y'
for x in y: print(x)
Of course, copying the content into a simple list comes to mind. Is there a way to reset my generator?
See also: How to look ahead one element (peek) in a Python generator?
Generators can't be rewound. You have the following options:
Run the generator function again, restarting the generation:
y = FunctionWithYield()
for x in y: print(x)
y = FunctionWithYield()
for x in y: print(x)
Store the generator results in a data structure in memory or on disk which you can iterate over again:
y = list(FunctionWithYield())
for x in y: print(x)
# can iterate again:
for x in y: print(x)
The downside of option 1 is that it computes the values again. If that's CPU-intensive you end up calculating twice. On the other hand, the downside of 2 is the storage. The entire list of values will be stored in memory. If there are too many values, that can be impractical.
So you have the classic memory vs. processing tradeoff. I can't imagine a way of rewinding the generator without either storing the values or calculating them again.
You could also use tee as suggested by other answers, however that would still store the entire list in memory in your case, so it would be the same results and similar performance to option 2.
Another option is to use the itertools.tee() function to create a second version of your generator:
import itertools
y = FunctionWithYield()
y, y_backup = itertools.tee(y)
for x in y:
    print(x)
for x in y_backup:
    print(x)
This could be beneficial from memory usage point of view if the original iteration might not process all the items.
>>> def gen():
...     def init():
...         return 0
...     i = init()
...     while True:
...         val = (yield i)
...         if val == 'restart':
...             i = init()
...         else:
...             i += 1
>>> g = gen()
>>> g.next()
0
>>> g.next()
1
>>> g.next()
2
>>> g.next()
3
>>> g.send('restart')
0
>>> g.next()
1
>>> g.next()
2
Probably the simplest solution is to wrap the expensive part in an object and pass that to the generator:
data = ExpensiveSetup()
for x in FunctionWithYield(data): pass
for x in FunctionWithYield(data): pass
This way, you can cache the expensive calculations.
If you can keep all results in RAM at the same time, then use list() to materialize the results of the generator in a plain list and work with that.
I want to offer a different solution to an old problem
class IterableAdapter:
    def __init__(self, iterator_factory):
        self.iterator_factory = iterator_factory

    def __iter__(self):
        return self.iterator_factory()

squares = IterableAdapter(lambda: (x * x for x in range(5)))
for x in squares: print(x)
for x in squares: print(x)
The benefit of this when compared to something like list(iterator) is that this is O(1) space complexity and list(iterator) is O(n). The disadvantage is that, if you only have access to the iterator, but not the function that produced the iterator, then you cannot use this method. For example, it might seem reasonable to do the following, but it will not work.
g = (x * x for x in range(5))
squares = IterableAdapter(lambda: g)
for x in squares: print(x)
for x in squares: print(x)
Using a wrapper function to handle StopIteration
You could write a simple wrapper function to your generator-generating function that tracks when the generator is exhausted. It will do so using the StopIteration exception a generator throws when it reaches end of iteration.
import types
def generator_wrapper(function=None, **kwargs):
    assert function is not None, "Please supply a function"
    def inner_func(function=function, **kwargs):
        generator = function(**kwargs)
        assert isinstance(generator, types.GeneratorType), "Invalid function"
        try:
            yield next(generator)
        except StopIteration:
            generator = function(**kwargs)
            yield next(generator)
    return inner_func
As you can spot above, when our wrapper function catches a StopIteration exception, it simply re-initializes the generator object (using another instance of the function call).
And then, assuming you define your generator-supplying function somewhere as below, you could use the Python function decorator syntax to wrap it implicitly:
@generator_wrapper
def generator_generating_function(**kwargs):
    for item in ["a value", "another value"]:
        yield item
If GrzegorzOledzki's answer won't suffice, you could probably use send() to accomplish your goal. See PEP-0342 for more details on enhanced generators and yield expressions.
UPDATE: Also see itertools.tee(). It involves some of that memory vs. processing tradeoff mentioned above, but it might save some memory over just storing the generator results in a list; it depends on how you're using the generator.
If your generator is pure in the sense that its output only depends on the passed arguments and the step number, and you want the resulting generator to be restartable, here's a short snippet that might be handy:
import copy

def generator(i):
    yield from range(i)

g = generator(10)
print(list(g))
print(list(g))

class GeneratorRestartHandler(object):
    def __init__(self, gen_func, argv, kwargv):
        self.gen_func = gen_func
        self.argv = copy.copy(argv)
        self.kwargv = copy.copy(kwargv)
        self.local_copy = iter(self)

    def __iter__(self):
        return self.gen_func(*self.argv, **self.kwargv)

    def __next__(self):
        return next(self.local_copy)

def restartable(g_func: callable) -> callable:
    def tmp(*argv, **kwargv):
        return GeneratorRestartHandler(g_func, argv, kwargv)
    return tmp

@restartable
def generator2(i):
    yield from range(i)

g = generator2(10)
print(next(g))
print(list(g))
print(list(g))
print(next(g))
outputs:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[]
0
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
1
From official documentation of tee:
In general, if one iterator uses most or all of the data before
another iterator starts, it is faster to use list() instead of tee().
So it's best to use list(iterable) instead in your case.
You can define a function that returns your generator:
def f():
    def FunctionWithYield(generator_args):
        ...  # code here
    return FunctionWithYield
Now you can just do this as many times as you like:
for x in f()(generator_args): print(x)
for x in f()(generator_args): print(x)
I'm not sure what you meant by expensive preparation, but I guess you actually have
data = ... # Expensive computation
y = FunctionWithYield(data)
for x in y: print(x)
#here must be something to reset 'y'
# this is expensive - data = ... # Expensive computation
# y = FunctionWithYield(data)
for x in y: print(x)
If that's the case, why not reuse data?
There is no option to reset iterators. An iterator pops items out as you iterate through it with the next() function. The only way is to take a backup before iterating over the iterator object. Check below.
Creating an iterator object with items 0 to 9:
i = iter(range(10))
Iterating through the next() function, which will pop an item out:
print(next(i))
Converting the iterator object to a list:
L = list(i)
print(L)
output: [1, 2, 3, 4, 5, 6, 7, 8, 9]
So item 0 is already popped out. Also, all the remaining items are popped as we converted the iterator to a list.
next(i)
Traceback (most recent call last):
  File "<pyshell#129>", line 1, in <module>
    next(i)
StopIteration
So you need to convert the iterator to a list for backup before you start iterating.
A list can be converted back to an iterator with iter(<list-object>).
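A minimal sketch of that backup idea (my own illustration), assuming the whole iterable fits in memory:
i = iter(range(10))
backup = list(i)                  # one-time backup; this exhausts the original iterator
first_pass = list(iter(backup))   # fresh iterator over the stored list
second_pass = list(iter(backup))  # and another one, as often as needed
print(first_pass == second_pass)  # True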
You can now use more_itertools.seekable (a third-party tool) which enables resetting iterators.
Install via > pip install more_itertools
import more_itertools as mit

y = mit.seekable(FunctionWithYield())
for x in y:
    print(x)

y.seek(0)  # reset iterator
for x in y:
    print(x)
Note: memory consumption grows while advancing the iterator, so be wary of large iterables.
You can do that by using itertools.cycle(): you can create an iterator with this method and then execute a for loop over the iterator, which will loop over its values.
For example:
from itertools import cycle

def generator():
    for j in cycle([i for i in range(5)]):
        yield j

gen = generator()
for i in range(20):
    print(next(gen))
will generate 20 numbers, 0 to 4 repeatedly.
A note from the docs:
Note, this member of the toolkit may require significant auxiliary storage (depending on the length of the iterable).
How it works for me:
csv_rows = my_generator()
for _ in range(10):
    for row in csv_rows:
        print(row)
    csv_rows = my_generator()
Ok, you say you want to call a generator multiple times, but initialization is expensive... What about something like this?
class InitializedFunctionWithYield(object):
    def __init__(self):
        # do expensive initialization
        self.start = 5

    def __call__(self, *args, **kwargs):
        # do cheap iteration
        for i in xrange(5):
            yield self.start + i

y = InitializedFunctionWithYield()
for x in y():
    print x
for x in y():
    print x
Alternatively, you could just make your own class that follows the iterator protocol and defines some sort of 'reset' function.
class MyIterator(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.i = 5

    def __iter__(self):
        return self

    def next(self):
        i = self.i
        if i > 0:
            self.i -= 1
            return i
        else:
            raise StopIteration()

my_iterator = MyIterator()
for x in my_iterator:
    print x
print 'resetting...'
my_iterator.reset()
for x in my_iterator:
    print x
https://docs.python.org/2/library/stdtypes.html#iterator-types
http://anandology.com/python-practice-book/iterators.html
My answer solves a slightly different problem: the generator is expensive to initialize and each generated object is expensive to generate, and we need to consume the generator multiple times in multiple functions. In order to call the generator and generate each object exactly once, we can use threads and run each of the consuming methods in a different thread. We may not achieve true parallelism due to the GIL, but we will achieve our goal.
This approach did a good job in the following case: a deep learning model processes a lot of images. The result is a lot of masks for a lot of objects on each image. Each mask consumes memory. We have around 10 methods which compute different statistics and metrics, but they take all the images at once, and all the images cannot fit in memory. The methods can easily be rewritten to accept an iterator.
import threading
from typing import List

class GeneratorSplitter:
    '''
    Split a generator object into multiple generators which will be synchronised.
    Each call to each of the sub-generators will cause only one call to the input
    generator. This way multiple methods on threads can iterate the input
    generator, and the generator will be cycled only once.
    '''
    def __init__(self, gen):
        self.gen = gen
        self.consumers: List["GeneratorSplitter.InnerGen"] = []
        self.thread: threading.Thread = None
        self.value = None
        self.finished = False
        self.exception = None

    def GetConsumer(self):
        # Returns a generator object.
        cons = self.InnerGen(self)
        self.consumers.append(cons)
        return cons

    def _Work(self):
        try:
            for d in self.gen:
                for cons in self.consumers:
                    cons.consumed.wait()
                    cons.consumed.clear()
                self.value = d
                for cons in self.consumers:
                    cons.readyToRead.set()
            for cons in self.consumers:
                cons.consumed.wait()
            self.finished = True
            for cons in self.consumers:
                cons.readyToRead.set()
        except Exception as ex:
            self.exception = ex
            for cons in self.consumers:
                cons.readyToRead.set()

    def Start(self):
        self.thread = threading.Thread(target=self._Work)
        self.thread.start()

    class InnerGen:
        def __init__(self, parent: "GeneratorSplitter"):
            self.parent: "GeneratorSplitter" = parent
            self.readyToRead: threading.Event = threading.Event()
            self.consumed: threading.Event = threading.Event()
            self.consumed.set()

        def __iter__(self):
            return self

        def __next__(self):
            self.readyToRead.wait()
            self.readyToRead.clear()
            if self.parent.finished:
                raise StopIteration()
            if self.parent.exception:
                raise self.parent.exception
            val = self.parent.value
            self.consumed.set()
            return val
Usage:
genSplitter = GeneratorSplitter(expensiveGenerator)
metrics={}
executor = ThreadPoolExecutor(max_workers=3)
f1 = executor.submit(mean,genSplitter.GetConsumer())
f2 = executor.submit(max,genSplitter.GetConsumer())
f3 = executor.submit(someFancyMetric,genSplitter.GetConsumer())
genSplitter.Start()
metrics.update(f1.result())
metrics.update(f2.result())
metrics.update(f3.result())
If you want to reuse this generator multiple times with a predefined set of arguments, you can use functools.partial.
from functools import partial

func_with_yield = partial(FunctionWithYield, arg0, arg1)
for i in range(100):
    for x in func_with_yield():
        print(x)
This will wrap the generator function in another function so each time you call func_with_yield() it creates the same generator function.
It can be done with a code object. Here is an example.
code_str="y=(a for a in [1,2,3,4])"
code1=compile(code_str,'<string>','single')
exec(code1)
for i in y: print i
1
2
3
4
for i in y: print i
exec(code1)
for i in y: print i
1
2
3
4
