How to asynchronously run functions within a for-loop in Python?

Hi, I was wondering how to asynchronously call a function within a for-loop in Python, allowing the for-loop to execute more quickly. bar() in this case is a time-intensive function, which is why I want the calls to it to be non-blocking.
Here is what I want to refactor:
def bar(item):
    # manipulate item
    return newItem

newItems = []
for item in items:
    newItem = bar(item)
    newItems.append(newItem)
Here is what I've tried:
import asyncio

async def bar(item):
    # manipulate item
    return newItem

async def foo():
    newItems = [bar(item) for item in items]
    newItems = await asyncio.gather(*newItems)
    return newItems

newItems = asyncio.run(foo())
This doesn't seem to work as each function call still waits for the previous one to finish before starting. I would love tips on what I might be doing wrong. Thank you so much for any and all help!

If your tasks are really asynchronous, you can do it the following way:
import asyncio

async def bar(item: int) -> int:
    # manipulate item
    print("Started")
    await asyncio.sleep(5)
    print("Finished")
    return item ** 2

async def foo():
    items = range(1, 10)
    tasks = [bar(item) for item in items]
    new_items = await asyncio.gather(*tasks)
    return new_items

if __name__ == '__main__':
    results = asyncio.run(foo())
    print(results)
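Note that asyncio.gather only buys concurrency when the coroutines actually await something (network I/O, asyncio.sleep, and so on). If bar() does blocking or CPU-bound work, the calls will still run one after another. A minimal sketch of one common workaround, assuming Python 3.9+ for asyncio.to_thread (my addition, not part of the original answer):

import asyncio
import time

def bar(item: int) -> int:
    # a blocking function that asyncio cannot interleave on its own
    time.sleep(1)
    return item ** 2

async def foo():
    items = range(1, 10)
    # run each blocking call in a worker thread so gather() can overlap them
    tasks = [asyncio.to_thread(bar, item) for item in items]
    return await asyncio.gather(*tasks)

if __name__ == '__main__':
    print(asyncio.run(foo()))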


async generator with slow consumer

If I have a slow consumer of an async generator that emits values at a quick frequency, and I only care about consuming the latest value (i.e. I'm not fussed about dropping values), is there a way to achieve this in an eloquent way? I've taken a look at aiostream but I couldn't seem to find anything that fits.
Here is a simple example:
import asyncio
import aiostream

async def main():
    xs = aiostream.stream.count(interval=0.2)
    async with xs.stream() as stream:
        async for x in stream:  # do something here to drop updates that aren't processed in time
            print(x)
            await asyncio.sleep(1.0)

if __name__ == "__main__":
    asyncio.run(main())
I propose using a class that wraps the external generator, since I don't know of any library that does this. The class consumes the generator internally in a task and keeps only the last value; it acts as a wrapper over the generator you actually want to consume.
import asyncio

class RelaxedGenerator:
    def __init__(self, async_gen):
        self.last_value = None      # the last value generated
        self.consumed_last = True   # flags the last value as consumed
        self.async_gen = async_gen  # generator whose values we can drop
        self.exhausted = False      # flags the generator as fully consumed

    @classmethod
    async def start(cls, async_gen):
        self = cls(async_gen())
        asyncio.create_task(self.generate())
        return self

    async def generate(self):
        # consume the external async generator here,
        # saving only the last value for further processing
        while True:
            try:
                self.last_value = await self.async_gen.__anext__()
                self.consumed_last = False
            except StopAsyncIteration:
                self.exhausted = True
                break

    async def stream(self):
        while not self.exhausted:
            if self.consumed_last:
                await asyncio.sleep(0.01)  # avoid blocking the event loop
                continue
            self.consumed_last = True
            yield self.last_value
Testing with a simple generator:
import asyncio
from random import uniform

async def numbers_stream(max_=100):
    next_int = -1
    while next_int < max_:
        next_int += 1
        yield next_int
        await asyncio.sleep(0.2)

async def main():
    gen = await RelaxedGenerator.start(numbers_stream)
    async for value in gen.stream():
        print(value, end=", ", flush=True)
        await asyncio.sleep(uniform(1, 2))

asyncio.run(main())
Output:
0, 6, 15, 21, 28, 38, 43, 48, 57, 65, 73, 81, 89, 96,
Other things to keep in mind are whether you want to process the very last value and whether the generator you are working with will ever be exhausted in practice. Here I assume that you don't care about the last value and that the generator can be exhausted.
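A possible refinement (my addition, not part of the original answer): the 0.01 s polling in stream() can be replaced with an asyncio.Event, so the consumer wakes exactly when a fresh value lands. A sketch under the same assumptions (the last value may be dropped, the generator may be exhausted):

import asyncio

class RelaxedGeneratorEvent:
    def __init__(self, async_gen):
        self.last_value = None
        self.exhausted = False
        self.async_gen = async_gen
        self.new_value = asyncio.Event()  # set whenever a fresh value arrives

    @classmethod
    async def start(cls, async_gen):
        self = cls(async_gen())
        asyncio.create_task(self.generate())
        return self

    async def generate(self):
        async for value in self.async_gen:
            self.last_value = value
            self.new_value.set()  # wake the consumer
        self.exhausted = True
        self.new_value.set()      # final wake-up so stream() can exit

    async def stream(self):
        while True:
            await self.new_value.wait()
            self.new_value.clear()
            if self.exhausted:
                return
            yield self.last_value

It is used exactly like RelaxedGenerator.start above.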
You could add a queue between your producer and consumer which forgets old results. Unfortunately, there is no implementation for it in the standard library, but it is almost there. If you check the implementation of asyncio.Queue you will notice the use of collections.deque, see https://github.com/python/cpython/blob/3.10/Lib/asyncio/queues.py#L49.
The collections.deque takes the optional argument maxlen to discard previously added items, see https://docs.python.org/3/library/collections.html#collections.deque.
Making use of it enables us to create a custom queue which only keeps the last n items.
import asyncio
import collections

class RollingQueue(asyncio.Queue):
    def _init(self, maxsize):
        # deque(maxlen=...) silently discards the oldest item when full
        self._queue = collections.deque(maxlen=maxsize)

    def full(self):
        return False
Now you could use this queue as follows:
async def numbers(nmax):
    for n in range(nmax):
        yield n
        await asyncio.sleep(0.3)

async def fill_queue(producer, queue):
    async for item in producer:
        queue.put_nowait(item)
    queue.put_nowait(None)  # sentinel: the producer is done

async def main():
    queue1 = RollingQueue(1)
    numgen = numbers(10)
    asyncio.create_task(fill_queue(numgen, queue1))
    while True:
        res = await queue1.get()
        if res is None:
            break
        print(res)
        await asyncio.sleep(1)

asyncio.run(main())
Where I set the queue size to 1 to just keep the last item as required in your question.
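A quick self-contained check of the eviction behavior (my example, not from the original answer): with maxsize=1, a second put_nowait displaces the first item instead of raising QueueFull:

import asyncio
import collections

class RollingQueue(asyncio.Queue):
    def _init(self, maxsize):
        self._queue = collections.deque(maxlen=maxsize)

    def full(self):
        return False

async def demo():
    q = RollingQueue(1)
    q.put_nowait("old")
    q.put_nowait("new")   # evicts "old" via deque(maxlen=1)
    print(await q.get())  # -> new

asyncio.run(demo())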
Using a combination of the two provided answers, I came up with the following solution which seems to work quite well:
import asyncio
import aiostream
import collections

class RollingQueue(asyncio.Queue):
    def _init(self, maxsize):
        self._queue = collections.deque(maxlen=maxsize)

    def full(self):
        return False

@aiostream.operator(pipable=True)
async def drop_stream(source, max_n=1):
    queue = RollingQueue(max_n)
    exhausted = False

    async def inner_task():
        async with aiostream.streamcontext(source) as streamer:
            async for item in streamer:
                queue.put_nowait(item)
        nonlocal exhausted
        exhausted = True

    task = asyncio.create_task(inner_task())
    try:
        while not exhausted:
            item = await queue.get()
            yield item
    finally:
        task.cancel()

async def main():
    xs = aiostream.stream.count(interval=0.2) | drop_stream.pipe(1) | aiostream.pipe.take(5)
    async with xs.stream() as stream:
        async for x in stream:
            print(x)
            await asyncio.sleep(1.0)

if __name__ == "__main__":
    asyncio.run(main())
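One caveat worth noting (my observation, not from the original post): if the source finishes while the consumer is blocked in queue.get(), the loop above never wakes up to see exhausted. A sentinel pushed by the pumping task avoids that. A self-contained sketch without aiostream, under the same "dropping the last value is acceptable" assumption, since the sentinel itself may evict a pending item:

import asyncio
import collections

class RollingQueue(asyncio.Queue):
    def _init(self, maxsize):
        self._queue = collections.deque(maxlen=maxsize)

    def full(self):
        return False

_DONE = object()  # sentinel marking the end of the source

async def drop_stream(source, max_n=1):
    queue = RollingQueue(max_n)

    async def pump():
        async for item in source:
            queue.put_nowait(item)
        queue.put_nowait(_DONE)  # wake the consumer even if it is blocked in get()

    task = asyncio.create_task(pump())
    try:
        while True:
            item = await queue.get()
            if item is _DONE:
                return
            yield item
    finally:
        task.cancel()

async def fast_source():
    for i in range(20):
        yield i
        await asyncio.sleep(0.05)

async def main():
    async for x in drop_stream(fast_source()):
        print(x)
        await asyncio.sleep(0.3)

asyncio.run(main())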

How to yield from an async for loop using asyncio?

I'm trying to write a simple asynchronous data batch generator, but I'm having trouble understanding how to yield from an async for loop. Here is a simple class illustrating my idea:
import asyncio
from typing import List

class AsyncSimpleIterator:
    def __init__(self, data: List[str], batch_size=None):
        self.data = data
        self.batch_size = batch_size
        self.doc2index = self.get_doc_ids()

    def get_doc_ids(self):
        return list(range(len(self.data)))

    async def get_batch_data(self, doc_ids):
        print("get_batch_data() running")
        page = [self.data[j] for j in doc_ids]
        return page

    async def get_docs(self, batch_size):
        print("get_docs() running")
        _batch_size = self.batch_size or batch_size
        batches = [self.doc2index[i:i + _batch_size] for i in
                   range(0, len(self.doc2index), _batch_size)]
        for _, doc_ids in enumerate(batches):
            docs = await self.get_batch_data(doc_ids)
            yield docs, doc_ids

    async def main(self):
        print("main() running")
        async for res in self.get_docs(batch_size=2):
            print(res)  # how to yield instead of print?

    def gen_batches(self):
        # how to get results of self.main() here?
        loop = asyncio.get_event_loop()
        loop.run_until_complete(self.main())
        loop.close()

DATA = ["Hello, world!"] * 4
iterator = AsyncSimpleIterator(DATA)
iterator.gen_batches()
So, my question is: how do I yield a result from main() so I can gather it inside gen_batches()?
When I print the result inside main(), I get the following output:
main() running
get_docs() running
get_batch_data() running
(['Hello, world!', 'Hello, world!'], [0, 1])
get_batch_data() running
(['Hello, world!', 'Hello, world!'], [2, 3])
I'm trying to write a simple asynchronous data batch generator, but having troubles with understanding how to yield from an async for loop
Yielding from an async for works like a regular yield, except that it also has to be collected by an async for or equivalent. For example, the yield in get_docs makes it an async generator. If you replace print(res) with yield res in main(), it will make main() an async generator as well.
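Concretely, the suggested change to main() looks like this (a method on the AsyncSimpleIterator class from the question):

async def main(self):
    print("main() running")
    async for res in self.get_docs(batch_size=2):
        yield res  # main() is now an async generator itself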
the generator in main() should exhaust in gen_batches(), so I can gather all results in gen_batches()
To collect the values produced by an async generator (such as main() with print(res) replaced with yield res), you can use a helper coroutine:
def gen_batches(self):
    loop = asyncio.get_event_loop()
    async def collect():
        return [item async for item in self.main()]
    items = loop.run_until_complete(collect())
    loop.close()
    return items
The collect() helper makes use of a PEP 530 asynchronous comprehension, which can be thought of as syntactic sugar for the more explicit:
async def collect():
    l = []
    async for item in self.main():
        l.append(item)
    return l
A working solution based on @user4815162342's answer to the original question:
import asyncio
from typing import List

class AsyncSimpleIterator:
    def __init__(self, data: List[str], batch_size=None):
        self.data = data
        self.batch_size = batch_size
        self.doc2index = self.get_doc_ids()

    def get_doc_ids(self):
        return list(range(len(self.data)))

    async def get_batch_data(self, doc_ids):
        print("get_batch_data() running")
        page = [self.data[j] for j in doc_ids]
        return page

    async def get_docs(self, batch_size):
        print("get_docs() running")
        _batch_size = self.batch_size or batch_size
        batches = [self.doc2index[i:i + _batch_size] for i in
                   range(0, len(self.doc2index), _batch_size)]
        for _, doc_ids in enumerate(batches):
            docs = await self.get_batch_data(doc_ids)
            yield docs, doc_ids

    def gen_batches(self):
        loop = asyncio.get_event_loop()
        async def collect():
            return [j async for j in self.get_docs(batch_size=2)]
        items = loop.run_until_complete(collect())
        loop.close()
        return items

DATA = ["Hello, world!"] * 4
iterator = AsyncSimpleIterator(DATA)
result = iterator.gen_batches()
print(result)
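On Python 3.7+ the manual loop management in gen_batches can be simplified with asyncio.run, which creates, runs, and closes a fresh event loop in one call (my note, a drop-in replacement for the method above):

def gen_batches(self):
    async def collect():
        return [j async for j in self.get_docs(batch_size=2)]
    # asyncio.run creates a new event loop, runs collect(), and closes the loop
    return asyncio.run(collect())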

How to use an asyncio loop inside another asyncio loop

I have been trying all kinds of things to be able to use an asyncio loop inside another asyncio loop. Most of the time my tests just ended in errors, such as:
RuntimeError: This event loop is already running
My example code below is just the base test I started with, so you can see the basics of what I am trying to do. I tried so many things after this test, it was just too confusing, so I figured I should keep it simple when asking for help. If anyone can point me in the right direction, that would be great. Thank you for your time!
import asyncio

async def fetch(data):
    message = 'Hey {}!'.format(data)
    other_data = ['image_a.com', 'image_b.com', 'image_c.com']
    images = sub_run(other_data)
    return {'message': message, 'images': images}

async def bound(sem, data):
    async with sem:
        r = await fetch(data)
        return r

async def build(dataset):
    tasks = []
    sem = asyncio.Semaphore(400)
    for data in dataset:
        task = asyncio.ensure_future(bound(sem, data))
        tasks.append(task)
    r = await asyncio.gather(*tasks)
    return r

def run(dataset):
    loop = asyncio.get_event_loop()
    future = asyncio.ensure_future(build(dataset))
    responses = loop.run_until_complete(future)
    loop.close()
    return responses

async def sub_fetch(data):
    image = 'https://{}'.format(data)
    return image

async def sub_bound(sem, data):
    async with sem:
        r = await sub_fetch(data)
        return r

async def sub_build(dataset):
    tasks = []
    sem = asyncio.Semaphore(400)
    for data in dataset:
        task = asyncio.ensure_future(sub_bound(sem, data))
        tasks.append(task)
    r = await asyncio.gather(*tasks)
    return r

def sub_run(dataset):
    loop = asyncio.get_event_loop()
    future = asyncio.ensure_future(sub_build(dataset))
    responses = loop.run_until_complete(future)
    loop.close()
    return responses

if __name__ == '__main__':
    dataset = ['Joe', 'Bob', 'Zoe', 'Howard']
    responses = run(dataset)
    print(responses)
Running loop.run_until_complete inside a running event loop would block the outer loop, thus defeating the purpose of using asyncio. Because of that, asyncio event loops aren't recursive, and one shouldn't need to run them recursively. Instead of creating an inner event loop, await a task on the existing one.
In your case, remove sub_run and simply replace its usage:
images = sub_run(other_data)
with:
images = await sub_build(other_data)
And it will work just fine, running the sub-coroutines and not continuing with the outer coroutine until the inner one is complete, as you likely intended from the sync code.
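Putting the fix in context, fetch() becomes the following, and the sub_run wrapper for the inner loop can be deleted entirely (a sketch based on the code above):

async def fetch(data):
    message = 'Hey {}!'.format(data)
    other_data = ['image_a.com', 'image_b.com', 'image_c.com']
    # await the inner coroutines on the already-running event loop
    images = await sub_build(other_data)
    return {'message': message, 'images': images}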

Builtin way to transform asynchronous iterable to synchronous iterable list

Python 3.6 now has asynchronous iterables. Is there a builtin way to transform an asynchronous iterable into a synchronous iterable?
I currently have this helper function, but it feels very un-pythonic. Is there a better way to do this?
async def aiter_to_list(aiter):
    l = []
    async for i in aiter:
        l.append(i)
    return l
From Python 3.6 you can use asynchronous comprehensions:
import asyncio

async def async_iter():
    for i in range(5):
        yield i

async def main():
    # async comprehension (only valid inside an async function)
    sync_list = [i async for i in async_iter()]
    print(sync_list)  # [0, 1, 2, 3, 4]

asyncio.run(main())
You can use aiostream.stream.list:
import asyncio
from aiostream import stream

async def agen():
    yield 1
    yield 2
    yield 3

async def main():
    lst = await stream.list(agen())
    print(lst)  # prints [1, 2, 3]

asyncio.run(main())
More operators and examples in the documentation.
Your "asynchronous to synchronous" helper is itself asynchronous; not a big change at all. In general: no, you cannot make something asynchronous synchronous. An asynchronous value will be supplied "sometime later"; you cannot make that into "now" because the value doesn't exist "now" and you will have to wait for it, asynchronously.
These functions allow you to convert in both directions, iterable <=> async iterable, not just to plain lists.
Basic imports
import asyncio
import threading
import time

DONE = object()
TIMEOUT = 0.001
The function to_sync_iterable will convert any async iterable to a sync iterable:
def to_sync_iterable(async_iterable, maxsize=0):
    def sync_iterable():
        queue = asyncio.Queue(maxsize=maxsize)
        loop = asyncio.get_event_loop()
        t = threading.Thread(target=_run_coroutine, args=(loop, async_iterable, queue))
        t.daemon = True
        t.start()
        while True:
            if not queue.empty():
                x = queue.get_nowait()
                if x is DONE:
                    break
                else:
                    yield x
            else:
                time.sleep(TIMEOUT)
        t.join()
    return sync_iterable()

def _run_coroutine(loop, async_iterable, queue):
    loop.run_until_complete(_consume_async_iterable(async_iterable, queue))

async def _consume_async_iterable(async_iterable, queue):
    async for x in async_iterable:
        await queue.put(x)
    await queue.put(DONE)
You can use it like this:
async def slow_async_generator():
    yield 0
    await asyncio.sleep(1)
    yield 1
    await asyncio.sleep(1)
    yield 2
    await asyncio.sleep(1)
    yield 3

for x in to_sync_iterable(slow_async_generator()):
    print(x)
The function to_async_iterable will convert any sync iterable to an async iterable:
def to_async_iterable(iterable, maxsize=0):
    async def async_iterable():
        queue = asyncio.Queue(maxsize=maxsize)
        loop = asyncio.get_event_loop()
        task = loop.run_in_executor(None, lambda: _consume_iterable(loop, iterable, queue))
        while True:
            x = await queue.get()
            if x is DONE:
                break
            else:
                yield x
        await task
    return async_iterable()

def _consume_iterable(loop, iterable, queue):
    for x in iterable:
        while True:
            if not queue.full():
                loop.call_soon_threadsafe(queue.put_nowait, x)
                break
            else:
                time.sleep(TIMEOUT)
    while True:
        if not queue.full():
            loop.call_soon_threadsafe(queue.put_nowait, DONE)
            break
        else:
            time.sleep(TIMEOUT)
This one is especially useful for asyncio programs because it won't block the event loop even if the sync iterable blocks. You can use it like this:
def slow_sync_generator():
    yield 0
    time.sleep(1)
    yield 1
    time.sleep(1)
    yield 2
    time.sleep(1)
    yield 3

async def async_task():
    async for x in to_async_iterable(slow_sync_generator()):
        print(x)

asyncio.get_event_loop().run_until_complete(async_task())

How to use 'yield' inside async function?

I want to use generator yield and async functions. I read this topic, and wrote the following code:
import asyncio

async def createGenerator():
    mylist = range(3)
    for i in mylist:
        await asyncio.sleep(1)
        yield i*i

async def start():
    mygenerator = await createGenerator()
    for i in mygenerator:
        print(i)

loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(start())
except KeyboardInterrupt:
    loop.stop()
But I got the error:
SyntaxError: 'yield' inside async function
How to use yield generator in async function?
Update:
Starting with Python 3.6 we have asynchronous generators and are able to use yield directly inside coroutines.
import asyncio

async def async_generator():
    for i in range(3):
        await asyncio.sleep(1)
        yield i*i

async def main():
    async for i in async_generator():
        print(i)

loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(main())
finally:
    # see: https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.shutdown_asyncgens
    loop.run_until_complete(loop.shutdown_asyncgens())
    loop.close()
Old answer for Python 3.5:
You can't yield inside coroutines. The only way is to implement an asynchronous iterator manually using the __aiter__/__anext__ magic methods. In your case:
import asyncio

class async_generator:
    def __init__(self, stop):
        self.i = 0
        self.stop = stop

    def __aiter__(self):  # since Python 3.5.2, __aiter__ returns the iterator directly
        return self

    async def __anext__(self):
        i = self.i
        self.i += 1
        if self.i <= self.stop:
            await asyncio.sleep(1)
            return i * i
        else:
            raise StopAsyncIteration

async def main():
    async for i in async_generator(3):
        print(i)

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
Output:
0
1
4
Here are two more examples: 1, 2
New Python 3.6 comes with support for asynchronous generators.
PEP 0525
What's new in Python 3.6
PS: At the moment of writing, Python 3.6 is still in beta. If you are on GNU/Linux or OS X and you cannot wait, you can try the new Python with pyenv.
This should work with Python 3.6 (tested with 3.6.0b1):
import asyncio

async def createGenerator():
    mylist = range(3)
    for i in mylist:
        await asyncio.sleep(1)
        yield i*i

async def start():
    async for i in createGenerator():
        print(i)

loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(start())
except KeyboardInterrupt:
    loop.stop()
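On Python 3.7+ the explicit loop handling above can be replaced with asyncio.run, which creates the loop, runs the coroutine, and shuts down async generators on exit (my note, not part of the original answers):

import asyncio

async def create_generator():
    for i in range(3):
        await asyncio.sleep(1)
        yield i * i

async def start():
    async for i in create_generator():
        print(i)

# asyncio.run also calls loop.shutdown_asyncgens() during cleanup
asyncio.run(start())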
