I'm looking at this piece of code, taken from the example here,
and I want to know: at what exact moment does the consumers() coroutine get called?
import asyncio
import itertools as it
import os
import random
import time
async def makeitem(size: int = 5) -> str:
    """Return a random hex string encoding ``size`` bytes from ``os.urandom``."""
    raw_bytes = os.urandom(size)
    return raw_bytes.hex()
async def randsleep(a: int = 1, b: int = 5, caller=None) -> None:
    """Sleep for a random whole number of seconds drawn from ``[a, b]``.

    Args:
        a: Smallest possible delay in seconds (inclusive).
        b: Largest possible delay in seconds (inclusive).
        caller: Optional label; when truthy, the chosen delay is printed
            before sleeping.
    """
    # Draw from the caller-supplied bounds rather than a fixed range, so the
    # ``a`` and ``b`` parameters actually take effect.
    i = random.randint(a, b)
    if caller:
        print(f"{caller} sleeping for {i} seconds.")
    await asyncio.sleep(i)
async def produce(name: int, q: asyncio.Queue) -> None:
    """Put a random number (0 to 10) of timestamped items on ``q``.

    Each item is an ``(item, created_at)`` tuple, where ``created_at`` is the
    ``time.perf_counter()`` reading taken just before the put.
    """
    item_count = random.randint(0, 10)
    for _ in range(item_count):  # one producer handles its items one at a time
        await randsleep(caller=f"Producer {name}")
        item = await makeitem()
        created_at = time.perf_counter()
        await q.put((item, created_at))
        print(f"Producer {name} added <{item}> to queue.")
async def consume(name: int, q: asyncio.Queue) -> None:
    """Forever take ``(item, created_at)`` tuples off ``q`` and print their latency.

    Never returns on its own; it only ends when its task is cancelled.
    """
    while True:
        await randsleep(caller=f"Consumer {name}")
        item, created_at = await q.get()
        latency = time.perf_counter() - created_at
        print(f"Consumer {name} got element <{item}> in {latency:0.5f} seconds.")
        # Tell the queue this item is finished so q.join() can complete.
        q.task_done()
async def main(nprod: int, ncon: int):
    """Run ``nprod`` producers and ``ncon`` consumers over one shared queue."""
    q = asyncio.Queue()

    # create_task() only schedules the coroutines; they get their first turn
    # once this coroutine suspends at one of the awaits below.
    producers = []
    for idx in range(nprod):
        producers.append(asyncio.create_task(produce(idx, q)))
    consumers = []
    for idx in range(ncon):
        consumers.append(asyncio.create_task(consume(idx, q)))

    await asyncio.gather(*producers)
    # Returns once task_done() has been called for every queued item, i.e.
    # after the consumers have processed everything.
    await q.join()
    # The consumers loop forever, so they have to be cancelled explicitly.
    for worker in consumers:
        worker.cancel()
if __name__ == "__main__":
    import argparse

    # Fixed seed so the random delays are the same on every run.
    random.seed(444)

    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--nprod", type=int, default=5)
    parser.add_argument("-c", "--ncon", type=int, default=10)
    ns = parser.parse_args()

    start = time.perf_counter()
    asyncio.run(main(**vars(ns)))
    elapsed = time.perf_counter() - start
    print(f"Program completed in {elapsed:0.5f} seconds.")
I only see this line triggering the execution for both producer and consumer coroutines.
await asyncio.gather(*producers)
I don't understand why nothing executes in the background until the await line mentioned above, even though the tasks are already defined and created in these lines (none of the print statements inside the producers and consumers are displayed before that point):
producers = [asyncio.create_task(produce(n, q)) for n in range(nprod)]
consumers = [asyncio.create_task(consume(n, q)) for n in range(ncon)]
While create_task() doesn't start executing the coroutine immediately, it schedules execution in the background at the first possible opportunity, i.e. at the first await that suspends to the event loop.
gather() is just a helper function that waits for the given awaitables to complete. It doesn't prevent previously scheduled coroutines (such as those started with create_task, but also start_server etc.) from executing.
i want to know at what exact moment does the consumers() coroutine get called?
Since consumers is a coroutine, while it's called once, it can suspend and resume many times, each await serving as a point of suspension/resumption. When you call create_task() it is placed in a queue of runnable coroutines. In each iteration of the event loop asyncio goes through runnable coroutines and executes a "step" of each, where the step executes it until the first await that chooses to suspend. In your code the step happens when your main coroutine suspends in order to wait for gather() to complete.
Related
I need to implement the algorithm using asyncio with the following conditions (behaviour):
1. Check that the list of arguments is not empty; if it is empty, finish execution.
2. Pop the next argument from the list of arguments, create a coroutine with
this argument and schedule it for execution.
3. No more than 'async_level' coroutines may be executing at the same time.
4. When a coroutine finishes execution, go to step 1.
The tasks must not all be scheduled at once (as with asyncio.gather), but in parts: when one task finishes execution, a new one takes its place.
I tried to do it with asyncio.as_completed() but it doesn't actually work as expected:
# First attempt: start `async_level` jobs, then try to add one new job each
# time a running one completes. The bare `await` means this fragment is meant
# to run inside a coroutine.
async_level = 4
params_count = 10
params = [i for i in range(1, params_count + 1)]
# Start the first `async_level` jobs right away.
tasks = {asyncio.create_task(job(param)) for param in params[0: async_level]}
# Remaining parameters, handed out one at a time via next().
params = iter(params[async_level:])
while True:
# NOTE: This won't work: tasks added to 'tasks' after 'as_completed' has been invoked are not picked up by it, so execution actually ends when the last coroutine known to 'as_completed' ends.
for task in asyncio.as_completed(tasks):
print(f"len(tasks) = {len(tasks)}")
await task
try:
# Replace the finished job with the next pending parameter, if any.
param = next(params)
tasks.add(asyncio.create_task(job(param)))
except StopIteration:
print("StopIteration")
break
Also, I tried to implement it using asyncio.BoundedSemaphore, but first two conditions are not met:
async_level = 4
params_count = 10
params = [i for i in range(1, params_count + 1)]
async def semaphore_job(name, _asyncio_semaphore):
async with _asyncio_semaphore:
await job(name)
asyncio_semaphore = asyncio.BoundedSemaphore(async_level)
jobs = []
# NOTE: This variant schedule all jobs at ones and it's significant drawback because the count of jobs can be overwhelmed
for param in params:
jobs.append(asyncio.ensure_future(semaphore_job(param, asyncio_semaphore)))
await asyncio.gather(*jobs)
I would be grateful for any of your help.
It seems I found the solution myself:
import asyncio
from typing import Callable
from random import randrange
from asyncio import Semaphore, ensure_future, get_event_loop
async def job(name, time_range=10):
    """Sleep for ``randrange(time_range)`` seconds and return ``name``.

    Prints one line when the job starts and one when it finishes.
    """
    delay = randrange(time_range)
    print(f"Task '{name}' started with timeout {delay}")
    await asyncio.sleep(delay)
    print(f"Task '{name}' finished")
    return name
async def custom_executor(func: Callable, args: list, async_level: int = 4):
    """Run ``func`` once per entry of ``args``, at most ``async_level`` at a time.

    Entries are started in the order given. A tuple, list or set entry is
    unpacked into positional arguments; any other entry is passed as the
    single argument. A new call is started as soon as a running one
    completes. Results and exceptions of the individual calls are not
    collected here.

    Args:
        func: Callable returning an awaitable.
        args: Arguments (or argument sequences), one entry per call.
        async_level: Maximum number of calls in flight at the same time.
    """
    from collections import deque

    # A deque keeps the caller's order, keeps duplicates and accepts
    # unhashable entries such as lists; a set would do none of these.
    todo = deque(args)
    doing = set()
    # Released once per finished call to wake the waiting loop below.
    sync = Semaphore(0)

    def _schedule_task():
        """Start the next pending call, if any is left."""
        if not todo:
            return
        arg = todo.popleft()
        fr = func(*arg) if isinstance(arg, (tuple, list, set)) else func(arg)
        f = ensure_future(fr)
        f.add_done_callback(_on_completion)
        doing.add(f)

    def _on_completion(f):
        """Retire a finished call and immediately start a replacement."""
        doing.remove(f)
        sync.release()
        _schedule_task()

    for _ in range(min(async_level, len(todo))):
        _schedule_task()

    # The replacement is scheduled synchronously inside the done callback, so
    # ``doing`` is empty only when every entry has been processed.
    while doing:
        await sync.acquire()
async def main():
    """Demo: run ``job`` over a mix of tuple and scalar arguments."""
    job_args = [(1, 3), 7, (8, 2), 12, 5]
    await custom_executor(job, job_args)


if __name__ == '__main__':
    asyncio.run(main())
But if you know a better way, please share!
You can create a fixed number of workers and give them tasks using a queue. It's a bit shorter and I find it a bit easier to reason about than your code, which uses callbacks. But YMMV.
async def custom_executor(func, args, async_level=4):
    """Run ``func`` once per entry of ``args`` using ``async_level`` workers.

    A fixed pool of worker tasks pulls entries from a one-slot queue. A
    tuple, list or set entry is unpacked into positional arguments; any
    other entry is passed as the single argument.

    Raises:
        Exception: The first exception raised by any call, re-raised after
            all entries have been processed.
    """
    queue = asyncio.Queue(1)
    errors = []

    async def worker():
        while True:
            arg = await queue.get()
            try:
                if isinstance(arg, (tuple, list, set)):
                    await func(*arg)
                else:
                    await func(arg)
            except Exception as exc:
                # Record the failure but keep this worker alive, so the
                # feeding loop below never runs out of consumers.
                errors.append(exc)
            finally:
                # Always balance get(); otherwise queue.join() would hang
                # after a failed call.
                queue.task_done()

    # create the workers
    workers = [asyncio.create_task(worker()) for _ in range(async_level)]
    try:
        # Feed the workers. Since the queue is bounded, put() also waits
        # for earlier entries to be picked up before adding more.
        for x in args:
            await queue.put(x)
        await queue.join()  # wait for the remaining calls to finish
    finally:
        # Cancel the now-idle workers and wait until the cancellation has
        # actually been delivered.
        for w in workers:
            w.cancel()
        await asyncio.gather(*workers, return_exceptions=True)

    if errors:
        raise errors[0]
Why is await queue.get() blocking?
import asyncio
async def producer(queue, item):
    """Put a single ``item`` on ``queue``."""
    await queue.put(item)


async def consumer(queue):
    """Wait for one value from ``queue`` and print it."""
    received = await queue.get()
    print("val = %d" % received)


async def main():
    """Await the consumer to completion before the producer is even started.

    NOTE: with this ordering nothing has been queued yet, so consumer()
    waits in queue.get() and the producer() line is never reached.
    """
    queue = asyncio.Queue()
    await consumer(queue)
    await producer(queue, 1)


loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
If I call the producer() before consumer(), it works fine
That is to say, the following works fine.
async def main():
    """Queue the value first, then consume it; each step finishes before the next starts."""
    channel = asyncio.Queue()
    await producer(channel, 1)
    await consumer(channel)
Why isn't await queue.get() yielding control back to the event loop, so that the producer coroutine can run and populate the queue, letting queue.get() return?
You need to start the consumer and the producer in parallel, e.g. defining main like this:
async def main():
    """Run the consumer and the producer concurrently on one shared queue."""
    queue = asyncio.Queue()
    both = (consumer(queue), producer(queue, 1))
    await asyncio.gather(*both)
If for some reason you can't use gather, then you can do (the equivalent of) this:
async def main():
    """Start consumer and producer as background tasks, then carry on with other work."""
    queue = asyncio.Queue()
    # Keep references to the tasks: the event loop itself only holds weak
    # references, so an unreferenced task may be garbage-collected before it
    # has finished.
    background_tasks = [
        asyncio.create_task(consumer(queue)),
        asyncio.create_task(producer(queue, 1)),
    ]
    await asyncio.sleep(100) # what your program actually does
Why isn't await queue.get() yielding control back to the event loop so that the producer coroutine can run which will populate the queue so that queue.get() can return.
await queue.get() is yielding control back to the event loop. But await means wait, so when your main coroutine says await consumer(queue), that means "resume me once consumer(queue) has completed." Since consumer(queue) is itself waiting for someone to produce something, you have a classic case of deadlock.
Reversing the order works only because your producer is one-shot, so it immediately returns to the caller. If your producer happened to await an external source (such as a socket), you would have a deadlock there as well. Starting them in parallel avoids the deadlock regardless of how producer and consumer are written.
It's because you call await consumer(queue), which means the next line (producer) will not be called until consumer returns, which of course it never does, because nobody has produced anything yet.
check out the Example in the docs and see how they use it there: https://docs.python.org/3/library/asyncio-queue.html#examples
another simple example:
import asyncio
import random
async def produce(queue, n):
    """Put the strings "1" through "n" on ``queue``, followed by a ``None`` end marker."""
    for number in range(1, n + 1):
        # announce the item being produced
        print('producing {}/{}'.format(number, n))
        # a random sub-second sleep stands in for an I/O operation
        await asyncio.sleep(random.random())
        # hand the item over to the consumer
        await queue.put(str(number))
    # tell the consumer that nothing more will arrive
    await queue.put(None)
async def consume(queue):
    """Process items from ``queue`` until the ``None`` end marker arrives."""
    # wait for the first item from the producer
    item = await queue.get()
    # the producer emits None to indicate that it is done
    while item is not None:
        # process the item
        print('consuming item {}...'.format(item))
        # a random sub-second sleep stands in for an I/O operation
        await asyncio.sleep(random.random())
        # wait for the next item
        item = await queue.get()
async def main():
    """Run one producer (10 items) and one consumer concurrently on a shared queue."""
    # The queue is created inside the running loop; asyncio.Queue no longer
    # accepts a ``loop`` argument (it was removed in Python 3.10).
    queue = asyncio.Queue()
    await asyncio.gather(produce(queue, 10), consume(queue))


# asyncio.run() creates the event loop, runs main() and closes the loop again.
asyncio.run(main())
You should use .run_until_complete() with .gather()
Here is your updated code:
import asyncio
async def producer(queue, item):
    """Put a single ``item`` on ``queue``."""
    await queue.put(item)


async def consumer(queue):
    """Wait for one value from ``queue`` and print it."""
    val = await queue.get()
    print("val = %d" % val)


async def main():
    """Run consumer and producer concurrently so the consumer's get() can complete."""
    # Creating the queue and calling gather() inside a coroutine ties both to
    # the loop that is actually running.
    queue = asyncio.Queue()
    await asyncio.gather(consumer(queue), producer(queue, 1))


# Create the loop explicitly: calling get_event_loop() with no running loop
# is deprecated. Close it even if main() raises.
loop = asyncio.new_event_loop()
try:
    loop.run_until_complete(main())
finally:
    loop.close()
Out:
val = 1
Also you could use .run_forever() with .create_task()
So your code snippet will be:
import asyncio
async def producer(queue, item):
    """Put a single ``item`` on ``queue``."""
    await queue.put(item)


async def consumer(queue):
    """Wait for one value from ``queue`` and print it."""
    val = await queue.get()
    print("val = %d" % val)


# Create and install the loop explicitly: calling get_event_loop() with no
# running loop is deprecated.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
queue = asyncio.Queue()
# Keep references so the tasks cannot be garbage-collected while pending.
tasks = [
    loop.create_task(consumer(queue)),
    loop.create_task(producer(queue, 1)),
]
try:
    # Runs until interrupted (e.g. Ctrl-C), even after both tasks are done.
    loop.run_forever()
except KeyboardInterrupt:
    pass
finally:
    # Close the loop however run_forever() ended.
    loop.close()
Out:
val = 1
I would like to stop a python asyncio task from another task and start it again when some condition in the second task happen.
Please note that I don't want to cancel the coroutine of the first task (the state of that coroutine at the moment it was stopped should remain available). Also, I don't care about the exact state the first task is in; I just want the event loop to stop running the first task until told otherwise by the second.
I hope this example code helps understanding the problem:
import asyncio
async def coroutine1():
    """Print an ever-increasing counter once per second, forever."""
    tick = 0
    while True:
        tick += 1
        print("coroutine1: " + str(tick) )
        await asyncio.sleep(1)


async def coroutine2(task1):
    """Count once per second; placeholder for pausing and resuming ``task1``."""
    tick = 0
    while True:
        tick += 1
        if 3 < tick < 10:
            pass  # TODO: stop task1 here
        else:
            pass  # TODO: Maybe check if task1 is running
            # and start task1 again if it's not?
        print("coroutine2: " + str(tick) )
        await asyncio.sleep(1)


async def main_coroutine():
    """Start both coroutines as tasks and wait until the first one completes."""
    loop = asyncio.get_event_loop()
    task1 = loop.create_task(coroutine1())
    task2 = loop.create_task(coroutine2(task1))
    watched = [task1, task2]
    done, pending = await asyncio.wait(
        watched, return_when=asyncio.FIRST_COMPLETED)


loop = asyncio.get_event_loop()
loop.run_until_complete(main_coroutine())
loop.close()
I would like to stop a python asyncio task from another task and start it again when some condition in the second task happen.
I assume you control the task creation, but don't want to touch the implementation of the coroutine. In your case, you control coroutine2 and main_coroutine, but not the insides of coroutine1.
In that case you can wrap the coroutine in an object whose __await__, instead of the normal yield from loop, checks your stopped flag and waits for a future that tells it when to resume.
class Stoppable:
    """Awaitable wrapper that lets other code pause and resume a coroutine.

    The wrapped coroutine is advanced one step at a time. While the wrapper
    is stopped, it waits on a future instead of stepping the coroutine, so
    the coroutine keeps its state but makes no progress.
    """

    def __init__(self, coro):
        self._coro_iter = coro.__await__()
        # None while running; a pending future while paused.
        self._stopped = None

    def __await__(self):
        while True:
            # While paused, wait on the future instead of stepping the
            # coroutine. Re-check afterwards, since stop() may have been
            # called again in the meantime.
            while self._stopped is not None:
                print('awaiting stopped')
                yield from self._stopped.__await__()
            try:
                v = next(self._coro_iter)
            except StopIteration as e:
                # The coroutine's return value travels on the exception;
                # ``v`` would only be the last yielded value (or unbound).
                return e.value
            yield v

    def stop(self):
        """Pause the wrapped coroutine at its next suspension point."""
        # Keep an existing pause future: replacing it would strand a waiter
        # on a future that start() no longer knows about.
        if self._stopped is None:
            loop = asyncio.get_event_loop()
            self._stopped = loop.create_future()

    def start(self):
        """Resume the wrapped coroutine; does nothing if it is not paused."""
        if self._stopped is not None:
            self._stopped.set_result(None)
            self._stopped = None
You can use the wrapper to modify coroutine2 to stop and resume the execution of coroutine1 at will:
async def coroutine2(s):
    """Count once per second; pause ``s`` at count 3 and resume it at count 10."""
    count = 0
    while True:
        count += 1
        if count == 3:
            print('stopping coroutine1')
            s.stop()
        elif count == 10:
            print('restarting coroutine1')
            s.start()
        print("coroutine2: " + str(count) )
        await asyncio.sleep(1)
async def main_coroutine():
    """Run coroutine1 through a Stoppable wrapper next to coroutine2, which controls it."""
    loop = asyncio.get_event_loop()
    s = Stoppable(coroutine1())
    # ensure_future() accepts any awaitable, including the wrapper object.
    fut1 = asyncio.ensure_future(s)
    task2 = loop.create_task(coroutine2(s))
    watched = [fut1, task2]
    done, pending = await asyncio.wait(
        watched, return_when=asyncio.FIRST_COMPLETED)
The way the wrapper works is by unrolling the loop inherent in yield from. For example, to just delegate __await__ to another coroutine, one would write:
# Simplest form: hand the whole await protocol over to the wrapped iterator.
def __await__(self):
yield from self._coro_iter
Written like this, you can't implement stopping because the yield from contains an implicit loop that yields all the values produced by the underlying iterator - something like:
# Approximate hand-written expansion of `yield from self._coro_iter`.
def __await__(self):
while True:
try:
# Advance the wrapped iterator by one step.
v = next(self._coro_iter)
except StopIteration as e:
# The iterator finished; its return value is carried on the exception.
return e.value
# Pass the yielded value on to whoever is driving this awaitable.
yield v
Taken like this, it is easy enough to add an if that checks for _stopped at each iteration pass, meaning each time we're resumed by the event loop. The remaining hurdle is that one cannot just busy-loop until _stopped is rescinded - we must yield something else to allow the event loop to resume running other coroutines. Fortunately that is easily achieved by making _stopped a future, and yielding from the future. When the future's result is set, we will be automatically resumed and continue executing the wrapped coroutine.
It seems it can't be done.
It's possible to cancel an ongoing task with task1.cancel() and it's possible to create a new task with asyncio.get_event_loop().create_task(newTask).
It's also possible to get the coroutine of a running task with task1._coro, but if we try to create a task again from a previously scheduled coroutine we will get a RuntimeError exception. This is the discussion where that was decided: https://bugs.python.org/issue25887
Finally, a possible way of accomplishing the desired effect is to use an asyncio.Queue object:
import asyncio
async def coroutine1(stop_queue):
    """Count once per second, but only while ``stop_queue`` is empty."""
    counter = 0
    while True:
        keep_working = stop_queue.empty()  # an empty queue means "keep working"
        if keep_working:
            counter += 1
            print("coroutine1: " + str(counter) )
        # Sleep on every pass, paused or not, so the loop always yields.
        await asyncio.sleep(1)
async def coroutine2(stop_queue):
    """Count once per second; queue a marker at count 3 and remove it at count 11."""
    count = 0
    while True:
        count += 1
        if count == 3:
            # a non-empty queue is the "stop" signal for coroutine1
            await stop_queue.put("whatever..")
        elif count == 11:
            # emptying the queue lets coroutine1 continue
            await stop_queue.get()
        print("coroutine2: " + str(count) )
        await asyncio.sleep(1)
async def main_coroutine():
    """Run coroutine1 and coroutine2 concurrently, sharing one stop queue."""
    stop_queue = asyncio.Queue()
    # asyncio.wait() needs tasks/futures: passing bare coroutines was
    # deprecated in Python 3.8 and is rejected since Python 3.11.
    tasks = [
        asyncio.ensure_future(coroutine1(stop_queue)),
        asyncio.ensure_future(coroutine2(stop_queue)),
    ]
    done, pending = await asyncio.wait(
        tasks, return_when=asyncio.ALL_COMPLETED)


# asyncio.run() creates the event loop, runs the coroutine and closes the loop.
asyncio.run(main_coroutine())
If I run the following code:
import asyncio
import time
import concurrent.futures
def cpu_bound(mul):
    """Burn CPU by counting through ``mul * 10**8`` loop iterations.

    Prints and returns the final counter value, which is ``mul * 10**8`` for
    positive ``mul`` and 0 when the loop does not run at all.
    """
    # Pre-initialise so an empty range (mul <= 0) does not leave ``i`` unbound.
    i = 0
    for i in range(mul*10**8):
        i+=1
    print('result = ', i)
    return i
async def say_after(delay, what):
    """Print a notice, sleep ``delay`` seconds via ``asyncio.sleep``, then print ``what``."""
    print('sleeping async...')
    await asyncio.sleep(delay)
    print(what)
# The run_in_pool function must not block the event loop
async def run_in_pool():
    """Fan three ``cpu_bound(1)`` calls out to a process pool.

    NOTE: there is no ``await`` in here; leaving the ``with`` block waits
    for the pool to finish, and the event loop is blocked for that long.
    """
    inputs = [1, 1, 1]
    with concurrent.futures.ProcessPoolExecutor() as executor:
        result = executor.map(cpu_bound, inputs)
async def main():
    """Start two ``say_after`` tasks around the pool task and time the whole run."""
    hello_task = asyncio.create_task(say_after(0.1, 'hello'))
    pool_task = asyncio.create_task(run_in_pool())
    world_task = asyncio.create_task(say_after(0.1, 'world'))
    print(f"started at {time.strftime('%X')}")
    # Wait for the tasks one after another, in creation order.
    for pending in (hello_task, pool_task, world_task):
        await pending
    print(f"finished at {time.strftime('%X')}")


if __name__ == '__main__':
    asyncio.run(main())
The output is:
started at 18:19:28
sleeping async...
result = 100000000
result = 100000000
result = 100000000
sleeping async...
hello
world
finished at 18:19:34
This shows that the event loop blocks until the cpu bound jobs (task2) finish and it continues afterwards with the task3.
If I run only one cpu bound job (the run_in_pool is the following one):
async def run_in_pool():
    """Run a single ``cpu_bound(1)`` call in a process pool and await its result."""
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # run_in_executor() returns an awaitable, so waiting for the result
        # suspends this coroutine instead of blocking the event loop.
        pending = asyncio.get_running_loop().run_in_executor(
            executor, cpu_bound, 1)
        result = await pending
Then it seems that the event loop doesn't block since the output is:
started at 18:16:23
sleeping async...
sleeping async...
hello
world
result = 100000000
finished at 18:16:28
How can I run many cpu bound jobs (in task2) in a process pool without blocking the event loop?
As you discovered, you need to use asyncio's own run_in_executor to wait for submitted tasks to finish without blocking the event loop. Asyncio doesn't provide the equivalent of map, but it's not hard to emulate it:
async def run_in_pool():
    """Run three ``cpu_bound(1)`` calls in a process pool without blocking the loop.

    Returns:
        The list of results, in submission order.
    """
    # run_in_executor() is a method of the running loop, so fetch it first.
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = [loop.run_in_executor(executor, cpu_bound, i)
                   for i in (1, 1, 1)]
        # Await inside the ``with`` block: all jobs are finished before the
        # pool is shut down, so leaving the block does not block the loop.
        result = await asyncio.gather(*futures)
    return result
I have a program with one producer and two slow consumers, and I'd like to rewrite it with coroutines in such a way that each consumer handles only the last value produced for it (i.e. skips new values generated while it is still processing the old ones). I used threads and threading.Queue(), but that blocks on put(), because the queue is full most of the time.
After reading answer to this question I decided to use asyncio.Event and asyncio.Queue. I wrote this prototype program:
import asyncio
async def l(event, q):
    """Consumer: signal readiness, take one value from ``q`` and print it scaled."""
    factor = 1
    while True:
        # ready
        event.set()
        # get value to process
        value = await q.get()
        # process it
        print(value * factor)
        factor *= 2


async def m(event, q):
    """Producer: hand the current counter to the consumer whenever it is ready.

    NOTE: while the event is clear this loop contains no await, so it never
    gives control back to the event loop.
    """
    counter = 1
    while True:
        # pass element to consumer, when it's ready
        if event.is_set():
            await q.put(counter)
            event.clear()
        # produce value
        counter += 1


el = asyncio.get_event_loop()
ev = asyncio.Event()
qu = asyncio.Queue(2)
tasks = [asyncio.ensure_future(coro) for coro in (l(ev, qu), m(ev, qu))]
el.run_until_complete(asyncio.gather(*tasks))
el.close()
and I have noticed that l coroutine blocks on q.get() line and doesn't print anything.
It works as I expect after adding asyncio.sleep() in both (I get 1,11,21,...):
import asyncio
import time
async def l(event, q):
    """Consumer: take a value, spend one second on it, print it, then signal readiness."""
    factor = 1
    value = 1
    event.set()
    while True:
        # await asyncio.sleep(1)
        value = await q.get()
        # process it
        await asyncio.sleep(1)
        print(value * factor)
        event.set()


async def m(event, q):
    """Producer: bump a counter every 0.1 s and pass it on when the consumer is ready."""
    counter = 1
    while True:
        # pass element to consumer, when it's ready
        if event.is_set():
            await q.put(counter)
            event.clear()
        # sleeping on every pass is what lets the consumer run
        await asyncio.sleep(0.1)
        # produce value
        counter += 1


el = asyncio.get_event_loop()
ev = asyncio.Event()
qu = asyncio.Queue(2)
tasks = [asyncio.ensure_future(coro) for coro in (l(ev, qu), m(ev, qu))]
el.run_until_complete(asyncio.gather(*tasks))
el.close()
...but I'm looking for solution without it.
Why is it so? How can I fix it? I think I cannot call await l() from m as both of them have states (in original program the first draws solution with PyGame and the second plots results).
The code is not working as expected because the task running the m function is never suspended. That task keeps incrementing i whenever event.is_set() == False, and because it never yields, the task running function l never gets to run. Therefore, you need a way to suspend the task running function m. One way of suspending is awaiting another coroutine, which is why adding asyncio.sleep makes it work as expected.
I think the following code will work as you expect. The LeakyQueue will ensure that only the last value from the producer will be processed by the consumer. As the complexity is very symmetric, the consumer will consume all values produced by the producer. If you increase the delay argument, you can simulate that the consumer only processes the last value created by the producer.
import asyncio
class LeakyQueue(asyncio.Queue):
    """Bounded queue whose ``put()`` drops the oldest item instead of blocking."""

    async def put(self, item):
        """Add ``item``, discarding the oldest queued item first if the queue is full."""
        if self.full():
            # Drop the oldest entry and mark it as done: no consumer will
            # ever see it, so leaving it counted would make join() hang.
            self.get_nowait()
            self.task_done()
        await super().put(item)
async def consumer(queue, delay=0):
    """Forever take items from ``queue`` and print them.

    Args:
        queue: Queue to read from.
        delay: Seconds to sleep before printing each item, simulating slow
            processing; 0 (the default) skips the sleep.
    """
    while True:
        a = await queue.get()
        if delay:
            await asyncio.sleep(delay)
        print ('consumer', a)
async def producer(queue):
    """Endlessly put 1, 2, 3, ... on ``queue``, printing each value after it is queued."""
    value = 1
    while True:
        # Run put() as its own task and wait for it. Presumably this is what
        # makes the producer suspend on every round so the consumer gets a
        # turn, since a put() that never blocks may not suspend by itself.
        put_task = asyncio.ensure_future(queue.put(value))
        await put_task
        print ('producer', value)
        value += 1
async def main():
    """Run the consumer and the producer concurrently on a one-slot LeakyQueue."""
    # Build the queue inside the running loop so it is tied to that loop.
    queue = LeakyQueue(maxsize=1)
    await asyncio.gather(consumer(queue, 0), producer(queue))


# asyncio.run() creates the event loop, runs main() and closes the loop again;
# calling get_event_loop()/ensure_future() with no running loop is deprecated.
asyncio.run(main())