Hi, I have to process several objects, queued 5 at a time.
I have a queue of 5 items.
Sometimes processing fails and an exception occurs:
async def worker(name):
    while True:
        queue_item = await queue.get()

The worker starts the processing loop and tries to process items:

        try:
            loop = asyncio.get_event_loop()
            task = loop.create_task(download(queue_item, path))
            download_result = await asyncio.wait_for(task, timeout=timeout)
        except asyncio.TimeoutError:

Unfortunately, the process timed out.
Can I add something like this?
        except asyncio.TimeoutError:
            await queue.put(queue_item)
I want to process that item again on the next round.
Thanks!
Yes, you can re-queue an object at the end of the queue for processing. A simple
example based on your code:
import asyncio
from random import randrange

async def download(item):
    print("Process item", item)
    if randrange(4) == 1:  # simulate occasional event
        await asyncio.sleep(100)  # trigger timeout error

async def worker(queue):
    while True:
        queue_item = await queue.get()
        try:
            result = await asyncio.wait_for(download(queue_item), timeout=1)
        except asyncio.TimeoutError:
            print("Timeout for ", queue_item)
            await queue.put(queue_item)
        queue.task_done()

async def main():
    q = asyncio.Queue()
    asyncio.create_task(worker(q))
    for i in range(5):  # put 5 items to process
        await q.put(i)
    await q.join()

asyncio.run(main())

Sample output:
Process item 0
Timeout for 0
Process item 1
Process item 2
Process item 3
Timeout for 3
Process item 4
Process item 0
Process item 3
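One caveat: as written, an item that keeps timing out is re-queued forever. A minimal sketch of a bounded-retry variant, assuming you are free to wrap each queue item in a (retries, item) tuple (max_retries is a made-up parameter, not part of the original code):

async def worker(queue, max_retries=3):
    while True:
        retries, item = await queue.get()
        try:
            await asyncio.wait_for(download(item), timeout=1)
        except asyncio.TimeoutError:
            if retries < max_retries:
                await queue.put((retries + 1, item))  # try again on a later round
            else:
                print("Giving up on", item)
        queue.task_done()

Items would then be enqueued as await q.put((0, i)).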
import asyncio
import random

async def producer(q: asyncio.Queue):
    for _ in range(100):
        await q.put(random.randint(1, 10))

async def consumer(q: asyncio.Queue):
    while True:
        num = await q.get()
        print("Got From Queue: ", num)
        q.task_done()

async def main():
    q = asyncio.Queue()
    pr = asyncio.create_task(producer(q))
    consumers = [asyncio.create_task(consumer(q)) for _ in range(10)]
    await pr
    await q.join()
    for c in consumers:
        c.cancel()

asyncio.run(main())
I created this script to replicate a simplified version of my problem.
So this is a very basic producer-consumer type of script, and it works just fine.
However, if an exception occurs inside the consumer, the script freezes.
For example, if I modify the consumer like so:
async def consumer(q: asyncio.Queue):
    raise Exception
    while True:
        num = await q.get()
        print("Got From Queue: ", num)
        q.task_done()
When I run the script now, it freezes and no exceptions get thrown.
However, if I change the main script to generate all the consumer tasks in a for loop instead of through a list comprehension, like so:
async def main():
    q = asyncio.Queue()
    pr = asyncio.create_task(producer(q))
    # consumers = [asyncio.create_task(consumer(q)) for _ in range(10)]
    for _ in range(10):
        asyncio.create_task(consumer(q))
    await pr
    await q.join()
it throws 10 exceptions of type Exception, which is what I expect.
My 2 questions are:
Why does this strange behaviour happen?
In my actual script I want to use exception handling, so I'm now using the for-loop approach. However, with this approach I don't have references to all the consumer tasks, so I can't cancel them when I want. How can I do that? (Simply appending them to a list after creation replicates the original problem.)
I am completely clueless, any help appreciated!
I don't think that the use of a list comprehension instead of a for loop has any impact on this; more likely, the exceptions you see in the for-loop version are logged ("Task exception was never retrieved") when the unreferenced, failed tasks are garbage-collected, while the list keeps the tasks alive so their exceptions are never reported.
Either way, you should handle the exception in the consumer and call task_done() for each get(); otherwise you'll keep the current behaviour, where q.join() blocks waiting for unfinished items:
task_done()
If a join() is currently blocking, it will resume when all items have
been processed (meaning that a task_done() call was received for every
item that had been put() into the queue).
test.py:
import asyncio
import random

async def producer(q):
    for _ in range(10):
        await q.put(random.randint(1, 10))

async def consumer(q):
    while True:
        num = await q.get()
        try:
            if num in (5, 8):
                raise Exception("ERROR")
            print(f"Working on: {num}")
        except Exception as exc:
            print(f"{num}: {exc}")
        finally:
            q.task_done()

async def main():
    q = asyncio.Queue()
    pr = asyncio.create_task(producer(q))
    tasks = []
    for _ in range(10):
        tasks.append(asyncio.create_task(consumer(q)))
    await pr
    await q.join()
    for task in tasks:
        task.cancel()
    await asyncio.gather(*tasks, return_exceptions=True)

if __name__ == "__main__":
    asyncio.run(main())
Test:
$ python test.py
Working on: 7
Working on: 4
Working on: 4
Working on: 4
Working on: 1
Working on: 3
8: ERROR
Working on: 6
5: ERROR
Working on: 4
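If you'd rather have the program fail fast when a consumer crashes, instead of relying on every consumer to swallow its own exceptions, one option is to race q.join() against the consumer tasks, so that an escaping exception surfaces immediately. This is only a sketch, assuming producer and consumer as defined in the question:

async def main():
    q = asyncio.Queue()
    pr = asyncio.create_task(producer(q))
    tasks = [asyncio.create_task(consumer(q)) for _ in range(10)]
    await pr
    join_task = asyncio.create_task(q.join())
    # finishes when the queue is drained, or as soon as a consumer crashes
    done, pending = await asyncio.wait(
        {join_task, *tasks}, return_when=asyncio.FIRST_COMPLETED)
    for task in pending:
        task.cancel()
    await asyncio.gather(*pending, return_exceptions=True)
    for task in done:
        if task is not join_task and task.exception():
            raise task.exception()

Since the consumers loop forever, the only way one of them can show up in done is by raising.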
This is a test script I created to better understand task cancellation:
import asyncio
import random
import signal
import traceback

async def shutdown(signame, loop):
    print("Shutting down")
    tasks = [task for task in asyncio.Task.all_tasks()]
    for task in tasks:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            print("Task cancelled: %s", task)
    loop.stop()

async def another():
    await asyncio.sleep(2)

async def some_other_process():
    await asyncio.sleep(5)
    return "Me"

async def process(job, loop, i):
    print(i)
    task = loop.create_task(some_other_process())
    value = await task
    if i < 1:
        another_task = loop.create_task(another())
        await another_task
    # await some_other_process()

def pull(loop):
    i = 0
    while True:
        job = f"random-integer-{random.randint(0, 100)}"
        try:
            loop.run_until_complete(process(job, loop, i))
            i += 1
        except asyncio.CancelledError as e:
            print("Task cancelled")
            break
        except Exception:
            print(traceback.format_exc())
        # asyncio.get_event_loop().stop()

def main():
    try:
        loop = asyncio.get_event_loop()
        for signame in ['SIGINT']:
            loop.add_signal_handler(
                getattr(signal, signame),
                lambda: asyncio.ensure_future(shutdown(signame, loop))
            )
        try:
            pull(loop)
        except Exception:
            print(traceback.format_exc())
        finally:
            loop.close()
    finally:
        print("Done")

if __name__ == "__main__":
    main()
And I cannot understand why I see:
Task was destroyed but it is pending!
task: <Task cancelling coro=<shutdown() done, defined at test.py:6>>
loop.add_signal_handler(
    getattr(signal, signame),
    lambda: asyncio.ensure_future(shutdown(signame, loop))
)
Here, using asyncio.ensure_future, you create a task for the shutdown coroutine, but you never await that task anywhere. Later, when you close the event loop, it warns you that this task is still pending.
Upd:
If you want to do some cleanup, the best place for it is right before loop.close(), regardless of the reason your script ended (signal, exception, etc.).
Try to alter your code this way:
# ...

async def shutdown(loop):  # the `signame` arg is removed
    # ...

def main():
    try:
        loop = asyncio.get_event_loop()
        try:
            pull(loop)
        except Exception:
            print(traceback.format_exc())
        finally:
            loop.run_until_complete(shutdown(loop))  # just run until shutdown is done
            loop.close()
    finally:
        print("Done")

# ...
Upd2:
In case you still want a signal handler, you probably want to do something like this:
from functools import partial

def cb(signame, loop):
    loop.stop()
    loop.run_until_complete(shutdown(signame, loop))

loop.add_signal_handler(
    getattr(signal, signame),
    partial(cb, signame, loop)
)
Why is await queue.get() blocking?
import asyncio

async def producer(queue, item):
    await queue.put(item)

async def consumer(queue):
    val = await queue.get()
    print("val = %d" % val)

async def main():
    queue = asyncio.Queue()
    await consumer(queue)
    await producer(queue, 1)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
If I call producer() before consumer(), it works fine.
That is to say, the following works fine.
async def main():
    queue = asyncio.Queue()
    await producer(queue, 1)
    await consumer(queue)
Why isn't await queue.get() yielding control back to the event loop so that the producer coroutine can run, which will populate the queue so that queue.get() can return?
You need to start the consumer and the producer in parallel, e.g. by defining main like this:

async def main():
    queue = asyncio.Queue()
    await asyncio.gather(consumer(queue), producer(queue, 1))
If for some reason you can't use gather, then you can do (the equivalent of) this:

async def main():
    queue = asyncio.Queue()
    asyncio.create_task(consumer(queue))
    asyncio.create_task(producer(queue, 1))
    await asyncio.sleep(100)  # what your program actually does
Why isn't await queue.get() yielding control back to the event loop so that the producer coroutine can run, which will populate the queue so that queue.get() can return?
await queue.get() is yielding control back to the event loop. But await means wait, so when your main coroutine says await consumer(queue), that means "resume me once consumer(queue) has completed." Since consumer(queue) is itself waiting for someone to produce something, you have a classic case of deadlock.
Reversing the order works only because your producer is one-shot, so it immediately returns to the caller. If your producer happened to await an external source (such as a socket), you would have a deadlock there as well. Starting them in parallel avoids the deadlock regardless of how producer and consumer are written.
It's because you call await consumer(queue), which means the next line (the producer) will not be called until consumer returns, which of course it never does, because nobody has produced yet.
Check out the example in the docs and see how they use it there: https://docs.python.org/3/library/asyncio-queue.html#examples
Another simple example:
import asyncio
import random

async def produce(queue, n):
    for x in range(1, n + 1):
        # produce an item
        print('producing {}/{}'.format(x, n))
        # simulate i/o operation using sleep
        await asyncio.sleep(random.random())
        item = str(x)
        # put the item in the queue
        await queue.put(item)
    # indicate the producer is done
    await queue.put(None)

async def consume(queue):
    while True:
        # wait for an item from the producer
        item = await queue.get()
        if item is None:
            # the producer emits None to indicate that it is done
            break
        # process the item
        print('consuming item {}...'.format(item))
        # simulate i/o operation using sleep
        await asyncio.sleep(random.random())

loop = asyncio.get_event_loop()
queue = asyncio.Queue()  # the `loop` argument was removed in Python 3.10
producer_coro = produce(queue, 10)
consumer_coro = consume(queue)
loop.run_until_complete(asyncio.gather(producer_coro, consumer_coro))
loop.close()
You should use .run_until_complete() with .gather()
Here is your updated code:
import asyncio

async def producer(queue, item):
    await queue.put(item)

async def consumer(queue):
    val = await queue.get()
    print("val = %d" % val)

queue = asyncio.Queue()
loop = asyncio.get_event_loop()
loop.run_until_complete(
    asyncio.gather(consumer(queue), producer(queue, 1))
)
loop.close()
Out:
val = 1
You could also use .run_forever() with .create_task().
Your code snippet then becomes:
import asyncio

async def producer(queue, item):
    await queue.put(item)

async def consumer(queue):
    val = await queue.get()
    print("val = %d" % val)

queue = asyncio.Queue()
loop = asyncio.get_event_loop()
loop.create_task(consumer(queue))
loop.create_task(producer(queue, 1))
try:
    loop.run_forever()
except KeyboardInterrupt:
    loop.close()
Out:
val = 1
I have a program with one producer and two slow consumers, and I'd like to rewrite it with coroutines in such a way that each consumer handles only the last value produced for it (i.e. skips new values generated while it was processing the old ones). I used threads and threading.Queue(), but that blocks on put(), because the queue is full most of the time.
After reading the answer to this question, I decided to use asyncio.Event and asyncio.Queue. I wrote this prototype program:
import asyncio

async def l(event, q):
    h = 1
    while True:
        # ready
        event.set()
        # get value to process
        a = await q.get()
        # process it
        print(a * h)
        h *= 2

async def m(event, q):
    i = 1
    while True:
        # pass element to consumer, when it's ready
        if event.is_set():
            await q.put(i)
            event.clear()
        # produce value
        i += 1

el = asyncio.get_event_loop()
ev = asyncio.Event()
qu = asyncio.Queue(2)
tasks = [
    asyncio.ensure_future(l(ev, qu)),
    asyncio.ensure_future(m(ev, qu))
]
el.run_until_complete(asyncio.gather(*tasks))
el.close()
and I have noticed that the l coroutine blocks on the q.get() line and never prints anything.
It works as I expect after adding asyncio.sleep() to both coroutines (I get 1, 11, 21, ...):
import asyncio

async def l(event, q):
    h = 1
    a = 1
    event.set()
    while True:
        # await asyncio.sleep(1)
        a = await q.get()
        # process it
        await asyncio.sleep(1)
        print(a * h)
        event.set()

async def m(event, q):
    i = 1
    while True:
        # pass element to consumer, when it's ready
        if event.is_set():
            await q.put(i)
            event.clear()
        await asyncio.sleep(0.1)
        # produce value
        i += 1

el = asyncio.get_event_loop()
ev = asyncio.Event()
qu = asyncio.Queue(2)
tasks = [
    asyncio.ensure_future(l(ev, qu)),
    asyncio.ensure_future(m(ev, qu))
]
el.run_until_complete(asyncio.gather(*tasks))
el.close()
...but I'm looking for a solution without it.
Why is it so? How can I fix it? I think I cannot call await l() from m, as both of them have state (in the original program, the first draws a solution with PyGame and the second plots results).
The code is not working as expected because the task running the m function is never suspended. That task keeps incrementing i whenever event.is_set() is False, and because it never suspends, the task running function l never gets a chance to run. You therefore need a way to suspend the task running function m. One way of suspending is awaiting another coroutine; that is why adding asyncio.sleep makes it work as expected.
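In other words, a task only gives up control at an await point. If you want to keep the original structure of m, one fix (a sketch, not the only option) is to yield explicitly on every iteration with asyncio.sleep(0), which suspends without delaying:

async def m(event, q):
    i = 1
    while True:
        if event.is_set():
            await q.put(i)
            event.clear()
        await asyncio.sleep(0)  # yield to the event loop without sleeping
        i += 1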
I think the following code will work as you expect. The LeakyQueue ensures that only the last value from the producer is kept for the consumer. Since the producer and the consumer here do a comparable amount of work per step, the consumer will consume all values produced by the producer. If you increase the delay argument, you can simulate the consumer processing only the last value created by the producer.
import asyncio

class LeakyQueue(asyncio.Queue):
    # a queue that drops its oldest item instead of blocking when full
    async def put(self, item):
        if self.full():
            await self.get()
        await super().put(item)

async def consumer(queue, delay=0):
    h = 1
    while True:
        a = await queue.get()
        if delay:
            await asyncio.sleep(delay)
        print('consumer', a)
        h += 2

async def producer(queue):
    i = 1
    while True:
        # awaiting a Task (rather than the bare coroutine) forces a yield
        # to the event loop, giving the consumer a chance to run
        await asyncio.ensure_future(queue.put(i))
        print('producer', i)
        i += 1

loop = asyncio.get_event_loop()
queue = LeakyQueue(maxsize=1)
tasks = [
    asyncio.ensure_future(consumer(queue, 0)),
    asyncio.ensure_future(producer(queue))
]
loop.run_until_complete(asyncio.gather(*tasks))
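If you'd rather not subclass, a plain asyncio.Queue(1) plus a small helper gives the same drop-oldest behaviour. put_drop_oldest is a hypothetical name; this is just a sketch:

import asyncio

def put_drop_oldest(queue: asyncio.Queue, item) -> None:
    # drop the oldest element instead of blocking when the queue is full;
    # there is no await between get_nowait and put_nowait, so no task can interleave
    try:
        queue.put_nowait(item)
    except asyncio.QueueFull:
        queue.get_nowait()  # discard the stale value
        queue.put_nowait(item)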
I'm quite new to this Python asyncio topic. I have a simple question:
I have a task containing two coroutines to be run concurrently. The first coroutine (my_coroutine) just prints something continuously until seconds_to_sleep is reached. The second coroutine (seq_coroutine) calls 4 other coroutines sequentially, one after the other. My goal is to stop the loop at the end of seq_coroutine, whenever it is completely finished. To be exact, I want my_coroutine to be alive until seq_coroutine is finished. Can someone help me with that?
My code is like this:
import asyncio

async def my_coroutine(task, seconds_to_sleep=3):
    print("{task_name} started\n".format(task_name=task))
    for i in range(1, seconds_to_sleep):
        await asyncio.sleep(1)
        print("\n{task_name}: second {seconds}\n".format(task_name=task, seconds=i))

async def coroutine1():
    print("coroutine 1 started")
    await asyncio.sleep(1)
    print("coroutine 1 finished\n")

async def coroutine2():
    print("coroutine 2 started")
    await asyncio.sleep(1)
    print("coroutine 2 finished\n")

async def coroutine3():
    print("coroutine 3 started")
    await asyncio.sleep(1)
    print("coroutine 3 finished\n")

async def coroutine4():
    print("coroutine 4 started")
    await asyncio.sleep(1)
    print("coroutine 4 finished\n")

async def seq_coroutine():
    await coroutine1()
    await coroutine2()
    await coroutine3()
    await coroutine4()

def main():
    main_loop = asyncio.get_event_loop()
    task = [asyncio.ensure_future(my_coroutine("task1", 11)),
            asyncio.ensure_future(seq_coroutine())]
    try:
        print('loop is started\n')
        main_loop.run_until_complete(asyncio.gather(*task))
    finally:
        print('loop is closed')
        main_loop.close()

if __name__ == "__main__":
    main()
This is the output of this program:
loop is started
task1 started
coroutine 1 started
task1: second 1
coroutine 1 finished
coroutine 2 started
task1: second 2
coroutine 2 finished
coroutine 3 started
task1: second 3
coroutine 3 finished
coroutine 4 started
task1: second 4
coroutine 4 finished
task1: second 5
task1: second 6
task1: second 7
task1: second 8
task1: second 9
task1: second 10
loop is closed
I only want to have something like this:
loop is started
task1 started
coroutine 1 started
task1: second 1
coroutine 1 finished
coroutine 2 started
task1: second 2
coroutine 2 finished
coroutine 3 started
task1: second 3
coroutine 3 finished
coroutine 4 started
task1: second 4
coroutine 4 finished
loop is closed
I just found a suitable solution for my problem.
I won't remove my post, and I'll post my solution here so that it may help others who face the same question.
I used asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED), which returns as soon as the first task is finished.
This is the solution:
import asyncio
from concurrent.futures import CancelledError

async def my_coroutine(task, seconds_to_sleep=3):
    print("{task_name} started\n".format(task_name=task))
    for i in range(1, seconds_to_sleep):
        await asyncio.sleep(1)
        print("\n{task_name}: second {seconds}\n".format(task_name=task, seconds=i))

async def coroutine1():
    print("coroutine 1 started")
    await asyncio.sleep(1)
    print("coroutine 1 finished\n")

async def coroutine2():
    print("coroutine 2 started")
    await asyncio.sleep(1)
    print("coroutine 2 finished\n")

async def coroutine3():
    print("coroutine 3 started")
    await asyncio.sleep(1)
    print("coroutine 3 finished\n")

async def coroutine4():
    print("coroutine 4 started")
    await asyncio.sleep(1)
    print("coroutine 4 finished\n")

async def seq_coroutine():
    await coroutine1()
    await coroutine2()
    await coroutine3()
    await coroutine4()

def main():
    main_loop = asyncio.get_event_loop()
    tasks = [asyncio.ensure_future(my_coroutine("task1", 11)),
             asyncio.ensure_future(seq_coroutine())]
    try:
        print('loop is started\n')
        done, pending = main_loop.run_until_complete(
            asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED))
        print("Completed tasks: {completed}\nPending tasks: {pending}".format(
            completed=done, pending=pending))
        # cancel the tasks that are still pending
        for task in pending:
            print("Cancelling {task}: {task_cancel}".format(task=task, task_cancel=task.cancel()))
    except CancelledError as e:
        print("Error happened while cancelling the task: {e}".format(e=e))
    finally:
        print('loop is closed')

if __name__ == "__main__":
    main()
You can use a variable to signal to another coroutine. asyncio.Event is usually used:
import asyncio
import random

async def clock(name, event):
    print("* {} started".format(name))
    i = 0
    while not event.is_set():
        await asyncio.sleep(0.1)
        i += 1
        print("* {}: {}".format(name, i))
    print("* {} done".format(name))
    return i

async def coro(x):
    print("coro() started", x)
    await asyncio.sleep(random.uniform(0.2, 0.5))
    print("coro() finished", x)

async def seq_coroutine(name):
    event = asyncio.Event()
    clock_task = asyncio.ensure_future(clock(name, event))
    # await asyncio.sleep(0)  # if you want to give clock() a chance to start
    await coro(1)
    await coro(2)
    await coro(3)
    await coro(4)
    event.set()
    i = await clock_task
    print("Got:", i)

def main():
    main_loop = asyncio.get_event_loop()
    main_loop.run_until_complete(seq_coroutine("foo"))
    main_loop.close()

if __name__ == "__main__":
    main()
You can also use await event.wait() to block a piece of code until the event is set:
async def xxx(event):
    print("xxx started")
    await event.wait()
    print("xxx ended")
Here's another way to do the same thing, which I think is cleaner in representing the dependence between jobs:
import asyncio

async def poll():
    i = 0
    while True:
        print("First", i)
        i += 1
        await asyncio.sleep(20)
        print("Second", i)
        i += 1
        await asyncio.sleep(20)

async def stop():
    poller = asyncio.ensure_future(poll())
    await asyncio.sleep(5)
    poller.cancel()

main_loop = asyncio.get_event_loop()
main_loop.run_until_complete(stop())
main_loop.close()
Basically, instead of breaking the entire event loop on a single job ending and then cancelling the job there, we just cancel the dependent job directly when the parent job finishes.
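For the specific timeout-shaped case above, asyncio.wait_for expresses the same idea even more directly, since it cancels the awaited coroutine when the timeout expires. A sketch reusing poll() from above:

async def stop():
    try:
        await asyncio.wait_for(poll(), timeout=5)  # cancels poll() after 5 seconds
    except asyncio.TimeoutError:
        pass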