Python: invoke a process pool without blocking the event loop

If I run the following code:
import asyncio
import time
import concurrent.futures

def cpu_bound(mul):
    for i in range(mul * 10**8):
        i += 1
    print('result = ', i)
    return i

async def say_after(delay, what):
    print('sleeping async...')
    await asyncio.sleep(delay)
    print(what)

# The run_in_pool function must not block the event loop
async def run_in_pool():
    with concurrent.futures.ProcessPoolExecutor() as executor:
        result = executor.map(cpu_bound, [1, 1, 1])

async def main():
    task1 = asyncio.create_task(say_after(0.1, 'hello'))
    task2 = asyncio.create_task(run_in_pool())
    task3 = asyncio.create_task(say_after(0.1, 'world'))
    print(f"started at {time.strftime('%X')}")
    await task1
    await task2
    await task3
    print(f"finished at {time.strftime('%X')}")

if __name__ == '__main__':
    asyncio.run(main())
The output is:
started at 18:19:28
sleeping async...
result = 100000000
result = 100000000
result = 100000000
sleeping async...
hello
world
finished at 18:19:34
This shows that the event loop blocks until the CPU-bound jobs (task2) finish, and only afterwards continues with task3.
If I run only one CPU-bound job (with run_in_pool defined as follows):
async def run_in_pool():
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as executor:
        result = await loop.run_in_executor(executor, cpu_bound, 1)
Then it seems that the event loop doesn't block since the output is:
started at 18:16:23
sleeping async...
sleeping async...
hello
world
result = 100000000
finished at 18:16:28
How can I run many CPU-bound jobs (in task2) in a process pool without blocking the event loop?

As you discovered, you need to use asyncio's own run_in_executor to wait for submitted tasks to finish without blocking the event loop. Asyncio doesn't provide the equivalent of map, but it's not hard to emulate it:
async def run_in_pool():
    loop = asyncio.get_running_loop()  # needed for run_in_executor
    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = [loop.run_in_executor(executor, cpu_bound, i)
                   for i in (1, 1, 1)]
        result = await asyncio.gather(*futures)
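If you need this in more than one place, the same pattern can be factored into a small helper. This is a minimal sketch, not part of the original answer; the name async_map is made up for illustration:

import asyncio
import concurrent.futures

async def async_map(executor, fn, iterable):
    # Hypothetical helper: submit each item to the executor and gather
    # the results without blocking the event loop.
    loop = asyncio.get_running_loop()
    futures = [loop.run_in_executor(executor, fn, arg) for arg in iterable]
    return await asyncio.gather(*futures)

async def run_in_pool():
    with concurrent.futures.ProcessPoolExecutor() as executor:
        result = await async_map(executor, cpu_bound, [1, 1, 1])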


Pause all asyncio tasks in Python

I have some asyncio tasks and I need to pause all of them.
This is the relevant part of my code:
import asyncio
import random

async def workers1():
    while True:
        k = random.randint(100, 200)
        await asyncio.sleep(k)
        await my_print(k)

async def workers2():
    while True:
        k = random.randint(100, 200)
        await asyncio.sleep(k)
        await my_print(k)

async def my_print(k):
    print(k)
    if k == 122:
        # >>>>>>> suspend all of the workers
        while k != 155:
            k = await repair()
            await asyncio.sleep(1)
        # >>>>>>> resume all of the workers

async def main():
    tasks = [asyncio.create_task(workers1()),
             asyncio.create_task(workers2())]
    [await x for x in tasks]

if __name__ == '__main__':
    asyncio.run(main())
How can I suspend all of the workers when trouble happens in the my_print function, and resume them all after the repair in my_print?
I would be glad if you could give an example.
I have seen this link, but that's not what I need.
Simply replace your call to await asyncio.sleep(1) with time.sleep(1). If your code doesn't have an await expression in it, all the other tasks are effectively blocked.
import asyncio
import random
import time

async def workers1():
    while True:
        k = random.randint(100, 200)
        await asyncio.sleep(k)
        await my_print(k)

async def workers2():
    while True:
        k = random.randint(100, 200)
        await asyncio.sleep(k)
        await my_print(k)

async def my_print(k):
    print(k)
    if k == 122:
        # >>>>>>> suspend all of the workers
        while k != 155:
            k = random.randint(100, 200)
            time.sleep(1.0)  # CHANGE HERE
        # >>>>>>> resume all of the workers

async def main():
    tasks = [asyncio.create_task(workers1()),
             asyncio.create_task(workers2())]
    [await x for x in tasks]

if __name__ == '__main__':
    asyncio.run(main())
First, note that the time.sleep trick can be replaced with any non-asynchronous code: anything that runs synchronously will do, including setting up a second asyncio event loop in a different thread and running tasks in that loop.
The following code uses ThreadPoolExecutor from concurrent.futures to set up a new event loop. In particular:
future = executor.submit(asyncio.run, task_3())
will start task_3, via asyncio.run, in a new event loop on one of the executor's threads.
The next line, future.result(), blocks the entire first loop (task_1 and task_2) until task_3 exits.
Inside task_3 you can do any asyncio operations you like, and until it exits all of the existing tasks will be suspended.
import asyncio
import concurrent.futures

async def task_1():
    while True:
        print('task 1 runs')
        await asyncio.sleep(1)

async def task_2():
    print('task 2 starts')
    await asyncio.sleep(5)
    print('first set of tasks suspends')
    future = executor.submit(asyncio.run, task_3())
    print('suspending existing tasks')
    future.result()  # blocks this loop until task_3's loop finishes
    print('resuming tasks')

async def task_3():
    print('task 3 runs')
    await asyncio.sleep(4)
    print('task 3 finishes')

async def main():
    asyncio.ensure_future(task_1())
    asyncio.ensure_future(task_2())
    await asyncio.sleep(15)

executor = concurrent.futures.ThreadPoolExecutor()
asyncio.run(main())
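For completeness, a cooperative alternative (not part of the original answers) is to have every worker wait on a shared asyncio.Event. Clearing the event suspends the workers at their next checkpoint without blocking the event loop, and setting it resumes them. A minimal sketch:

import asyncio

async def worker(name, gate):
    while True:
        await gate.wait()  # suspends here whenever the gate is cleared
        await asyncio.sleep(1)
        print(f'{name} did a unit of work')

async def main():
    gate = asyncio.Event()
    gate.set()  # gate open: workers run
    tasks = [asyncio.create_task(worker(f'worker{i}', gate)) for i in (1, 2)]
    await asyncio.sleep(3)
    gate.clear()  # suspend every worker at its next gate.wait()
    print('workers suspended, repairing...')
    await asyncio.sleep(3)  # repair work would go here
    gate.set()  # resume all workers
    await asyncio.sleep(3)
    for t in tasks:
        t.cancel()

asyncio.run(main())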

How do Async IO Coroutines get executed?

I'm looking at this piece of code from the example here, and I want to know at what exact moment the consume() coroutine gets called.
import asyncio
import itertools as it
import os
import random
import time

async def makeitem(size: int = 5) -> str:
    return os.urandom(size).hex()

async def randsleep(a: int = 1, b: int = 5, caller=None) -> None:
    i = random.randint(0, 10)
    if caller:
        print(f"{caller} sleeping for {i} seconds.")
    await asyncio.sleep(i)

async def produce(name: int, q: asyncio.Queue) -> None:
    n = random.randint(0, 10)
    for _ in it.repeat(None, n):  # Synchronous loop for each single producer
        await randsleep(caller=f"Producer {name}")
        i = await makeitem()
        t = time.perf_counter()
        await q.put((i, t))
        print(f"Producer {name} added <{i}> to queue.")

async def consume(name: int, q: asyncio.Queue) -> None:
    while True:
        await randsleep(caller=f"Consumer {name}")
        i, t = await q.get()
        now = time.perf_counter()
        print(f"Consumer {name} got element <{i}>"
              f" in {now-t:0.5f} seconds.")
        q.task_done()

async def main(nprod: int, ncon: int):
    q = asyncio.Queue()
    producers = [asyncio.create_task(produce(n, q)) for n in range(nprod)]
    consumers = [asyncio.create_task(consume(n, q)) for n in range(ncon)]
    await asyncio.gather(*producers)
    await q.join()  # Implicitly awaits consumers, too
    for c in consumers:
        c.cancel()

if __name__ == "__main__":
    import argparse
    random.seed(444)
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--nprod", type=int, default=5)
    parser.add_argument("-c", "--ncon", type=int, default=10)
    ns = parser.parse_args()
    start = time.perf_counter()
    asyncio.run(main(**ns.__dict__))
    elapsed = time.perf_counter() - start
    print(f"Program completed in {elapsed:0.5f} seconds.")
I only see this line triggering the execution for both producer and consumer coroutines.
await asyncio.gather(*producers)
I don't understand why there is no execution in the background before the await line mentioned above, even though the tasks are defined and created in these lines (none of the print statements inside the producers and consumers are displayed until then):
producers = [asyncio.create_task(produce(n, q)) for n in range(nprod)]
consumers = [asyncio.create_task(consume(n, q)) for n in range(ncon)]
While create_task() doesn't start executing the coroutine immediately, it schedules execution in the background at the first possible opportunity, i.e. at the first await that suspends to the event loop.
gather() is just a helper function that waits for the given awaitables to complete. It doesn't prevent previously scheduled coroutines (such as those started with create_task, but also start_server etc.) from executing.
I want to know at what exact moment the consume() coroutine gets called.
Since consume is a coroutine, it is called once, but it can suspend and resume many times, each await serving as a point of suspension and resumption. When you call create_task(), the coroutine is placed in a queue of runnable coroutines. In each iteration of the event loop, asyncio goes through the runnable coroutines and executes a "step" of each, where a step runs the coroutine until the first await that chooses to suspend. In your code the first step happens when your main coroutine suspends in order to wait for gather() to complete.
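A minimal sketch (not from the original answer) that makes the scheduling point visible: the task body only starts once the creating coroutine suspends at an await.

import asyncio

async def child():
    print('child runs')

async def main():
    asyncio.create_task(child())
    print('task created, child has not run yet')
    await asyncio.sleep(0)  # suspend to the event loop; child runs now
    print('back in main')

asyncio.run(main())
# Output:
# task created, child has not run yet
# child runs
# back in main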

How can Python bypass my prior code to run later code first?

import asyncio
import time

async def func():
    print('task start')
    await asyncio.sleep(10)
    print('task end')

async def main():
    task1 = asyncio.create_task(func())
    task2 = asyncio.create_task(func())
    task3 = asyncio.create_task(func())
    task4 = asyncio.create_task(func())
    s = time.monotonic()
    print('main start', time.monotonic() - s)
    await task1
    print('main continue', time.monotonic() - s)
    await task2
    print('main continue', time.monotonic() - s)
    await task3
    print('main continue', time.monotonic() - s)
    await task4
    print('main end', time.monotonic() - s)

asyncio.run(main())
This code gives results as below:
main start 0.0
task start
task start
task start
task start
task end
task end
task end
task end
main continue 10.0
main continue 10.0
main continue 10.0
main end
But how is this possible? Python bypassed my earlier print calls, ran the awaitables first, and then went back to make the print calls. How should I make sense of this?
All your tasks sleep for 10 seconds, then continue and finish almost instantly.
So all the await calls unlock at the same time: when task 1 is finished, all the other tasks are finished as well.
You are right that the prints technically could have been interleaved between task end and main continue, but it is an implementation detail that everything appears grouped.
I think you can have a better understanding of what is going on using this tweaked script:
import asyncio
import time

async def func(task_nb, wait):
    print('[%s] task start' % task_nb)
    await asyncio.sleep(wait)
    print('[%s] task end' % task_nb)

async def main():
    task1 = asyncio.create_task(func(1, 1))
    task2 = asyncio.create_task(func(2, 5))
    task3 = asyncio.create_task(func(3, 7))
    task4 = asyncio.create_task(func(4, 2))
    s = time.monotonic()
    print('main start', time.monotonic() - s)
    await task1
    print('main continue', time.monotonic() - s)
    await task2
    print('main continue', time.monotonic() - s)
    await task3
    print('main continue', time.monotonic() - s)
    await task4
    print('main end', time.monotonic() - s)

asyncio.run(main())
You will have a more interesting await behaviour:
main start 1.81000359589234e-07
[1] task start
[2] task start
[3] task start
[4] task start
[1] task end
main continue 1.0019499360005284
[4] task end
[2] task end
main continue 5.001785704000213
[3] task end
main continue 7.003587035000237
main end 7.003632674000073
Your code does what it is supposed to do according to the specification of asyncio, but perhaps you are misunderstanding what a "task" is.
From the docs:
asyncio.create_task(coro, *, name=None)
Wrap the coro coroutine into a Task and schedule its execution. Return the Task object.
This means that, since you created 4 tasks at the beginning of your main, they are all scheduled to start executing at the same time. So they all print task start together, then print task end together, at which point all of your awaits instantly fall through, since all tasks completed at the same time (10 seconds later). Finally you see main continue 10.0 three times.
Try this code; I believe it will have the behavior you are expecting, since awaiting bare coroutines runs them one after another:
async def main():
    task1 = func()
    task2 = func()
    task3 = func()
    task4 = func()
    ...
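To see the contrast directly, here is a self-contained sketch (not from the original answer) timing bare-coroutine awaits against pre-created tasks:

import asyncio
import time

async def work(delay):
    await asyncio.sleep(delay)

async def main():
    s = time.monotonic()
    # Bare coroutines run only when awaited, one after the other: ~2 s total.
    await work(1)
    await work(1)
    print('sequential:', round(time.monotonic() - s))

    s = time.monotonic()
    # Tasks start as soon as the event loop regains control: ~1 s total.
    t1 = asyncio.create_task(work(1))
    t2 = asyncio.create_task(work(1))
    await t1
    await t2
    print('concurrent:', round(time.monotonic() - s))

asyncio.run(main())
# sequential: 2
# concurrent: 1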

How to get one task's return value immediately, rather than waiting until all tasks have completed, in asyncio?

import asyncio
import time

print(f"now: {time.strftime('%X')}")

async def test1():
    print(f"test1 started at {time.strftime('%X')}")
    await asyncio.sleep(5)
    with open('test.txt', 'w') as f:
        f.write('...')
    return 'end....'

async def test2(num):
    print(f"test2 started at {time.strftime('%X')}")
    return num * num

async def main(num):
    res = await asyncio.gather(test1(), test2(num))
    return res

def my(num):
    return asyncio.run(main(num))[1]

print(my(5))
print(f"all end at {time.strftime('%X')}")
From the above code (Python 3.7+), I can only get test2's return value after both test1 and test2 have completed.
How can I let the main function get test2's return value as soon as test2 completes, instead of waiting until test1 is completed? test2 executes more quickly, and test1 must still be executed (it generates the test.txt file).
That means returning test2's result to the main function (or the my function) as soon as possible while test1 and test2 run asynchronously.
To run a set of awaitables/coroutines until any Future/Task finishes or is cancelled, you need the asyncio.wait coroutine:
...
async def main(num):
    # Wrap the coroutines in tasks explicitly: asyncio.wait() no longer
    # accepts bare coroutines as of Python 3.11.
    tasks = [asyncio.create_task(test1()), asyncio.create_task(test2(num))]
    done, pending = await asyncio.wait(tasks,
                                       return_when=asyncio.FIRST_COMPLETED)
    for coro in done:
        return await coro

def my(num):
    return asyncio.run(main(num))

print(my(5))
print(f"all end at {time.strftime('%X')}")
return_when indicates when this function should return
The output:
now: 18:50:16
test1 started at 18:50:16
test2 started at 18:50:16
25
all end at 18:50:16
But since you need all coroutines to be completed, either use the asyncio.as_completed approach, or print the results from the done set, then await the pending set, and change print(my(5)) to just my(5):
...
async def main(num):
    tasks = [asyncio.create_task(test1()), asyncio.create_task(test2(num))]
    done, pending = await asyncio.wait(tasks,
                                       return_when=asyncio.FIRST_COMPLETED)
    for coro in done:
        print(await coro)
    for coro in pending:
        await coro

def my(num):
    return asyncio.run(main(num))

my(5)
print(f"all end at {time.strftime('%X')}")

Why is asyncio queue await get() blocking?

Why is await queue.get() blocking?
import asyncio

async def producer(queue, item):
    await queue.put(item)

async def consumer(queue):
    val = await queue.get()
    print("val = %d" % val)

async def main():
    queue = asyncio.Queue()
    await consumer(queue)
    await producer(queue, 1)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
If I call producer() before consumer(), it works fine.
That is to say, the following works fine:
async def main():
    queue = asyncio.Queue()
    await producer(queue, 1)
    await consumer(queue)
Why isn't await queue.get() yielding control back to the event loop so that the producer coroutine can run, which will populate the queue so that queue.get() can return?
You need to start the consumer and the producer in parallel, e.g. defining main like this:
async def main():
    queue = asyncio.Queue()
    await asyncio.gather(consumer(queue), producer(queue, 1))
If for some reason you can't use gather, then you can do (the equivalent of) this:
async def main():
    queue = asyncio.Queue()
    asyncio.create_task(consumer(queue))
    asyncio.create_task(producer(queue, 1))
    await asyncio.sleep(100)  # what your program actually does
Why isn't await queue.get() yielding control back to the event loop so that the producer coroutine can run, which will populate the queue so that queue.get() can return?
await queue.get() is yielding control back to the event loop. But await means wait, so when your main coroutine says await consumer(queue), that means "resume me once consumer(queue) has completed." Since consumer(queue) is itself waiting for someone to produce something, you have a classic case of deadlock.
Reversing the order works only because your producer is one-shot, so it immediately returns to the caller. If your producer happened to await an external source (such as a socket), you would have a deadlock there as well. Starting them in parallel avoids the deadlock regardless of how producer and consumer are written.
It's because you call await consumer(queue), which means the next line (the producer) will not be called until consumer returns, which of course it never does because nobody has produced anything yet.
Check out the example in the docs and see how they use it there: https://docs.python.org/3/library/asyncio-queue.html#examples
another simple example:
import asyncio
import random

async def produce(queue, n):
    for x in range(1, n + 1):
        # produce an item
        print('producing {}/{}'.format(x, n))
        # simulate i/o operation using sleep
        await asyncio.sleep(random.random())
        item = str(x)
        # put the item in the queue
        await queue.put(item)
    # indicate the producer is done
    await queue.put(None)

async def consume(queue):
    while True:
        # wait for an item from the producer
        item = await queue.get()
        if item is None:
            # the producer emits None to indicate that it is done
            break
        # process the item
        print('consuming item {}...'.format(item))
        # simulate i/o operation using sleep
        await asyncio.sleep(random.random())

loop = asyncio.get_event_loop()
queue = asyncio.Queue()  # the loop argument was removed in Python 3.10
producer_coro = produce(queue, 10)
consumer_coro = consume(queue)
loop.run_until_complete(asyncio.gather(producer_coro, consumer_coro))
loop.close()
You should use .run_until_complete() with .gather()
Here is your updated code:
import asyncio

async def producer(queue, item):
    await queue.put(item)

async def consumer(queue):
    val = await queue.get()
    print("val = %d" % val)

queue = asyncio.Queue()
loop = asyncio.get_event_loop()
loop.run_until_complete(
    asyncio.gather(consumer(queue), producer(queue, 1))
)
loop.close()
Out:
val = 1
Also, you could use .run_forever() with .create_task().
Your code snippet then becomes:
import asyncio

async def producer(queue, item):
    await queue.put(item)

async def consumer(queue):
    val = await queue.get()
    print("val = %d" % val)

queue = asyncio.Queue()
loop = asyncio.get_event_loop()
loop.create_task(consumer(queue))
loop.create_task(producer(queue, 1))
try:
    loop.run_forever()
except KeyboardInterrupt:
    loop.close()
Out:
val = 1
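On Python 3.7+, the same producer/consumer pairing is usually written with asyncio.run instead of managing the loop by hand (a minimal sketch, not part of the original answers):

import asyncio

async def producer(queue, item):
    await queue.put(item)

async def consumer(queue):
    val = await queue.get()
    print("val = %d" % val)

async def main():
    queue = asyncio.Queue()
    # Run both coroutines concurrently so get() and put() can rendezvous.
    await asyncio.gather(consumer(queue), producer(queue, 1))

asyncio.run(main())  # prints: val = 1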
