Elapsed time of coroutines/futures using asyncio.gather() - python

I have a list of async queries which I'm gathering with asyncio.gather() and waiting on with loop.run_until_complete(). Something like:
queries = [
    async_query_a(),
    async_query_b()
]
loop = asyncio.get_event_loop()
tasks = asyncio.gather(*queries)
results = loop.run_until_complete(tasks)
I would like to know the "waiting time" of each of the queries. Something like a @log_performance wrapper which logs the elapsed time until the future/coroutine completes.

Here is an example implementation of a timecoro decorator timing a coroutine function.
import asyncio
import functools
import logging
import random
import time


def timecoro(corofn):
    @functools.wraps(corofn)
    async def wrapper(*args, **kwargs):
        start = time.time()
        try:
            result = await corofn(*args, **kwargs)
        except Exception:
            finish = time.time() - start
            logging.info('%s failed in %.2f', corofn, finish)
            raise
        else:
            finish = time.time() - start
            logging.info('%s succeeded in %.2f', corofn, finish)
            return result
    return wrapper


@timecoro
async def async_query_a():
    await asyncio.sleep(random.randint(0, 4))


@timecoro
async def async_query_b():
    await asyncio.sleep(random.randint(0, 4))
    raise RuntimeError


async def main():
    queries = [
        async_query_a(),
        async_query_b(),
    ]
    await asyncio.gather(*queries)


if __name__ == '__main__':
    logging.basicConfig(level='INFO')
    asyncio.run(main())
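If you can't add the decorator where the query functions are defined, the same wrapper can also be applied at the call site. A minimal sketch, assuming the timecoro defined above is in scope and using stand-in query coroutines:
import asyncio
import logging

# async_query_a / async_query_b here stand in for the real, undecorated queries.
async def async_query_a():
    await asyncio.sleep(1)

async def async_query_b():
    await asyncio.sleep(2)

async def main():
    # Wrap at the call site instead of with @timecoro at the definition.
    queries = [
        timecoro(async_query_a)(),
        timecoro(async_query_b)(),
    ]
    await asyncio.gather(*queries)

if __name__ == '__main__':
    logging.basicConfig(level='INFO')
    asyncio.run(main())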

Related

Aggregation of 2 RabbitMQ messages does not work properly (messages hanging unacked)

I need to listen for tasks on 2 queues, so I wrote the code below, but it has a problem. Currently it behaves like this: if the code starts when both queues are full, it works great. But if the queues were empty, or one of them was, the code reads messages but does not process them (does not send an ack, does not run the logic). The messages stay unacked until I stop the code. I do not see any reason for them to remain unacked and unprocessed.
I can't understand what is wrong with the code. Maybe there is another way to aggregate 2 or more queues like this?
# task_processor.py
from aio_pika import IncomingMessage


class TaskProcessor:
    MAX_TASKS_PER_INSTANCE = 1

    def __init__(self):
        self._tasks = []

    def can_accept_new_task(self) -> bool:
        return len(self._tasks) < self.MAX_TASKS_PER_INSTANCE

    async def process(self, message: IncomingMessage):
        self._tasks.append(message)
        print(message.body)
        await message.ack()
        self._tasks.pop()


# main.py
import asyncio
from asyncio import QueueEmpty
from typing import Callable

import aio_pika
from aio_pika import RobustQueue
from dotenv import load_dotenv

load_dotenv()

from core.logger.logger import logger
from core.services.rabbitmq.task_processor.task_processor import TaskProcessor


async def get_single_task(queue: RobustQueue):
    while True:
        try:
            msg = await queue.get(timeout=3600)
            return msg
        except QueueEmpty:
            await asyncio.sleep(3)
        except asyncio.exceptions.TimeoutError:
            logger.warning('queue timeout error')
            pass
        except Exception as ex:
            logger.error(f"{queue} errored", exc_info=ex)


async def task_aggregator(queue1: RobustQueue, queue2: RobustQueue, should_take_new_task_cb: Callable):
    while True:
        if should_take_new_task_cb():
            queue2, queue1 = queue1, queue2
            gen1 = get_single_task(queue1)
            gen2 = get_single_task(queue2)
            done, _ = await asyncio.wait([gen1, gen2], return_when=asyncio.FIRST_COMPLETED)
            for item in done:
                result = item.result()
                yield result
        else:
            await asyncio.sleep(1)


async def tasks(queue1: RobustQueue, queue2: RobustQueue, should_take_new_task_cb: Callable):
    async for task in task_aggregator(queue1, queue2, should_take_new_task_cb):
        yield task


async def main():
    connection = await aio_pika.connect_robust(
        f"amqp://user:password@host:port/vhost?heartbeat={180}"
    )
    channel1 = connection.channel()
    channel2 = connection.channel()
    await channel1.initialize()
    await channel2.initialize()
    queue1 = await channel1.get_queue('queue1')
    queue2 = await channel2.get_queue('queue2')
    task_processor = TaskProcessor()
    task_generator = tasks(queue1, queue2, task_processor.can_accept_new_task)
    while True:
        if task_processor.can_accept_new_task():
            task = await anext(task_generator)
            await task_processor.process(task)
        else:
            await asyncio.sleep(1)


if __name__ == '__main__':
    asyncio.run(main())

Pause all asyncio tasks in Python

I have some asyncio tasks and I need to pause all of them.
This is the relevant part of my code:
import asyncio
import random


async def workers1():
    while True:
        k = random.randint(100, 200)
        await asyncio.sleep(k)
        await my_print(k)


async def workers2():
    while True:
        k = random.randint(100, 200)
        await asyncio.sleep(k)
        await my_print(k)


async def my_print(k):
    print(k)
    if k == 122:
        # >>>>>> suspend all of the workers
        while k != 155:
            k = await repair()
            await asyncio.sleep(1)
        # >>>>>> resume all of the workers


async def main():
    tasks = [asyncio.create_task(workers1()),
             asyncio.create_task(workers2())
             ]
    [await x for x in tasks]


if __name__ == '__main__':
    asyncio.run(main())
How can I suspend all of the workers when trouble happens in my_print, and resume all of the tasks after the repair in my_print is done?
I would be glad if you could give an example.
I have seen this link, but that's not what I need.
Simply replace your call to await asyncio.sleep(1) with time.sleep(1). If your code doesn't have an await expression in it, all the other tasks are effectively blocked.
import asyncio
import random
import time


async def workers1():
    while True:
        k = random.randint(100, 200)
        await asyncio.sleep(k)
        await my_print(k)


async def workers2():
    while True:
        k = random.randint(100, 200)
        await asyncio.sleep(k)
        await my_print(k)


async def my_print(k):
    print(k)
    if k == 122:
        # >>>>>> suspend all of the workers
        while k != 155:
            k = random.randint(100, 200)
            time.sleep(1.0)  # CHANGE HERE
        # >>>>>> resume all of the workers


async def main():
    tasks = [asyncio.create_task(workers1()),
             asyncio.create_task(workers2())
             ]
    [await x for x in tasks]


if __name__ == '__main__':
    asyncio.run(main())
So, first, note that the time.sleep trick can be replaced with any non-asynchronous code: anything that runs synchronously will block the other tasks just as well.
That includes setting up a second asyncio loop in a different thread and running tasks in that loop.
The following code uses ThreadPoolExecutor from concurrent.futures to set up a new event loop. In particular:
future = executor.submit(asyncio.run, task_3())
will submit task_3 to a new thread, where asyncio.run starts a fresh event loop for it.
The next line future.result() blocks the entire first loop (task_1 and task_2) until task_3 exits.
In task_3 you can do any asyncio operations you like, and until that exits all of the existing tasks will be suspended.
import asyncio
import concurrent.futures


async def task_1():
    while True:
        print('task 1 runs')
        await asyncio.sleep(1)


async def task_2():
    print('task 2 starts')
    await asyncio.sleep(5)
    print('first set of tasks suspends')
    future = executor.submit(asyncio.run, task_3())
    print('suspending existing tasks')
    future.result()
    print('resuming tasks')


async def task_3():
    print('task 3 runs')
    await asyncio.sleep(4)
    print('task 3 finishes')


async def main():
    asyncio.ensure_future(task_1())
    asyncio.ensure_future(task_2())
    await asyncio.sleep(15)


executor = concurrent.futures.ThreadPoolExecutor()
asyncio.run(main())

How to measure time spent in blocking code while using asyncio in Python?

I'm currently migrating some Python code that used to be blocking to use asyncio with async/await. It is a lot of code to migrate at once, so I would prefer to do it gradually and have metrics. With that in mind, I want to create a decorator to wrap some functions and know how long they block the event loop. For example:
def measure_blocking_code(f):
    def wrapper(*args, **kwargs):
        # ?????
        # It should measure JUST 1 second,
        # not 5, which is what the whole async function takes
    return wrapper


@measure_blocking_code
async def my_function():
    my_blocking_function()       # Takes 1 second
    await my_async_function()    # Takes 2 seconds
    await my_async_function_2()  # Takes 2 seconds
I know the event loop has a debug mode that already reports this, but I need to get that information for specific functions.
TLDR;
This decorator does the job:
import asyncio
import time


def measure_blocking_code(f):
    async def wrapper(*args, **kwargs):
        t = 0
        coro = f(*args, **kwargs)
        try:
            while True:
                t0 = time.perf_counter()
                future = coro.send(None)
                t1 = time.perf_counter()
                t += t1 - t0
                while not future.done():
                    await asyncio.sleep(0)
                future.result()  # raises exceptions if any
        except StopIteration as e:
            print(f'Function took {t:.2e} sec')
            return e.value
    return wrapper
Explanation
This workaround exploits the conventions used in the asyncio implementation in CPython. These conventions are a superset of PEP 492. In other words:
You can generally use async/await without knowing these details.
This might not work with other async libraries like trio.
An asyncio coroutine object (coro) can be driven by calling its .send() method. This runs only the blocking code, until an async call yields a Future object. By measuring only the time spent in .send(), the duration of the blocking code can be determined.
I finally found a way to do it. I hope it helps somebody:
import asyncio
import time


def measure(f):
    async def wrapper(*args, **kwargs):
        coro_wrapper = f(*args, **kwargs).__await__()
        fut = asyncio.Future()
        total_time = 0

        def done(arg=None):
            try:
                nonlocal total_time
                start_time = time.perf_counter()
                next_fut = coro_wrapper.send(arg)
                end_time = time.perf_counter()
                total_time += end_time - start_time
                next_fut.add_done_callback(done)
            except StopIteration:
                fut.set_result(arg)
            except Exception as e:
                fut.set_exception(e)

        done()
        res = await fut
        print('Blocked for: ' + str(total_time) + ' seconds')
        return res
    return wrapper
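A quick usage sketch of the decorator above; my_function and the blocking time.sleep call are just placeholders, and the printed total should reflect only the time spent off the event loop (roughly 1 second here):
import asyncio
import time

# Assumes the measure decorator defined above is in scope.

@measure
async def my_function():
    time.sleep(1)           # placeholder blocking work (counted)
    await asyncio.sleep(2)  # asynchronous wait (not counted)

asyncio.run(my_function())  # prints something like "Blocked for: 1.0... seconds"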

Does calling thread.join() block the event loop in an asynchronous context?

I'm implementing a web API using aiohttp, deployed using gunicorn with uvloop enabled (--worker-class aiohttp.GunicornUVLoopWebWorker). Therefore, my code always runs in an asynchronous context. I had the idea of implementing parallel jobs in the handling of requests for better performance.
I'm not using asyncio for this because I want parallelism, not concurrency.
I'm aware of multiprocessing and the GIL problem in python. But joining a process also applies to my question.
Here is an example:
from aiohttp.web import middleware


@middleware
async def context_init(request, handler):
    request.context = {}
    request.context['threads'] = []
    ret = await handler(request)
    for thread in request.context['threads']:
        thread.join()
    return ret
Taking into account that thread.join() or process.join() blocks the current thread, this will block the event loop (as far as my knowledge goes). How can I join asynchronously? What I want can be represented figuratively like this: await thread.join() or await process.join().
Update:
Thanks to @user4815162342 I was able to write proper code for my project:
Middleware:
from aiohttp.web import middleware
from util.process_session import ProcessSession


@middleware
async def context_init(request, handler):
    request.context = {}
    request.context['process_session'] = ProcessSession()
    request.context['processes'] = {}
    ret = await handler(request)
    await request.context['process_session'].wait_for_all()
    return ret
Util:
import asyncio
import concurrent.futures
from functools import partial


class ProcessSession():
    def __init__(self):
        self.loop = asyncio.get_running_loop()
        self.pool = concurrent.futures.ProcessPoolExecutor()
        self.futures = []

    async def wait_for_all(self):
        await asyncio.wait(self.futures)

    def add_process(self, f, *args, **kwargs):
        ret = self.loop.run_in_executor(self.pool, partial(f, *args, **kwargs))
        self.futures.append(ret)
        return ret


class ProcessBase():
    def __init__(self, process_session, f, *args, **kwargs):
        self.future = process_session.add_process(f, *args, **kwargs)

    async def wait(self):
        await asyncio.wait([self.future])
        return self.future.result()
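For completeness, a handler could hand work to the session roughly like this; this is only a sketch, and cpu_bound_job plus the handler name are made up:
import time
from aiohttp import web

def cpu_bound_job(n):
    # Placeholder for CPU-heavy work that should run in another process.
    time.sleep(n)
    return n

async def my_handler(request):
    # The middleware above has already attached a ProcessSession to the request.
    session = request.context['process_session']
    future = session.add_process(cpu_bound_job, 2)  # schedule without blocking
    # ... other async work can happen here ...
    result = await future  # or leave it for wait_for_all() in the middleware
    return web.json_response({'result': result})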
Answering your question: Yes, it does block the event loop.
I found that ThreadPoolExecutor works pretty well in this situation.
import asyncio
import functools
from concurrent.futures.thread import ThreadPoolExecutor

from aiohttp.web import middleware


@middleware
async def context_init(request, handler):
    request.context = {}
    request.context['threads'] = []
    ret = await handler(request)
    with ThreadPoolExecutor(1) as executor:
        await asyncio.get_event_loop().run_in_executor(
            executor,
            functools.partial(join_threads, request.context['threads'])
        )
    return ret


def join_threads(threads):
    for t in threads:
        t.join()
I found a solution using multiprocessing. It can be done using a Pool. The standard library provides some "async" methods such as apply_async (it's not really async; it just separates the submission of the work from the retrieval of the process's output).
Using a simple async wrapper, I managed to deliver what I wanted:
import asyncio
from multiprocessing import Pool

from async_converter import sync_to_async


def f(x):
    i = 0
    while i < 10000000 * x:
        i = i + 1
    print("Finished: " + str(x))
    return i


async def run():
    print("Started with run")
    with Pool(processes=4) as pool:  # start 4 worker processes
        result1 = pool.apply_async(f, (10,))  # evaluate "f(10)" asynchronously
        result2 = pool.apply_async(f, (2,))
        res1 = await sync_to_async(result1.get)()
        print(res1)
        res2 = await sync_to_async(result2.get)()
        print(res2)


async def dummy(output):
    print(output)


async def main():
    # Schedule the calls *concurrently*:
    await asyncio.gather(
        run(),
        dummy("Nice"),
        dummy("Async"),
        dummy("Loop"),
        dummy("Perfect"),
        dummy("Dummy1"),
        dummy("Dummy2"),
        dummy("Dummy3"),
        dummy("Dummy4"),
        dummy("Dummy5"),
        dummy("Dummy6"),
        dummy("Dummy7"),
        dummy("Dummy8"),
        dummy("Dummy9"),
        dummy("Dummy10"),
    )


loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
outputs:
Perfect
Dummy6
Nice
Dummy1
Dummy7
Started with run
Dummy2
Dummy8
Dummy3
Dummy9
Async
Dummy4
Dummy10
Loop
Dummy5
Finished: 2
Finished: 10
100000000
20000000
Parallelism with asyncio :)
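As a side note, a similar result can be sketched without the external sync_to_async helper by submitting the blocking function to a ProcessPoolExecutor through loop.run_in_executor, whose return value is directly awaitable; a minimal sketch reusing f from above:
import asyncio
from concurrent.futures import ProcessPoolExecutor


def f(x):
    # Same CPU-bound function as above.
    i = 0
    while i < 10000000 * x:
        i = i + 1
    return i


async def run():
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor(max_workers=4) as pool:
        # run_in_executor returns awaitable futures, so no extra wrapper is needed.
        results = await asyncio.gather(
            loop.run_in_executor(pool, f, 10),
            loop.run_in_executor(pool, f, 2),
        )
    print(results)


if __name__ == '__main__':
    asyncio.run(run())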

Can't pickle coroutine objects when ProcessPoolExecutor is used in class

I'm trying to get asyncio to work with subprocesses and limitations. I've accomplished this in a functional way, but when I tried to implement the same logic in OOP style, several problems showed up, mostly "Can't pickle coroutine/generator" errors. I tracked down some of these, but not all.
import asyncio
from concurrent.futures import ProcessPoolExecutor
from itertools import islice
from random import randint


class async_runner(object):

    def __init__(self):
        self.futures = []  # container to store current futures
        self.futures_total = []
        self.loop = asyncio.get_event_loop()  # main event_loop
        self.executor = ProcessPoolExecutor()
        self.limit = 1

    def run(self, func, *args):
        temp_loop = asyncio.new_event_loop()
        try:
            coro = func(*args)
            asyncio.set_event_loop(temp_loop)
            ret = temp_loop.run_until_complete(coro)
            return ret
        finally:
            temp_loop.close()

    def limit_futures(self, futures, limit):
        self.futures_total = iter(futures)
        self.futures = [future for future in islice(self.futures_total, 0, limit)]

        async def first_to_finish():
            while True:
                await asyncio.sleep(0)
                for f in self.futures:
                    if f.done():  # here raised TypeError: can't pickle coroutine objects
                        print(f.done())
                        self.futures.remove(f)
                        try:
                            # newf = next(self.futures_total)
                            # self.futures.append(newf)
                            print(f.done())
                        except StopIteration as e:
                            pass
                        return f.result()

        while len(self.futures) > 0:
            yield first_to_finish()

    async def run_limited(self, func, args, limit):
        self.limit = int(limit)
        self.futures_total = (self.loop.run_in_executor(self.executor, self.run, func, x)
                              for x in range(110000, 119990))
        for ret in self.limit_futures(self.futures_total, 4):  # limitation - 4 per all processes
            await ret

    def set_execution(self, func, args, limit):
        ret = self.loop.run_until_complete(self.run_limited(func, args, limit))
        return ret


async def asy(x):
    print('enter: ', x)
    await asyncio.sleep(randint(1, 3))
    print('finishing ', x)
    return x


runner = async_runner()
ret = runner.set_execution(asy, urls, 2)
print(ret)
But this works fine:
import asyncio
import time
from concurrent.futures import ProcessPoolExecutor
from itertools import islice


async def asy(x):
    print('enter: ', x)
    await asyncio.sleep(1)
    print('finishing ', x)
    return x


def run(corofn, *args):
    loop = asyncio.new_event_loop()
    try:
        coro = corofn(*args)
        asyncio.set_event_loop(loop)
        ret = loop.run_until_complete(coro)
        # print(ret)
        return ret
    finally:
        loop.close()


def limit_futures(futures, limit):
    futures_sl = [
        c for c in islice(futures, 0, limit)
    ]
    print(len(futures_sl))

    async def first_to_finish(futures):
        while True:
            await asyncio.sleep(0)
            for f in futures_sl:
                if f.done():
                    futures_sl.remove(f)
                    try:
                        newf = next(futures)
                        futures_sl.append(newf)
                    except StopIteration as e:
                        pass
                    return f.result()

    while len(futures_sl) > 0:
        yield first_to_finish(futures)


async def main():
    loop = asyncio.get_event_loop()
    executor = ProcessPoolExecutor()
    futures = (loop.run_in_executor(executor, run, asy, x) for x in range(110000, 119990))
    '''
    CASE balls to the wall!
    await asyncio.gather(*futures)
    '''
    for ret in limit_futures(futures, 4):  # limitation - 4 per all processes
        await ret


if __name__ == '__main__':
    start = time.time()
    '''
    # CASE single
    ret = [asy(x) for x in range(510000, 510040)]
    exit()
    '''
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    print("Elapsed time: {:.3f} sec".format(time.time() - start))
I can't understand why the multiprocessing module tries to pickle these objects only in the OOP version, but not in the functional one.
The reason why multiprocessing needs to pickle the async_runner instance is that self.run is a bound method, meaning that it "contains" the async_runner instance.
Since you're not actually using self in the run method, you can just make it a staticmethod to avoid this problem.
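A minimal sketch of that change, assuming the rest of async_runner stays as posted:
import asyncio


class async_runner(object):
    # ... other attributes and methods unchanged ...

    @staticmethod
    def run(func, *args):
        # No reference to self, so pickling this callable no longer drags
        # the whole async_runner instance (loop, executor, futures) into
        # the worker process.
        temp_loop = asyncio.new_event_loop()
        try:
            coro = func(*args)
            asyncio.set_event_loop(temp_loop)
            return temp_loop.run_until_complete(coro)
        finally:
            temp_loop.close()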
