Time single iteration of async for loop without global state - python

I'm trying to time, and record in the program, how long it takes to complete a single iteration of an async generator + for loop, without resorting to global state. For example, given
import asyncio


async def run():
    """Feed every page produced by pull() to push(), one at a time."""
    async for page in pull():
        await push(page)


async def pull():
    """Async generator: announce each iteration, wait one second, yield its index."""
    for i in range(0, 3):
        print(f'Start of iteration {i}')
        await asyncio.sleep(1)
        yield i


async def push(i):
    """Wait one second, then report the end of iteration i."""
    await asyncio.sleep(1)
    print(f'End of iteration {i}')


loop = asyncio.get_event_loop()
loop.run_until_complete(run())
that outputs
Start of iteration 0
End of iteration 0
Start of iteration 1
End of iteration 1
Start of iteration 2
End of iteration 2
I would like to record the time between each Start of iteration i and the next End of iteration i.
How can this be done? Ideally without global state, and ideally where the timing code is somewhat decoupled from the rest (e.g. with decorators)

Instead of passing around the i as an integer, you can pass a structure with some state. Here is an example of augmenting the i with some timing information:
Timing info:
class TimerInt(int):
    """An int that also remembers the wall-clock time at which it was created."""

    def __new__(cls, *args, **kwargs):
        # int is immutable, so the value has to be fixed in __new__.
        my_int = super().__new__(cls, *args, **kwargs)
        my_int._start_time = time.time()
        return my_int

    @property
    def time_since_create(self):
        """Seconds elapsed (by time.time()) since this instance was constructed."""
        return time.time() - self._start_time
This gives the int a time_since_create property that can be used for some crude timing.
Test Code:
import asyncio
import time


async def run():
    """Feed every page produced by pull() to push(), one at a time."""
    async for page in pull():
        await push(page)


async def pull():
    """Async generator yielding TimerInt indices, so each one carries its start time."""
    for i in range(0, 3):
        i = TimerInt(i)
        print(f'Start of iteration {i}')
        await asyncio.sleep(1)
        yield i


async def push(i):
    """Wait one second, then report how long ago i was created."""
    await asyncio.sleep(1)
    print(f'End of iteration {i}: {i.time_since_create:.2f}')


loop = asyncio.get_event_loop()
loop.run_until_complete(run())
Test Results:
Start of iteration 0
End of iteration 0: 2.01
Start of iteration 1
End of iteration 1: 2.00
Start of iteration 2
End of iteration 2: 2.00

If you only care about the elapsed time, you can calculate it in run(). Here is an example; I think it should work because, if we look only at this async for loop, the process is linear.
At first, end - start equals the time from the first iteration of pull() to the end of push(page). The end time is then used as the next start time, so the next end - start equals the time from the second iteration of pull() to the end of push(page), and so on.
async def run():
    """Print how long each pull()-then-push() iteration of the loop took."""
    start = time.time()
    async for page in pull():
        await push(page)
        end = time.time()
        # f-string, so the elapsed time is interpolated instead of printed literally.
        print(f"Cost {end - start} seconds")
        # The end of this iteration is where the next pull() starts.
        start = end
If I am wrong, correct me please.

Related

How to measure time spent in blocking code while using asyncio in Python?

I'm currently migrating some Python code that used to be blocking to use asyncio with async/await. It is a lot of code to migrate at once so I would prefer to do it gradually and have metrics. With that thing in mind I want to create a decorator to wrap some functions and know how long they are blocking the event loop. For example:
def measure_blocking_code(f):
    def wrapper(*args, **kwargs):
        # ?????
        # It should measure JUST 1 second
        # not 5 which is what the whole async function takes
        ...  # placeholder: this is the part the question asks for
    return wrapper


@measure_blocking_code
async def my_function():
    my_blocking_function()  # Takes 1 seconds
    await my_async_function()  # Takes 2 seconds
    await my_async_function_2()  # Takes 2 seconds
I know the event loop has a debug function that already reports this, but I need to get that information for specific functions.
TLDR;
This decorator does the job:
def measure_blocking_code(f):
    """Decorate coroutine function f to report the time it spends blocking.

    The wrapped coroutine is driven by hand with send(); only the time spent
    inside send() is accumulated, so time spent suspended on an awaited
    future is not counted. The total is printed when the coroutine finishes,
    and the coroutine's return value is returned.
    """
    async def wrapper(*args, **kwargs):
        t = 0
        # Forward the caller's arguments to the wrapped coroutine function.
        coro = f(*args, **kwargs)
        try:
            while True:
                t0 = time.perf_counter()
                try:
                    future = coro.send(None)
                finally:
                    # Also count the last segment, which ends by raising
                    # StopIteration (or a real exception) out of send().
                    t += time.perf_counter() - t0
                if future is None:
                    # A bare yield (e.g. asyncio.sleep(0)) only asks for one
                    # pass of the event loop.
                    await asyncio.sleep(0)
                else:
                    # NOTE: this polls the future once per loop pass.
                    while not future.done():
                        await asyncio.sleep(0)
                    # The future's result or exception is delivered inside the
                    # coroutine on the next send(), so its own try/except runs.
        except StopIteration as e:
            print(f'Function took {t:.2e} sec')
            return e.value
    return wrapper
Explanation
This workaround exploits the conventions used in asyncio implementation in cPython. These conventions are a superset of PEP-492. In other words:
You can generally use async/await without knowing these details.
This might not work with other async libraries like trio.
An asyncio coro object (coro) can be executed by calling .send() member. This will only run the blocking code, until an async call yields a Future object. By only measuring the time spent in .send(), the duration of the blocking code can be determined.
I finally found the way. I hope it helps somebody
import asyncio
import time
def measure(f):
    """Decorate coroutine function f to report the time it spends blocking.

    The coroutine is stepped from future done-callbacks instead of by the
    event loop's Task machinery; only the time spent inside send() is added
    to the total. The total is printed once the coroutine finishes, and its
    return value (or exception) is propagated to the caller.
    """
    async def wrapper(*args, **kwargs):
        coro_wrapper = f(*args, **kwargs).__await__()
        loop = asyncio.get_event_loop()
        fut = loop.create_future()
        total_time = 0

        def done(arg=None):
            # arg is the completed future when invoked as a done-callback; the
            # coroutine fetches the result from that future itself.
            nonlocal total_time
            try:
                start_time = time.perf_counter()
                try:
                    next_fut = coro_wrapper.send(None)
                finally:
                    # Count the final segment too (the one that ends by raising).
                    total_time += time.perf_counter() - start_time
                if next_fut is None:
                    # Bare yield (e.g. asyncio.sleep(0)): resume on the next loop pass.
                    loop.call_soon(done)
                else:
                    next_fut.add_done_callback(done)
            except StopIteration as e:
                # The coroutine's return value travels on StopIteration.value.
                fut.set_result(e.value)
            except Exception as e:
                fut.set_exception(e)

        done()
        res = await fut
        print('Blocked for: ' + str(total_time) + ' seconds')
        return res
    return wrapper

Beginner async/await question for api requests

I want to speed up some API requests. To figure out how, I copied some code that runs asynchronously, but when I try my own code it is no longer asynchronous. Maybe someone can spot the mistake?
Copy Code (guess from stackoverflow):
#!/usr/bin/env python3
import asyncio


# NOTE: generator-based coroutines (@asyncio.coroutine / yield from) are the
# legacy style this snippet was written in.
@asyncio.coroutine
def func_normal():
    print('A')
    yield from asyncio.sleep(5)
    print('B')
    return 'saad'


@asyncio.coroutine
def func_infinite():
    for i in range(10):
        print("--%d" % i)
    return 'saad2'


loop = asyncio.get_event_loop()
tasks = func_normal(), func_infinite()
a, b = loop.run_until_complete(asyncio.gather(*tasks))
print("func_normal()={a}, func_infinite()={b}".format(**vars()))
loop.close()
My "own" code (I need at the end a list returned and merge the results of all functions):
import asyncio
import time


@asyncio.coroutine
def say_after(start,count,say,yep=True):
    retl = []
    if yep:
        time.sleep(5)
    for x in range(start,count):
        retl.append(x)
    # NOTE(review): indentation was lost in the source; print(say) is assumed
    # to run once after the loop -- confirm.
    print(say)
    return retl


def main():
    print(f"started at {time.strftime('%X')}")
    loop = asyncio.get_event_loop()
    tasks = say_after(10,20,"a"), say_after(20,30,"b",False)
    a, b = loop.run_until_complete(asyncio.gather(*tasks))
    print("func_normal()={a}, func_infinite()={b}".format(**vars()))
    loop.close()
    c = a + b
    #print(c)
    print(f"finished at {time.strftime('%X')}")


main()
Or am I completely wrong, and should I solve this with multithreading instead? What would be the best way to make API requests that each return a list that I need to merge?
I added a comment to each section that needs improvement, and removed some parts to simplify the code.
In fact, I didn't find any performance uplift from wrapping range() in a coroutine and using async def; it might be worthwhile with heavier operations.
import asyncio
import time


# @asyncio.coroutine IS DEPRECATED since python 3.8
@asyncio.coroutine
def say_after(wait=True):
    result = []
    if wait:
        print("I'm sleeping!")
        time.sleep(5)
        print("'morning!")
        # This BLOCKs thread, but release GIL so other thread can run.
        # But asyncio runs in ONE thread, so this still harms simultaneity.
    # normal for is BLOCKING operation.
    for i in range(5):
        result.append(i)
        print(i, end='')
    print()
    return result


def main():
    start = time.time()
    # Loop argument will be DEPRECATED from python 3.10
    # Make main() as coroutine, then use asyncio.run(main()).
    # It will be in asyncio Event loop, without explicitly passing Loop.
    loop = asyncio.get_event_loop()
    tasks = say_after(), say_after(False)
    # As we will use asyncio.run(main()) from now on, this should be await-ed.
    a, b = loop.run_until_complete(asyncio.gather(*tasks))
    print(f"Took {time.time() - start:5f}")
    loop.close()


main()
Better way:
import asyncio
import time
async def say_after(wait=True):
    """Optionally sleep two seconds without blocking, then collect and print 0..4.

    Returns the list [0, 1, 2, 3, 4].
    """
    result = []
    if wait:
        print("I'm sleeping!")
        await asyncio.sleep(2)  # 'await' a coroutine version of it instead.
        print("'morning!")

    # wrap iterator in generator - or coroutine
    async def asynchronous_range(end):
        for _i in range(end):
            yield _i

    # use it with async for
    async for i in asynchronous_range(5):
        result.append(i)
        print(i, end='')
    print()
    return result
async def main():
    """Run a sleeping and a non-sleeping say_after() concurrently and print the elapsed time."""
    start = time.time()
    tasks = say_after(), say_after(False)
    a, b = await asyncio.gather(*tasks)
    print(f"Took {time.time() - start:5f}")
asyncio.run(main())
Result
Your code:
DeprecationWarning: "@coroutine" decorator is deprecated since Python 3.8, use "async def" instead
def say_after(wait=True):
I'm sleeping!
'morning!
01234
01234
Took 5.003802
Better async code:
I'm sleeping!
01234
'morning!
01234
Took 2.013863
Note that the fixed code now finishes its job while the other task is sleeping.

Python asyncio: stop and start a task from another task without losing state?

I would like to stop a Python asyncio task from another task and start it again when some condition in the second task happens.
Please note that I don't want to cancel the coroutine of the first task (the state of that coroutine when it stopped should remain available). Also, I don't care about the exact state the first task is in; I just want the event loop to stop running the first task until told otherwise by the second.
I hope this example code helps understanding the problem:
import asyncio


async def coroutine1():
    """Count upwards forever, printing once per second."""
    i = 0
    while(True):
        i += 1
        print("coroutine1: " + str(i) )
        await asyncio.sleep(1)


async def coroutine2(task1):
    """Count upwards forever; the TODOs mark where task1 should be paused and resumed."""
    i = 0
    while(True):
        i += 1
        if (i > 3) and (i<10):
            pass #TODO: stop task1 here
        else:
            pass #TODO: Maybe check if task1 is running
                 #and start task1 again if it's not?
        print("coroutine2: " + str(i) )
        await asyncio.sleep(1)


async def main_coroutine():
    loop = asyncio.get_event_loop()
    task1 = loop.create_task(coroutine1())
    task2 = loop.create_task(coroutine2(task1))
    done, pending = await asyncio.wait(
        [task1, task2]
        , return_when=asyncio.FIRST_COMPLETED,)


loop = asyncio.get_event_loop()
loop.run_until_complete(main_coroutine())
loop.close()
I would like to stop a python asyncio task from another task and start it again when some condition in the second task happen.
I assume you control the task creation, but don't want to touch the implementation of the coroutine. In your case, you control coroutine2 and main_coroutine, but not the insides of coroutine1.
In that case you can wrap the coroutine in an object whose __await__, instead of the normal yield from loop, checks your stopped flag and waits for a future that tells it when to resume.
class Stoppable:
    """Awaitable wrapper around a coroutine that can be paused and resumed.

    While stopped, the wrapper waits on a future instead of advancing the
    wrapped coroutine; start() resolves that future so stepping resumes.
    """

    def __init__(self, coro):
        self._coro_iter = coro.__await__()
        # None while running; a pending future while stopped.
        self._stopped = None

    def __await__(self):
        while True:
            # Re-checked after every wake-up, since stop() may have been
            # called again before we were resumed.
            while self._stopped:
                print('awaiting stopped')
                yield from self._stopped.__await__()
            try:
                v = next(self._coro_iter)
            except StopIteration as e:
                # The wrapped coroutine's return value is on the exception.
                return e.value
            yield v

    def stop(self):
        """Pause the wrapped coroutine at its next suspension point."""
        # Keep an existing future: replacing it would leave __await__ waiting
        # on a future that start() can no longer resolve.
        if self._stopped is None:
            loop = asyncio.get_event_loop()
            self._stopped = loop.create_future()

    def start(self):
        """Resume the wrapped coroutine if it is currently stopped."""
        if self._stopped is not None:
            self._stopped.set_result(None)
            self._stopped = None
You can use the wrapper to modify coroutine2 to stop and resume the execution of coroutine1 at will:
async def coroutine2(s):
    """Count once per second; stop s on the 3rd tick and start it again on the 10th."""
    i = 0
    while True:
        i += 1
        if i == 3:
            print('stopping coroutine1')
            s.stop()
        elif i == 10:
            print('restarting coroutine1')
            s.start()
        print("coroutine2: " + str(i) )
        await asyncio.sleep(1)


async def main_coroutine():
    loop = asyncio.get_event_loop()
    s = Stoppable(coroutine1())
    fut1 = asyncio.ensure_future(s)
    task2 = loop.create_task(coroutine2(s))
    done, pending = await asyncio.wait(
        [fut1, task2], return_when=asyncio.FIRST_COMPLETED)
The way wrapper works is by unrolling the loop inherent in yield from. For example, to just delegate __await__ to another coroutine, one would write:
def __await__(self):
    # Plain delegation: every value the wrapped iterator yields is passed through.
    yield from self._coro_iter
Written like this, you can't implement stopping because the yield from contains an implicit loop that yields all the values produced by the underlying iterator - something like:
def __await__(self):
    # Hand-unrolled form of the loop that `yield from self._coro_iter` implies.
    while True:
        try:
            v = next(self._coro_iter)
        except StopIteration as e:
            return e.value
        yield v
Taken like this, it is easy enough to add an if that checks for _stopped at each iteration pass, meaning each time we're resumed by the event loop. The remaining hurdle is that one cannot just busy-loop until _stopped is rescinded - we must yield something else to allow the event loop to resume running other coroutines. Fortunately that is easily achieved by making _stopped a future, and yielding from the future. When the future's result is set, we will be automatically resumed and continue executing the wrapped coroutine.
It seems it can't be done.
It's possible to cancel an ongoing task with task1.cancel() and it's possible to create a new task with asyncio.get_event_loop().create_task(newTask).
It's also possible to get the coroutine of a running task with task1._coro, but if we try to create a task again with a previously scheduled coroutine we will get a RuntimeError exception. This is the discussion where that was decided: https://bugs.python.org/issue25887
Finally, a possible way of accomplishing the desired effect is to use an asyncio.Queue object:
import asyncio


async def coroutine1(stop_queue):
    """Count once per second, but only while stop_queue is empty."""
    i = 0
    while(True):
        if stop_queue.empty(): #if the queue is empty keep working.
            i += 1
            print("coroutine1: " + str(i) )
        # Sleep on every pass so the loop still yields while "stopped".
        await asyncio.sleep(1)


async def coroutine2(stop_queue):
    """Count once per second; pause coroutine1 on tick 3 and release it on tick 11."""
    i = 0
    while(True):
        i += 1
        if i == 3:
            await stop_queue.put("whatever..") #put something in the queue
        if i == 11:
            await stop_queue.get() #take something from the queue
        print("coroutine2: " + str(i) )
        await asyncio.sleep(1)


async def main_coroutine():
    stop_queue = asyncio.Queue()
    # asyncio.wait() needs tasks/futures; bare coroutine objects are rejected
    # by Python 3.11+.
    done, pending = await asyncio.wait(
        [asyncio.ensure_future(coroutine1(stop_queue)),
         asyncio.ensure_future(coroutine2(stop_queue))]
        , return_when=asyncio.ALL_COMPLETED,)


loop = asyncio.get_event_loop()
loop.run_until_complete(main_coroutine())
loop.close()

Python asyncio - consumer blocking with asyncio.Event()

I have a program with one producer and two slow consumers, and I'd like to rewrite it with coroutines in such a way that each consumer handles only the last value produced for it (i.e. skips new values generated while it is processing the old ones). I previously used threads and threading.Queue(), but it blocks on put() because the queue is full most of the time.
After reading answer to this question I decided to use asyncio.Event and asyncio.Queue. I wrote this prototype program:
import asyncio


async def l(event, q):
    """Consumer: signal readiness, take one value from q, print it scaled by h."""
    h = 1
    while True:
        # ready
        event.set()
        # get value to process
        a = await q.get()
        # process it
        print(a * h)
        h *= 2


async def m(event, q):
    """Producer: hand i to the consumer when it is ready, then produce the next i."""
    i = 1
    while True:
        # pass element to consumer, when it's ready
        if event.is_set():
            await q.put(i)
            event.clear()
        # produce value
        i += 1


el = asyncio.get_event_loop()
ev = asyncio.Event()
qu = asyncio.Queue(2)
tasks = [
    asyncio.ensure_future(l(ev, qu)),
    asyncio.ensure_future(m(ev, qu))
]
el.run_until_complete(asyncio.gather(*tasks))
el.close()
and I have noticed that l coroutine blocks on q.get() line and doesn't print anything.
It works as I expect after adding asyncio.sleep() in both (I get 1,11,21,...):
import asyncio
import time


async def l(event, q):
    """Consumer: take a value, spend one second on it, print it, then signal readiness."""
    h = 1
    a = 1
    event.set()
    while True:
        # await asyncio.sleep(1)
        a = await q.get()
        # process it
        await asyncio.sleep(1)
        print(a * h)
        event.set()


async def m(event, q):
    """Producer: hand i to the consumer when it is ready; produce a new i every 0.1 s."""
    i = 1
    while True:
        # pass element to consumer, when it's ready
        if event.is_set():
            await q.put(i)
            event.clear()
        await asyncio.sleep(0.1)
        # produce value
        i += 1


el = asyncio.get_event_loop()
ev = asyncio.Event()
qu = asyncio.Queue(2)
tasks = [
    asyncio.ensure_future(l(ev, qu)),
    asyncio.ensure_future(m(ev, qu))
]
el.run_until_complete(asyncio.gather(*tasks))
el.close()
...but I'm looking for solution without it.
Why is it so? How can I fix it? I think I cannot call await l() from m as both of them have states (in original program the first draws solution with PyGame and the second plots results).
The code is not working as expected because the task running the m function is never suspended. While event.is_set() is False, the task just keeps incrementing i without ever awaiting anything. Because this task is never suspended, the task running function l never gets a chance to run. Therefore, you need a way to suspend the task running function m. One way of suspending it is to await another coroutine, which is why adding an asyncio.sleep works as expected.
I think the following code will work as you expect. The LeakyQueue will ensure that only the last value from the producer will be processed by the consumer. As the complexity is very symmetric, the consumer will consume all values produced by the producer. If you increase the delay argument, you can simulate that the consumer only processes the last value created by the producer.
import asyncio
class LeakyQueue(asyncio.Queue):
    """Queue whose put() makes room by discarding the oldest queued item when full."""

    async def put(self, item):
        """Enqueue item, first dropping the item at the head if the queue is full."""
        if self.full():
            # full() implies there is an item to take, so this never waits.
            self.get_nowait()
        await super().put(item)
async def consumer(queue, delay=0):
    """Print every value taken from queue, optionally sleeping delay seconds first."""
    while True:
        a = await queue.get()
        if delay:
            await asyncio.sleep(delay)
        print ('consumer', a)


async def producer(queue):
    """Put an ever-increasing counter into queue, printing each value produced."""
    i = 1
    while True:
        # Awaiting a separate task suspends the producer on every pass, which
        # gives the consumer a chance to run.
        await asyncio.ensure_future(queue.put(i))
        print ('producer', i)
        i += 1


loop = asyncio.get_event_loop()
queue = LeakyQueue(maxsize=1)
tasks = [
    asyncio.ensure_future(consumer(queue, 0)),
    asyncio.ensure_future(producer(queue))
]
loop.run_until_complete(asyncio.gather(*tasks))

Asyncio & rate limiting

I am writing an app based on the asyncio framework. This app interacts with an API that has a rate limit (maximum 2 calls per second). So I moved the methods that interact with the API to Celery, to use it as a rate limiter. But that looks like overhead.
Is there a way to create a new asyncio event loop (or something else) that guarantees coroutines are executed no more than n times per second?
The accepted answer is accurate. Note however that, usually, one would want to get as close to 2QPS as possible. This method doesn't offer any parallelisation, which could be a problem if make_io_call() takes longer than a second to execute. A better solution would be to pass a semaphore to make_io_call, that it can use to know whether it can start executing or not.
Here is such an implementation: RateLimitingSemaphore will only release its context once the rate limit drops below the requirement.
import asyncio
from collections import deque
from datetime import datetime
class RateLimitingSemaphore:
    """Async context manager that delays entry to keep entries under qps_limit per second."""

    def __init__(self, qps_limit, loop=None):
        self.loop = loop or asyncio.get_event_loop()
        self.qps_limit = qps_limit

        # The number of calls that are queued up, waiting for their turn.
        self.queued_calls = 0

        # The times of the last N executions, where N=qps_limit - this should allow us to calculate the QPS within the
        # last ~ second. Note that this also allows us to schedule the first N executions immediately.
        self.call_times = deque()

    async def __aenter__(self):
        self.queued_calls += 1
        while True:
            cur_rate = 0
            if len(self.call_times) == self.qps_limit:
                window = self.loop.time() - self.call_times[0]
                # A zero-length window (coarse clock) would divide by zero;
                # treat it as over the limit.
                cur_rate = len(self.call_times) / window if window > 0 else float('inf')
            if cur_rate < self.qps_limit:
                break
            # Over the limit: wait for this caller's share of the interval,
            # scaled by how many callers are queued.
            interval = 1. / self.qps_limit
            elapsed_time = self.loop.time() - self.call_times[-1]
            await asyncio.sleep(self.queued_calls * interval - elapsed_time)
        self.queued_calls -= 1

        # Keep only the last qps_limit timestamps.
        if len(self.call_times) == self.qps_limit:
            self.call_times.popleft()
        self.call_times.append(self.loop.time())

    async def __aexit__(self, exc_type, exc, tb):
        pass
async def test(qps):
    """Run 5*qps operations through a RateLimitingSemaphore and print the achieved rate."""
    executions = 0

    async def io_operation(semaphore):
        nonlocal executions
        async with semaphore:
            executions += 1

    semaphore = RateLimitingSemaphore(qps)
    start = datetime.now()
    # asyncio.wait() needs tasks/futures; bare coroutine objects are rejected
    # by Python 3.11+.
    await asyncio.wait([asyncio.ensure_future(io_operation(semaphore)) for i in range(5*qps)])
    dt = (datetime.now() - start).total_seconds()
    print('Desired QPS:', qps, 'Achieved QPS:', executions / dt)


if __name__ == "__main__":
    asyncio.get_event_loop().run_until_complete(test(100))
    asyncio.get_event_loop().close()
Will print Desired QPS: 100 Achieved QPS: 99.82723898022084
I believe you are able to write a cycle like this:
while True:
t0 = loop.time()
await make_io_call()
dt = loop.time() - t0
if dt < 0.5:
await asyncio.sleep(0.5 - dt, loop=loop)

Categories

Resources