If I have a slow consumer of an async generator that emits values at a high frequency, and I only care about consuming the latest value (i.e. I'm not fussed about dropping values), is there an elegant way to achieve this? I've taken a look at aiostream but I couldn't find anything that fits.
Here is a simple example:
import asyncio
import aiostream
async def main():
xs = aiostream.stream.count(interval=0.2)
async with xs.stream() as stream:
async for x in stream: # do something here to drop updates that aren't processed in time
print(x)
await asyncio.sleep(1.0)
if __name__ == "__main__":
asyncio.run(main())
I propose using a class that wraps the external generator, since I don't know of an existing library that does this.
The class consumes the generator internally in a task and keeps only the last value. It acts as a wrapper over the generator you really want to consume.
import asyncio
class RelaxedGenerator:
def __init__(self, async_gen):
self.last_value = None # the last value generated
self.consumed_last = True # flags the last value as consumed
self.async_gen = async_gen # generator which we can drop values
self.exhausted = False # flags the generator as fully consumed
    @classmethod
async def start(cls, async_gen):
self = cls(async_gen())
asyncio.create_task(self.generate())
return self
async def generate(self):
# here you can consume the external async generator
# and save only the last value for further process
while True:
try:
self.last_value = await self.async_gen.__anext__()
self.consumed_last = False
except StopAsyncIteration:
self.exhausted = True
break
async def stream(self):
while not self.exhausted:
if self.consumed_last:
                await asyncio.sleep(0.01)  # avoid blocking the event loop
continue
self.consumed_last = True
yield self.last_value
Testing with a simple generator:
import asyncio
from random import uniform
async def numbers_stream(max_=100):
next_int = -1
while next_int < max_:
next_int += 1
yield next_int
await asyncio.sleep(0.2)
async def main():
gen = await RelaxedGenerator.start(numbers_stream)
async for value in gen.stream():
print(value, end=", ", flush=True)
await asyncio.sleep(uniform(1, 2))
asyncio.run(main())
Output:
0, 6, 15, 21, 28, 38, 43, 48, 57, 65, 73, 81, 89, 96,
Other things to keep in mind are whether you want to process the final value and whether the generator you are working with will actually be exhausted in practice. Here I assume that you don't care about the last value and that the generator can be exhausted.
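If you do care about the final value, a small variation of stream() (my own sketch, not part of the class above) keeps looping until the last produced value has actually been handed out:
    async def stream(self):
        # exit only once the generator is exhausted AND its last value was yielded
        while not (self.exhausted and self.consumed_last):
            if self.consumed_last:
                await asyncio.sleep(0.01)  # avoid busy-looping while waiting for a new value
                continue
            self.consumed_last = True
            yield self.last_value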
You could add a queue between your producer and consumer which forgets old results. Unfortunately, there is no implementation for it in the standard library, but it is almost there. If you check the implementation of asyncio.Queue you will notice the use of collections.deque, see https://github.com/python/cpython/blob/3.10/Lib/asyncio/queues.py#L49.
collections.deque takes an optional maxlen argument and silently discards the oldest items once it is full, see https://docs.python.org/3/library/collections.html#collections.deque.
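For example:
import collections

d = collections.deque(maxlen=2)
for i in range(5):
    d.append(i)
print(d)  # deque([3, 4], maxlen=2) -- the oldest items were dropped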
Making use of it enables us to create a custom queue which only keeps the last n items.
import asyncio
import collections
class RollingQueue(asyncio.Queue):
def _init(self, maxsize):
self._queue = collections.deque(maxlen=maxsize)
def full(self):
return False
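Before wiring it up, a quick sanity check (illustrative, reusing the RollingQueue defined above) shows the rolling behaviour:
async def demo():
    q = RollingQueue(1)
    q.put_nowait(1)
    q.put_nowait(2)   # silently replaces the 1
    print(await q.get())  # 2

asyncio.run(demo())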
Now you could use this queue as follows:
async def numbers(nmax):
for n in range(nmax):
yield n
await asyncio.sleep(0.3)
async def fill_queue(producer, queue):
async for item in producer:
queue.put_nowait(item)
queue.put_nowait(None)
async def main():
    queue1 = RollingQueue(1)
    numgen = numbers(10)
    asyncio.create_task(fill_queue(numgen, queue1))
    while True:
        res = await queue1.get()
        if res is None:
            break
        print(res)
        await asyncio.sleep(1)

asyncio.run(main())
Where I set the queue size to 1 to just keep the last item as required in your question.
Using a combination of the two provided answers, I came up with the following solution which seems to work quite well:
import asyncio
import aiostream
import collections
class RollingQueue(asyncio.Queue):
def _init(self, maxsize):
self._queue = collections.deque(maxlen=maxsize)
def full(self):
return False
@aiostream.operator(pipable=True)
async def drop_stream(source, max_n=1):
queue = RollingQueue(max_n)
exhausted = False
async def inner_task():
async with aiostream.streamcontext(source) as streamer:
async for item in streamer:
queue.put_nowait(item)
nonlocal exhausted
exhausted = True
task = asyncio.create_task(inner_task())
try:
while not exhausted:
item = await queue.get()
yield item
finally:
task.cancel()
async def main():
xs = aiostream.stream.count(interval=0.2) | drop_stream.pipe(1) | aiostream.pipe.take(5)
async with xs.stream() as stream:
async for x in stream:
print(x)
await asyncio.sleep(1.0)
if __name__ == "__main__":
asyncio.run(main())
I need to implement the algorithm using asyncio with the following conditions (behaviour):
1. Check that the list of arguments is not empty; if it is empty, finish execution.
2. Pop the next argument from the list, create a coroutine with this argument, and schedule it for execution.
3. No more than 'async_level' coroutines may be executing at the same time.
4. When a coroutine finishes execution, go to step 1.
In other words, the tasks should not all be scheduled at once (as with asyncio.gather), but in parts: when one task finishes execution, a new one takes its place.
I tried to do it with asyncio.as_completed() but it doesn't actually work as expected:
async_level = 4
params_count = 10
params = [i for i in range(1, params_count + 1)]
tasks = {asyncio.create_task(job(param)) for param in params[0: async_level]}
params = iter(params[async_level:])
while True:
    # NOTE: It won't work, because you can't add a task to 'tasks' after 'as_completed' has been invoked, so execution actually ends when the last of the originally passed coroutines ends
for task in asyncio.as_completed(tasks):
print(f"len(tasks) = {len(tasks)}")
await task
try:
param = next(params)
tasks.add(asyncio.create_task(job(param)))
except StopIteration:
print("StopIteration")
break
Also, I tried to implement it using asyncio.BoundedSemaphore, but the first two conditions are not met:
async_level = 4
params_count = 10
params = [i for i in range(1, params_count + 1)]
async def semaphore_job(name, _asyncio_semaphore):
async with _asyncio_semaphore:
await job(name)
asyncio_semaphore = asyncio.BoundedSemaphore(async_level)
jobs = []
# NOTE: This variant schedules all jobs at once, which is a significant drawback because the number of jobs can be overwhelming
for param in params:
jobs.append(asyncio.ensure_future(semaphore_job(param, asyncio_semaphore)))
await asyncio.gather(*jobs)
I would be grateful for any help.
It seems I found the solution myself:
import asyncio
from typing import Callable
from random import randrange
from asyncio import Semaphore, ensure_future, get_event_loop
async def job(name, time_range=10):
timeout = randrange(time_range)
print(f"Task '{name}' started with timeout {timeout}")
await asyncio.sleep(timeout)
print(f"Task '{name}' finished")
return name
async def custom_executor(func: Callable, args: list, async_level: int = 4):
""" Asynchronously executes no more that 'async_level' callables specified by 'func' with corresponding 'args' """
loop = get_event_loop()
sync = Semaphore()
todo = set(args)
doing = set()
def _schedule_task():
if todo:
arg = todo.pop()
fr = func(*arg) if isinstance(arg, (tuple, list, set)) else func(arg)
f = ensure_future(fr, loop=loop)
f.add_done_callback(_on_completion)
doing.add(f)
def _on_completion(f):
doing.remove(f)
sync.release()
_schedule_task()
for _ in range(min(async_level, len(todo))):
_schedule_task()
while True:
if not doing:
break
await sync.acquire()
async def main():
await custom_executor(job, [(1, 3), 7, (8, 2), 12, 5])
if __name__ == '__main__':
asyncio.run(main())
But if you know a better way, please share!
You can create a fixed number of workers and give them tasks using a queue. It's a bit shorter and I find it a bit easier to reason about than your code, which uses callbacks. But YMMV.
async def custom_executor(func, args, async_level=4):
queue = asyncio.Queue(1)
async def worker():
while True:
arg = await queue.get()
fr = func(*arg) if isinstance(arg, (tuple, list, set)) else func(arg)
await fr
queue.task_done()
# create the workers
workers = [asyncio.create_task(worker()) for _ in range(async_level)]
# Feed the workers tasks. Since the queue is bounded, this will also
# wait for previous tasks to finish, similar to what you wanted to
# achieve with as_completed().
for x in args:
await queue.put(x)
await queue.join() # wait for the remaining tasks to finish
# cancel the now-idle workers
for w in workers:
w.cancel()
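Usage is the same as with the callback-based version; for example, reusing the job() coroutine and the argument list from the code above:
async def run():
    await custom_executor(job, [(1, 3), 7, (8, 2), 12, 5], async_level=4)

asyncio.run(run())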
I am planning an asyncio Queue based producer-consumer implementation for processing real-time data, where sending out data in the correct time order is vital. Here is a code snippet:
async def produce(Q, n_jobs):
for i in range(n_jobs):
print(f"Producing :{i}")
await Q.put(i)
async def consume(Q):
while True:
n = await Q.get()
print(f"Consumed :{n}")
x = do_sometask_and_return_the_result(n)
print(f"Finished :{n} and Result: {x}")
async def main(loop):
Q = asyncio.Queue(loop=loop, maxsize=3)
await asyncio.wait([produce(Q, 10), consume(Q), consume(Q), consume(Q)])
print("Done")
Here the producer produces data and puts it into the asyncio Queue. I have multiple consumers to consume and process the data. Looking at the output, the order is maintained when printing "Consumed :{n}" (1, 2, 3, 4, and so on), which is completely fine. But since the function do_sometask_and_return_the_result(n) takes a variable amount of time to return its result, the order is not maintained in the next print, "Finished :{n}" (e.g. 2, 1, 4, 3, 5, ...).
Is there any way to synchronize this, as I need to maintain the order when printing the results? I want to see sequential prints of 'n' (1, 2, 3, 4, ...) even after do_sometask_and_return_the_result(n).
You could use a priority queue (via the Python heapq library) to reorder your jobs after they complete. Something like this:
from heapq import heappush, heappop

# add these variables at global (module) scope
priority_queue = []
current_job_id = 1
job_id_dict = {}
async def produce(Q, n_jobs):
# same as above
async def consume(Q):
while True:
n = await Q.get()
print(f"Consumed :{n}")
x = do_sometask_and_return_the_result(n)
await process_result(n, x)
async def process_result(n, x):
    global current_job_id
    heappush(priority_queue, n)
    job_id_dict[n] = x
    # release results strictly in job-id order
    while priority_queue and current_job_id == priority_queue[0]:
        job_id = heappop(priority_queue)
        print(f"Finished :{job_id} and Result: {job_id_dict[job_id]}")
        current_job_id += 1
async def main(loop):
Q = asyncio.Queue(loop=loop, maxsize=3)
await asyncio.wait([produce(Q, 10), consume(Q), consume(Q), consume(Q)])
print("Done")
For more information on the heapq module: https://docs.python.org/3/library/heapq.html
One of the async functions returns an async generator object. I added loop.run_until_complete(func()), but it still throws the error "TypeError: A Future, a coroutine or an awaitable is required". Below is the code. I'm trying to fetch records from Neo4j asynchronously. I got the async Neo4j class from a GitHub repository. I'm new to this async concept.
from concurrent import futures
import neo4j
from neo4j import GraphDatabase, basic_auth
import time
import traceback
import asyncio
RETRY_WAITS = [0, 1, 4] # How long to wait after each successive failure.
class Neo4j:
"""Neo4j database API."""
def __init__(self, config, loop):
self.config = config
self.loop = loop
self.executor = futures.ThreadPoolExecutor(max_workers=30)
for retry_wait in RETRY_WAITS:
try:
self.init_driver()
break
except:
if retry_wait == RETRY_WAITS[-1]:
raise
else:
print('WARNING: retrying to Init DB; err:')
traceback.print_exc()
                    time.sleep(retry_wait)  # wait for 0, 1, 4... seconds
def init_driver(self):
auth = basic_auth(self.config['user'], self.config['pass'])
self.driver = GraphDatabase.driver(self.config['url'], auth=auth)
async def afetch_start(self, query):
session = self.driver.session(access_mode=neo4j.READ_ACCESS)
def run():
return session.run(query).records()
return session, await self.loop.run_in_executor(self.executor, run)
async def afetch_iterate(self, session, iter):
def iterate():
try:
return next(iter)
except StopIteration:
return None
while True:
res = await self.loop.run_in_executor(self.executor, iterate)
if res is None:
return
else:
yield dict(res)
async def afetch(self, query):
for retry_wait in RETRY_WAITS:
try:
session, iter = await self.afetch_start(query)
break
except (BrokenPipeError, neo4j.exceptions.ServiceUnavailable) as e:
if retry_wait == RETRY_WAITS[-1]:
raise
else:
await asyncio.sleep(retry_wait)
await self.loop.run_in_executor(self.executor, self.init_driver)
async for x in self.afetch_iterate(session, iter):
yield x
await self.loop.run_in_executor(self.executor, session.close)
async def afetch_one(self, query):
async for i in self.afetch(query):
return i
return None
async def aexec(self, query):
async for i in self.afetch(query):
pass
return
config={'url':"bolt://localhost",'user':'neo4j','pass':'pwd'}
loop=asyncio.get_event_loop()
n=Neo4j(config,loop)
loop.run_until_complete(n.afetch("MATCH(p:Person)-[:Acted_in]->(mv:Movies) RETURN p.name as actors"))
loop.close()
--EDIT
I have modified the code to work properly. The query returns 218K rows and it takes 5 minutes to extract the complete list, while the same async operation in C# completes in just 2 seconds. It looks like the above code still doesn't actually run asynchronously.
It's very hard to tell what exactly happens without a reproducible example, but I'll take a guess. You are probably passing the async generator object itself to the event loop; you shouldn't do that. The way to work with async generators is to use async for. Here's an example:
import asyncio
async def func(): # async generator
yield 1
yield 2
yield 3
async def main():
async for i in func(): # get values from async generator
print(i)
asyncio.run(main()) # can be used instead of loop.run_until_complete(main())
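Applied to the Neo4j code from the question, it would look roughly like this (a sketch reusing the question's Neo4j class, config and query; untested against a real database):
async def print_actors(n):
    query = "MATCH(p:Person)-[:Acted_in]->(mv:Movies) RETURN p.name as actors"
    async for record in n.afetch(query):  # afetch is an async generator, so iterate it
        print(record)

loop = asyncio.get_event_loop()
n = Neo4j(config, loop)
loop.run_until_complete(print_actors(n))  # pass a coroutine, not the async generator
loop.close()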
I am implementing an asynchronous iterator to be used with async for which should return a new value at a (mostly) regular interval.
We can illustrate such iterator with a simple clock that will increment a counter every ~n seconds:
import asyncio
class Clock(object):
def __init__(self, interval=1):
self.counter = 0
self.interval = interval
self.tick = asyncio.Event()
asyncio.ensure_future(self.tick_tock())
async def tick_tock(self):
while True:
self.tick.clear()
await asyncio.sleep(self.interval)
self.counter = self.__next__()
self.tick.set()
def __next__(self):
self.counter += 1
return self.counter
def __aiter__(self):
return self
async def __anext__(self):
await self.tick.wait()
return self.counter
Is there a better or cleaner approach than using asyncio.Event? More than one coroutine will async for on this iterator.
In my opinion, your approach is fine. Note that since python 3.6, you can also use asynchronous generators:
import asyncio
from itertools import count

async def clock(start=0, step=1, interval=1.):
    for i in count(start, step):
        yield i
        await asyncio.sleep(interval)
However, you won't be able to share them between multiple coroutines. You would have to run the clock in a task and make the data available through an asynchronous iteration interface, which is essentially what you did in your code. Here's a possible implementation.
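For illustration, here is one way such a wrapper could look (my own sketch, not from the original answer, reusing the clock() generator and imports above): the generator runs in a background task and each new value is fanned out to every waiting consumer with an asyncio.Event, essentially the same idea as the Clock in the question:
class SharedClock:
    """Fan one async generator out to many consumers (illustrative sketch)."""
    def __init__(self, agen):
        self._agen = agen
        self._value = None
        self._tick = asyncio.Event()
        self._task = asyncio.ensure_future(self._pump())

    async def _pump(self):
        async for value in self._agen:
            self._value = value
            self._tick.set()    # wake everyone currently waiting...
            self._tick.clear()  # ...and reset for the next value

    def __aiter__(self):
        return self

    async def __anext__(self):
        await self._tick.wait()
        return self._value

async def main():
    shared = SharedClock(clock(interval=0.5))

    async def consumer(name, n=3):
        async for value in shared:
            print(name, value)
            n -= 1
            if not n:
                break

    await asyncio.gather(consumer("a"), consumer("b"))
    shared._task.cancel()

asyncio.run(main())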
If you're using Python 3.6+ you can use asynchronous generators which are more readable.
async def Clock(interval=1):
counter = 0
while True:
await asyncio.sleep(interval)
counter += 1
yield counter
async def main():
async for i in Clock(1):
print(i)
if i == 4:
break
if __name__ == '__main__':
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(main())
finally:
loop.run_until_complete(loop.shutdown_asyncgens())
loop.close()
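On Python 3.7+ the loop boilerplate can usually be replaced by asyncio.run(), which also finalizes any remaining async generators on exit:
if __name__ == '__main__':
    asyncio.run(main())  # asyncio.run() calls loop.shutdown_asyncgens() during cleanup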
Python 3.6 now has asynchronous iterables. Is there a built-in way to transform an asynchronous iterable into a synchronous iterable?
I currently have this helper function, but it feels very un-Pythonic. Is there a better way to do this?
async def aiter_to_list(aiter):
l = []
async for i in aiter:
l.append(i)
return l
From Python 3.6 you can use Asynchronous Comprehensions
async def async_iter():
for i in range(0,5):
yield i
import asyncio

async def main():
    # async comprehension (must itself run inside a coroutine)
    sync_list = [i async for i in async_iter()]
    print(sync_list)  # [0, 1, 2, 3, 4]

asyncio.run(main())
You can use aiostream.stream.list:
from aiostream import stream
async def agen():
yield 1
yield 2
yield 3
async def main():
lst = await stream.list(agen())
print(lst) # prints [1, 2, 3]
More operators and examples in the documentation.
Your "asynchronous to synchronous" helper is itself asynchronous; not a big change at all. In general: no, you cannot make something asynchronous synchronous. An asynchronous value will be supplied "sometime later"; you cannot make that into "now" because the value doesn't exist "now" and you will have to wait for it, asynchronously.
The following functions let you convert in either direction between a plain iterable and an async iterable, not just collect into a list.
Basic imports
import asyncio
import threading
import time
DONE = object()
TIMEOUT = 0.001
The function to_sync_iterable will convert any async iterable to a sync iterable:
def to_sync_iterable(async_iterable, maxsize = 0):
def sync_iterable():
queue = asyncio.Queue(maxsize=maxsize)
loop = asyncio.get_event_loop()
t = threading.Thread(target=_run_coroutine, args=(loop, async_iterable, queue))
t.daemon = True
t.start()
while True:
if not queue.empty():
x = queue.get_nowait()
if x is DONE:
break
else:
yield x
else:
                time.sleep(TIMEOUT)
t.join()
return sync_iterable()
def _run_coroutine(loop, async_iterable, queue):
loop.run_until_complete(_consume_async_iterable(async_iterable, queue))
async def _consume_async_iterable(async_iterable, queue):
async for x in async_iterable:
await queue.put(x)
await queue.put(DONE)
You can use it like this:
async def slow_async_generator():
yield 0
await asyncio.sleep(1)
yield 1
await asyncio.sleep(1)
yield 2
await asyncio.sleep(1)
yield 3
for x in to_sync_iterable(slow_async_generator()):
print(x)
The function to_async_iterable will convert any sync iterable to an async iterable:
def to_async_iterable(iterable, maxsize = 0):
async def async_iterable():
queue = asyncio.Queue(maxsize=maxsize)
loop = asyncio.get_event_loop()
task = loop.run_in_executor(None, lambda: _consume_iterable(loop, iterable, queue))
while True:
x = await queue.get()
if x is DONE:
break
else:
yield x
await task
return async_iterable()
def _consume_iterable(loop, iterable, queue):
for x in iterable:
while True:
if not queue.full():
loop.call_soon_threadsafe(queue.put_nowait, x)
break
else:
time.sleep(TIMEOUT)
while True:
if not queue.full():
loop.call_soon_threadsafe(queue.put_nowait, DONE)
break
else:
time.sleep(TIMEOUT)
This one is especially useful for asyncio programs because it won't block the event loop even if the sync iterable blocks. You can use it like this:
def slow_sync_generator():
yield 0
time.sleep(1)
yield 1
time.sleep(1)
yield 2
time.sleep(1)
yield 3
async def async_task():
async for x in to_async_iterable(slow_sync_generator()):
print(x)
asyncio.get_event_loop().run_until_complete(async_task())