Design considerations for a pythonic async event system - python

I have a tiny async event system like this:
from collections import defaultdict
from uuid import uuid4

class EventSystem:
    def __init__(self):
        self.handlers = defaultdict(dict)

    def register_handler(self, event, callback, register_id=None):
        register_id = register_id or uuid4()
        self.handlers[event][register_id] = callback
        return register_id

    def unregister_handler(self, event, register_id):
        del self.handlers[event][register_id]

    def clear_handlers(self, event):
        handler_register_ids = list(self.handlers[event].keys())
        for register_id in handler_register_ids:
            self.unregister_handler(event, register_id)

    async def fire_event(self, event, data):
        handlers = self.handlers[event]
        for register_id, callback in handlers.items():
            await callback(data)
        return len(handlers)
Which currently forces handlers to be async functions.
I cannot decide which is more pythonic: enforcing this policy and providing an async2sync wrapper for sync functions:
async def async2sync(func, *args, **kwargs):
    return func(*args, **kwargs)
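For illustration (a hypothetical handler, not part of my actual code), a sync handler would then be registered through the wrapper, e.g. with functools.partial:

import functools

def on_user_created(data):  # a plain sync handler
    print("user created:", data)

events = EventSystem()
# partial(async2sync, on_user_created) produces an awaitable when called with (data)
events.register_handler("user_created", functools.partial(async2sync, on_user_created))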
Or changing fire_event to check the handler's return value, using inspect.isawaitable:
async def fire_event(self, event, data):
    handlers = self.handlers[event]
    for register_id, callback in handlers.items():
        ret = callback(data)
        if inspect.isawaitable(ret):
            await ret
    return len(handlers)
I am not worried about long-running or blocking sync functions.

Since the wrapper in your first approach wraps sync functions into async, shouldn't it be called sync2async rather than async2sync?
If long-running or blocking sync functions are not a concern, both approaches are fine. Both have benefits and drawbacks. The first approach is a bit more minimalistic and easier to reason about. The second approach is a bit more clever (which can bite you when you least expect it), but it is also much more pleasant to use, because you can write either kind of function as a handler and things will "just work". If the user of your API is someone other than yourself, they will probably appreciate it.
TL;DR Either is fine; I'd personally probably go with the second.
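As a quick illustration of the second approach (a hedged sketch; handler and event names are made up), sync and async handlers can be registered side by side:

import asyncio

def sync_handler(data):
    print("sync handler got", data)

async def async_handler(data):
    await asyncio.sleep(0)
    print("async handler got", data)

events = EventSystem()
events.register_handler("ping", sync_handler)
events.register_handler("ping", async_handler)

# with the isawaitable-based fire_event, both handlers run without wrappers
asyncio.run(events.fire_event("ping", {"n": 1}))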

Related

How to call asynchronous function in Django?

The following doesn't execute foo and gives
RuntimeWarning: coroutine 'foo' was never awaited
# urls.py
async def foo(data):
    # process data ...
    pass

@api_view(['POST'])
def endpoint(request):
    data = request.data.get('data')
    # How to call foo here?
    foo(data)
    return Response({})
Django is a synchronous framework, but it supports async behavior.
Sharing the code snippet which may help.
import asyncio
from channels.db import database_sync_to_async

def get_details(tag):
    response = another_sync_function()

    # Creating a new event loop to execute the async function
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    async_result = loop.run_until_complete(remove_tags(response, tag))
    loop.close()

# Async function
async def remove_tags(response, tag_id):
    # do something here
    # calling another function only for executing database queries
    await tag_query(response, tag_id)

@database_sync_to_async
def tag_query(response, tag_id):
    Mymodel.objects.get(all_tag_id=tag_id).delete()
This way I called an async function from a synchronous function.
Reference for database sync to async decorator
Found a way to do it.
Create another file bar.py in the same directory as urls.py.
# bar.py
def foo(data):
    # process data
    pass

# urls.py
from multiprocessing import Process
from .bar import foo

@api_view(['POST'])
def endpoint(request):
    data = request.data.get('data')
    p = Process(target=foo, args=(data,))
    p.start()
    return Response({})
You can't await foo in this context. Seeing that Django is mainly a synchronous library, it doesn't interact well with asynchronous code. The best advice I can give is to try to avoid using an asynchronous function here, or perhaps use another method of concurrency (i.e. threading or multiprocessing); a sketch of the threading option follows the note below.
Note: there is a great answer given about Django's synchronous nature that can be found here: Django is synchronous or asynchronous?.
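A minimal sketch of the threading alternative mentioned above (the run_in_background helper is hypothetical; foo, request and Response mirror the question's code):

import asyncio
import threading

def run_in_background(coro):
    # run the coroutine on its own event loop in a daemon thread,
    # so the synchronous view can return immediately
    threading.Thread(target=asyncio.run, args=(coro,), daemon=True).start()

@api_view(['POST'])
def endpoint(request):
    data = request.data.get('data')
    run_in_background(foo(data))  # fire-and-forget
    return Response({})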

Write back through the callback attached to IOLoop in Tornado

There is a tricky post handler; sometimes it can take a lot of time (depending on the input values), sometimes not.
What I want is to write back whenever 1 second passes, dynamically allocating the response.
def post(self):
    def callback():
        self.write('too-late')
        self.finish()

    timeout_obj = IOLoop.current().add_timeout(
        dt.timedelta(seconds=1),
        callback,
    )

    # some asynchronous operations

    if not self.request.connection.stream.closed():
        self.write('here is your response')
        self.finish()
        IOLoop.current().remove_timeout(timeout_obj)
Turns out I can't do much from within callback.
Even raising an exception is suppressed by the inner context and won't be passed through the post method.
Any other ways to achieve the goal?
Thank you.
UPD 2020-05-15:
I found a similar question.
Thanks @ionut-ticus, using with_timeout() is much more convenient.
After some tries, I think I came really close to what I'm looking for:
def wait(fn):
    @gen.coroutine
    @wraps(fn)
    def wrap(*args):
        try:
            result = yield gen.with_timeout(
                dt.timedelta(seconds=20),
                IOLoop.current().run_in_executor(None, fn, *args),
            )
            raise gen.Return(result)
        except gen.TimeoutError:
            logging.error('### TOO LONG')
            raise gen.Return('Next time, bro')
    return wrap

@wait
def blocking_func(item):
    time.sleep(30)
    # this is not a Subprocess.
    # It is a file IO and DB
    return 'we are done here'
Still not sure: should the wait() decorator itself be wrapped in a coroutine?
Sometimes, in a chain of calls from blocking_func(), there can be another ThreadPoolExecutor. My concern is: would this work without making mine global and passing it to Tornado's run_in_executor()?
Tornado: v5.1.1
An example of usage of tornado.gen.with_timeout. Keep in mind the task needs to be async or else the IOLoop will be blocked and won't be able to process the timeout:
@gen.coroutine
def async_task():
    # some async code
    pass

@gen.coroutine
def get(self):
    delta = datetime.timedelta(seconds=1)
    try:
        task = self.async_task()
        result = yield gen.with_timeout(delta, task)
        self.write("success")
    except gen.TimeoutError:
        self.write("timeout")
I'd advise using https://github.com/aio-libs/async-timeout:
import asyncio
import async_timeout

async def post(self):
    try:
        async with async_timeout.timeout(1):
            # some asynchronous operations
            if not self.request.connection.stream.closed():
                self.write('here is your response')
                self.finish()
            # no manual remove_timeout needed; async_timeout enforces the deadline
    except asyncio.TimeoutError:
        self.write('too-late')
        self.finish()

Python coroutines: Release context manager when pausing

Background: I'm a very experienced Python programmer who is completely clueless about the new coroutines/async/await features. I can't write an async "hello world" to save my life.
My question is: I am given an arbitrary coroutine function f. I want to write a coroutine function g that will wrap f, i.e. I will give g to the user as if it was f, and the user will call it and be none the wiser, since g will be using f under the hood. Like when you decorate a normal Python function to add functionality.
The functionality that I want to add: Whenever the program flow goes into my coroutine, it acquires a context manager that I provide, and as soon as program flow goes out of the coroutine, it releases that context manager. Flow comes back in? Re-acquire the context manager. It goes back out? Re-release it. Until the coroutine is completely finished.
To demonstrate, here is the described functionality with plain generators:
def generator_wrapper(_, *args, **kwargs):
    # 'function' (the wrapped generator function) and 'self' (the context
    # manager) come from the enclosing scope
    gen = function(*args, **kwargs)
    method, incoming = gen.send, None
    while True:
        with self:
            outgoing = method(incoming)
        try:
            method, incoming = gen.send, (yield outgoing)
        except Exception as e:
            method, incoming = gen.throw, e
Is it possible to do it with coroutines?
Coroutines are built on iterators - the __await__ special method returns a regular iterator. This allows you to wrap the underlying iterator in yet another iterator. The trick is that you must unwrap the iterator of your target using its __await__, then re-wrap your own iterator using your own __await__.
The core functionality that works on instantiated coroutines looks like this:
class CoroWrapper:
    """Wrap ``target`` to have every send issued in a ``context``"""
    def __init__(self, target: 'Coroutine', context: 'ContextManager'):
        self.target = target
        self.context = context

    # wrap an iterator for use with 'await'
    def __await__(self):
        # unwrap the underlying iterator
        target_iter = self.target.__await__()
        # emulate 'yield from'
        iter_send, iter_throw = target_iter.send, target_iter.throw
        send, message = iter_send, None
        while True:
            # communicate with the target coroutine
            try:
                with self.context:
                    signal = send(message)
            except StopIteration as err:
                return err.value
            else:
                send = iter_send
            # communicate with the ambient event loop
            try:
                message = yield signal
            except BaseException as err:
                send, message = iter_throw, err
Note that this explicitly works on a Coroutine, not an Awaitable - Coroutine.__await__ implements the generator interface. In theory, an Awaitable does not necessarily provide __await__().send or __await__().throw.
This is enough to pass messages in and out:
import asyncio

class PrintContext:
    def __enter__(self):
        print('enter')

    def __exit__(self, exc_type, exc_val, exc_tb):
        print('exit via', exc_type)
        return False

async def main_coro():
    print(
        'wrapper returned',
        await CoroWrapper(test_coro(), PrintContext())
    )

async def test_coro(delay=0.5):
    await asyncio.sleep(delay)
    return 2

asyncio.run(main_coro())
# enter
# exit via None
# enter
# exit via <class 'StopIteration'>
# wrapper returned 2
You can delegate the wrapping part to a separate decorator. This also ensures that you have an actual coroutine, not a custom class - some async libraries require this.
from functools import wraps

def send_context(context: 'ContextManager'):
    """Wrap a coroutine to issue every send in a context"""
    def coro_wrapper(target: 'Callable[..., Coroutine]') -> 'Callable[..., Coroutine]':
        @wraps(target)
        async def context_coroutine(*args, **kwargs):
            return await CoroWrapper(target(*args, **kwargs), context)
        return context_coroutine
    return coro_wrapper
This allows you to directly decorate a coroutine function:
@send_context(PrintContext())
async def test_coro(delay=0.5):
    await asyncio.sleep(delay)
    return 2

print('async run returned:', asyncio.run(test_coro()))
# enter
# exit via None
# enter
# exit via <class 'StopIteration'>
# async run returned: 2
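As a side note on the Coroutine-vs-Awaitable caveat above: if you only have a generic awaitable (whose __await__() iterator may lack send/throw), one hedged workaround is to normalise it into a real coroutine first:

# A sketch, not part of the original answer: wrapping an awaitable in an
# 'async def' yields a true coroutine whose __await__() supports send/throw.
async def as_coroutine(awaitable):
    return await awaitable

# CoroWrapper(as_coroutine(some_awaitable), PrintContext()) then works as above.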

Creating a new logger for each async function invocation, good idea or not?

When writing Python asyncio programs, often there is an async function that has many invocations running concurrently. I want to add some logging to this function, but the logging output from different invocations will be interleaved, making it hard to follow. My current solution is to somehow create a unique name for each invocation, and log that name each time, like this:
async def make_request(args):
    logger = logging.getLogger('myscript.request')
    log_name = unique_name()
    logger.debug('[%s] making request with args %r', log_name, args)
    response = await request(args)
    logger.debug('[%s] response: %r', log_name, response)
However, having to put log_name in every logging call gets tiring pretty quickly. To save those keystrokes, I came up with a different solution, creating a new logger with a unique name for each invocation:
async def make_request(args):
    logger = logging.getLogger(f'myscript.request.{unique_name()}')
    logger.debug('making request with args %r', args)
    response = await request(args)
    logger.debug('response: %r', response)
Are there any downsides to this approach? The only thing I can think of is that creating a new logger may be expensive, but is that actually the case? Are there any pitfalls I'm not seeing?
Are there any downsides to [creating a new logger for each coroutine]?
Other than the possible price of creating a logger, another downside is that the logger you create stays associated with the unique name forever and is never destroyed, so you effectively have a memory leak. This is explicitly promised by the documentation:
Multiple calls to getLogger() with the same name will always return a reference to the same Logger object.
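A quick way to see this accumulation (a sketch poking at the logging module's internal registry that backs getLogger):

import logging

before = len(logging.Logger.manager.loggerDict)
for i in range(1000):
    logging.getLogger(f'myscript.request.{i}')
after = len(logging.Logger.manager.loggerDict)
print(after - before)  # at least 1000: one permanent entry per unique name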
I'd recommend just biting the bullet and creating a helper with the desired functionality. Building on Brad Solomon's answer, the wrapper might look like this (untested):
import asyncio, itertools, weakref, logging

logging.basicConfig(format='%(asctime)-15s %(task_name)s %(message)s')

class TaskLogger:
    _next_id = itertools.count().__next__
    _task_ids = weakref.WeakKeyDictionary()

    def __init__(self):
        self._logger = logging.getLogger('myscript.request')

    def _task_name(self):
        task = asyncio.current_task()
        if task not in self._task_ids:
            self._task_ids[task] = self._next_id()
        return f'task-{self._task_ids[task]}'

    def debug(self, *args, **kwargs):
        # pass the task name via 'extra' so %(task_name)s in the format resolves
        self._logger.debug(*args, extra={'task_name': self._task_name()}, **kwargs)

    # the same for info, etc.

logger = TaskLogger()
Instead of creating a new logger, you may want to consider taking advantage of custom attributes in the log message, via the extra parameter:
For example:
FORMAT = '%(asctime)-15s %(unique_name)s %(message)s'
# [Configure/format loggers & handlers]
Then within the coroutine call logging a debug level message would look something like:
logger.debug('making request with args %r', args, extra={'unique_name': unique_name()})
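For completeness, the handler configuration elided by the placeholder comment above might look roughly like this (a sketch, not the original setup):

import logging

handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(FORMAT))

logger = logging.getLogger('myscript.request')
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)
# every record logged through this logger must now supply 'unique_name' via extra=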
One other thing to keep in mind: unique_name() could get expensive if you're making a lot of requests. A common pattern when creating concurrency via multiprocessing is to log the calling process ID via os.getpid(). With asyncio, perhaps a very rough cousin would be some identifier for the current Task, which you can get via asyncio.current_task(). Each task has a _name attribute that should be unique because it calls an incrementing _task_name_counter():
class Task(futures._PyFuture):  # Inherit Python Task implementation
    def __init__(self, coro, *, loop=None, name=None):
        # ...
        if name is None:
            self._name = f'Task-{_task_name_counter()}'
        else:
            self._name = str(name)
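Rather than touching _name directly, a hedged sketch would use the public Task.get_name() accessor (Python 3.8+) inside the question's coroutine:

import asyncio

async def make_request(args):
    task_name = asyncio.current_task().get_name()  # e.g. 'Task-7'
    logger.debug('making request with args %r', args,
                 extra={'unique_name': task_name})
    response = await request(args)
    logger.debug('response: %r', response,
                 extra={'unique_name': task_name})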
I was looking for a similar solution to log from multiple async tasks.
As mentioned above, identifying which line comes from which task is otherwise impossible.
This is my solution to this problem.
If it has downsides or there's a better way, please let me know.
import asyncio
import logging

log_format = '%(levelname).1s,%(thread)s,%(name)s,%(lineno)d: %(message)s'
logging.basicConfig(format=log_format, level=logging.NOTSET)
log = logging.getLogger()

class MyStreamHandler(logging.StreamHandler):
    def __init__(self):
        super().__init__()

    def emit(self, record: logging.LogRecord) -> None:
        try:
            # append the current task name to the thread id when inside a task
            task = asyncio.current_task(asyncio.get_running_loop())
            if task is not None:
                record.__setattr__("thread", f"{record.thread}[{task.get_name()}]")
        except RuntimeError:
            pass
        super().emit(record)

handler = MyStreamHandler()
handler.setFormatter(logging.Formatter(log_format))
log.handlers = []
log.addHandler(handler)

def synchronous():
    log.debug("I'm synchronous")

async def asynchronous():
    log.debug("I'm a debug")
    log.warning("I'm a warning")
    log.error("I'm an error")

async def main():
    loop = asyncio.get_event_loop()
    tasks = []
    for i in range(5):
        tasks.append(asynchronous())
    await asyncio.gather(*tasks)
    synchronous()

asyncio.run(main())
sample output:
D,6052,root,30: I'm synchronous
D,6052,asyncio,623: Using proactor: IocpProactor
D,6052[Task-2],root,34: I'm a debug
W,6052[Task-2],root,35: I'm a warning
E,6052[Task-2],root,36: I'm an error
D,6052[Task-3],root,34: I'm a debug
W,6052[Task-3],root,35: I'm a warning
E,6052[Task-3],root,36: I'm an error
D,6052[Task-4],root,34: I'm a debug
W,6052[Task-4],root,35: I'm a warning
E,6052[Task-4],root,36: I'm an error
D,6052[Task-5],root,34: I'm a debug
W,6052[Task-5],root,35: I'm a warning
E,6052[Task-5],root,36: I'm an error
D,6052[Task-6],root,34: I'm a debug
W,6052[Task-6],root,35: I'm a warning
E,6052[Task-6],root,36: I'm an error
To provide some closure to this old question: after following Vinay Sajip's comment about LoggerAdapter, I actually found exactly what I wanted in the documentation. Quoting from the docs:
If you need a different method, e.g. if you want to prepend or append the contextual information to the message string, you just need to subclass LoggerAdapter and override process() to do what you need. Here is a simple example:
class CustomAdapter(logging.LoggerAdapter):
    """
    This example adapter expects the passed in dict-like object to have a
    'connid' key, whose value in brackets is prepended to the log message.
    """
    def process(self, msg, kwargs):
        return '[%s] %s' % (self.extra['connid'], msg), kwargs
which you can use like this:
logger = logging.getLogger(__name__)
adapter = CustomAdapter(logger, {'connid': some_conn_id})
Then any events that you log to the adapter will have the value of some_conn_id prepended to the log messages.
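Tying that back to the original question, a hedged sketch would create one adapter per invocation (unique_name and request are the question's placeholders):

async def make_request(args):
    adapter = CustomAdapter(logging.getLogger('myscript.request'),
                            {'connid': unique_name()})
    adapter.debug('making request with args %r', args)
    response = await request(args)
    adapter.debug('response: %r', response)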

How to combine Celery with asyncio?

How can I create a wrapper that makes celery tasks look like asyncio.Task? Or is there a better way to integrate Celery with asyncio?
@asksol, the creator of Celery, said this:
It's quite common to use Celery as a distributed layer on top of async I/O frameworks (top tip: routing CPU-bound tasks to a prefork worker means they will not block your event loop).
But I could not find any code examples specifically for asyncio framework.
EDIT 01/12/2021: the previous answer (find it at the bottom) didn't age well, so I added a combination of possible solutions that may satisfy those still looking for a way to co-use asyncio and Celery.
Let's quickly break up the use cases first (more in-depth analysis here: asyncio and coroutines vs task queues):
If the task is I/O bound then it tends to be better to use coroutines and asyncio.
If the task is CPU bound then it tends to be better to use Celery or other similar task management systems.
So it makes sense in the context of Python's "Do one thing and do it well" to not try and mix asyncio and celery together.
BUT what happens in cases where we want to be able to run a method both asynchronously and as an async task? Then we have some options to consider:
The best example that I was able to find is the following: https://johnfraney.ca/posts/2018/12/20/writing-unit-tests-celery-tasks-async-functions/ (and I just found out that it is @Franey's response):
Define your async method.
Use the async_to_sync helper from asgiref.sync to wrap the async method and run it synchronously inside a Celery task:
# tasks.py
import asyncio
from asgiref.sync import async_to_sync
from celery import Celery

app = Celery('async_test', broker='a_broker_url_goes_here')

async def return_hello():
    await asyncio.sleep(1)
    return 'hello'

@app.task(name="sync_task")
def sync_task():
    async_to_sync(return_hello)()
A use case that I came upon in a FastAPI application was the reverse of the previous example:
An intense CPU bound process is hogging up the async endpoints.
The solution is to refactor the async CPU bound process into a celery task and pass a task instance for execution from the Celery queue.
A minimal example for visualization of that case:
import asyncio
import uvicorn
from celery import Celery
from fastapi import FastAPI

app = FastAPI(title='Example')
worker = Celery('worker', broker='a_broker_url_goes_here')

@worker.task(name='cpu_boun')
def cpu_bound_task():
    # Does stuff but let's simplify it
    print([n for n in range(1000)])

@app.get('/calculate')
async def calculate():
    cpu_bound_task.delay()

if __name__ == "__main__":
    uvicorn.run('main:app', host='0.0.0.0', port=8000)
Another solution seems to be what @juanra and @danius are proposing in their answers, but we have to keep in mind that performance tends to take a hit when we intermix sync and async executions, thus those answers need monitoring before we can decide to use them in a prod environment.
Finally, there are some ready-made solutions, that I cannot recommend (because I have not used them myself) but I will list them here:
Celery Pool AsyncIO which seems to solve exactly what Celery 5.0 didn't, but keep in mind that it seems a bit experimental (version 0.2.0 today 01/12/2021)
aiotasks claims to be "a Celery like task manager that distributes Asyncio coroutines" but seems a bit stale (latest commit around 2 years ago)
Well, that didn't age so well, did it? Version 5.0 of Celery didn't implement asyncio compatibility, so we cannot know when, or if, this will ever be implemented... Leaving this here for legacy reasons (as it was the answer at the time) and for comment continuation.
That will be possible from Celery version 5.0 as stated on the official site:
http://docs.celeryproject.org/en/4.0/whatsnew-4.0.html#preface
The next major version of Celery will support Python 3.5 only, where we are planning to take advantage of the new asyncio library.
Dropping support for Python 2 will enable us to remove massive amounts of compatibility code, and going with Python 3.5 allows us to take advantage of typing, async/await, asyncio, and similar concepts there’s no alternative for in older versions.
The above was quoted from the previous link.
So the best thing to do is wait for version 5.0 to be distributed!
In the meantime, happy coding :)
This simple way worked fine for me:
import asyncio
from celery import Celery

app = Celery('tasks')

async def async_function(param1, param2):
    # more async stuff...
    pass

@app.task(name='tasks.task_name', queue='queue_name')
def task_name(param1, param2):
    asyncio.run(async_function(param1, param2))
You can wrap any blocking call into a Task using run_in_executor as described in the documentation; I also added a custom timeout to the example:
import asyncio
import functools
from concurrent.futures import Future, ThreadPoolExecutor

# assumption: a module-level executor; the original answer leaves its creation out
executor = ThreadPoolExecutor()

def run_async_task(
    target,
    *args,
    timeout=60,
    **keywords
) -> Future:
    loop = asyncio.get_event_loop()
    return asyncio.wait_for(
        loop.run_in_executor(
            executor,
            functools.partial(target, *args, **keywords)
        ),
        timeout=timeout,
        loop=loop  # note: the explicit loop argument was removed in Python 3.10
    )

loop = asyncio.get_event_loop()
async_result = loop.run_until_complete(
    run_async_task(your_task.delay, some_arg, some_karg="")
)
result = loop.run_until_complete(
    run_async_task(async_result.result)
)
Here is a simple helper that you can use to make a Celery task awaitable:
import asyncio
from asgiref.sync import sync_to_async

# Converts a Celery task to an async function
def task_to_async(task):
    async def wrapper(*args, **kwargs):
        delay = 0.1
        async_result = await sync_to_async(task.delay)(*args, **kwargs)
        while not async_result.ready():
            await asyncio.sleep(delay)
            delay = min(delay * 1.5, 2)  # exponential backoff, max 2 seconds
        return async_result.get()
    return wrapper
Like sync_to_async, it can be used as a direct wrapper:
@shared_task
def get_answer():
    sleep(10)  # simulate long computation
    return 42

result = await task_to_async(get_answer)()
...and as a decorator:
@task_to_async
@shared_task
def get_answer():
    sleep(10)  # simulate long computation
    return 42

result = await get_answer()
Of course, this is not a perfect solution since it relies on polling.
However, it should be a good workaround to call Celery tasks from Django async views until Celery officially provides a better solution.
EDIT 2021/03/02: added the call to sync_to_async to support eager mode.
The cleanest way I've found to do this is to wrap the async function in asgiref.sync.async_to_sync (from asgiref):
from asyncio import sleep
from asgiref.sync import async_to_sync
from celery.task import periodic_task

async def return_hello():
    await sleep(1)
    return 'hello'

@periodic_task(
    run_every=2,
    name='return_hello',
)
def task_return_hello():
    async_to_sync(return_hello)()
I pulled this example from a blog post I wrote.
I solved the problem by combining Celery and asyncio in the celery-pool-asyncio library.
Here's my implementation of Celery handling async coroutines when necessary:
Wrap the Celery class to extend its functionality:
from celery import Celery
from inspect import isawaitable
import asyncio

class AsyncCelery(Celery):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.patch_task()

        if 'app' in kwargs:
            self.init_app(kwargs['app'])

    def patch_task(self):
        TaskBase = self.Task

        class ContextTask(TaskBase):
            abstract = True

            async def _run(self, *args, **kwargs):
                result = TaskBase.__call__(self, *args, **kwargs)
                if isawaitable(result):
                    await result

            def __call__(self, *args, **kwargs):
                asyncio.run(self._run(*args, **kwargs))

        self.Task = ContextTask

    def init_app(self, app):
        self.app = app

        conf = {}
        for key in app.config.keys():
            if key[0:7] == 'CELERY_':
                conf[key[7:].lower()] = app.config[key]

        if 'broker_transport_options' not in conf and conf.get('broker_url', '')[0:4] == 'sqs:':
            conf['broker_transport_options'] = {'region': 'eu-west-1'}

        self.config_from_object(conf)

celery = AsyncCelery()
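A hedged usage sketch (the task name and broker URL are illustrative): with the patched Task class, tasks may be defined as either plain or coroutine functions:

celery = AsyncCelery('tasks', broker='a_broker_url_goes_here')

@celery.task(name='tasks.fetch')
async def fetch(url):
    # the patched ContextTask awaits the coroutine via asyncio.run
    await asyncio.sleep(1)
    return url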
For anyone who stumbles on this looking for help specifically with async SQLAlchemy (i.e., using the asyncio extension) and Celery tasks, explicitly disposing of the engine will fix the issue. This particular example worked with asyncpg.
Example:
import contextlib
from typing import AsyncGenerator

from sqlalchemy.ext.asyncio import (
    AsyncSession,
    create_async_engine,
)
from sqlalchemy.orm import sessionmaker
from asgiref.sync import async_to_sync

engine = create_async_engine("some_uri", future=True)
async_session_factory = sessionmaker(engine, expire_on_commit=False, class_=AsyncSession)

@celery_app.task(name="task-name")
def sync_func() -> None:
    async_to_sync(some_func)()

async def some_func() -> None:
    async with get_db_session() as session:
        result = await some_db_query(session)
    # engine.dispose will be called on exit

@contextlib.asynccontextmanager
async def get_db_session() -> AsyncGenerator:
    try:
        db = async_session_factory()
        yield db
    finally:
        await db.close()
        await engine.dispose()
A nice way to implement Celery with asyncio:
import asyncio
from celery import Celery

app = Celery()

async def async_function(param):
    print('do something')

@app.task()
def celery_task(param):
    loop = asyncio.get_event_loop()
    return loop.run_until_complete(async_function(param))
