Convert third party library to asyncio - python

I have a third party library (in this case azure-cosmos) that performs slow, blocking operations. I'd like to use it with the asyncio library to get some concurrency: start another request while an earlier one is waiting for data.
I've looked around online but haven't been able to find a concrete answer on how to wrap the external library; is it as easy as:
def external_sync_method(*args, **kwargs):
    ...

async def my_async_code():
    args_list = [...]
    return await asyncio.gather(*(external_sync_method(*args) for args in args_list))

Not quite: awaiting asyncio.gather over plain external_sync_method(*args) calls doesn't help, because each call still runs (and blocks) in the event loop thread, and gather expects awaitables rather than plain return values. Hand the blocking call off to a worker thread instead.
Python >= 3.9: use asyncio.to_thread, which will "asynchronously run function func in a separate thread":
https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread
async def my_async_code():
    args_list = [...]
    return await asyncio.gather(
        *(
            asyncio.to_thread(external_sync_method, *args)
            for args in args_list
        )
    )
Python < 3.9: use loop.run_in_executor:
https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.run_in_executor
import asyncio
import functools

async def run_sync_method(func, *args, **kwargs):
    loop = asyncio.get_running_loop()
    func_call = functools.partial(func, *args, **kwargs)
    return await loop.run_in_executor(None, func_call)

async def my_async_code():
    args_list = [...]
    return await asyncio.gather(
        *(
            run_sync_method(external_sync_method, *args)
            for args in args_list
        )
    )
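Note that run_in_executor(None, ...) uses the event loop's default thread pool. If you want to cap how many blocking calls run at once (for example, to avoid overwhelming the remote service), you can pass your own executor instead of None; a minimal sketch, where the pool size of 4 is an arbitrary choice:
import asyncio
import concurrent.futures

# arbitrary cap on concurrent blocking calls; tune to your workload
executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)

async def my_async_code():
    loop = asyncio.get_running_loop()
    args_list = [...]
    return await asyncio.gather(
        *(
            loop.run_in_executor(executor, external_sync_method, *args)
            for args in args_list
        )
    )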

Related

Python: Copy context (contextvars.Context) to a separate thread

So far, I've found a lot of examples of how the contextvars module behaves with asyncio, but none of how it behaves with threads (asyncio.get_event_loop().run_in_executor, threading.Thread, and so on).
My question is: how can I pass the context to a separate thread? Below is a code snippet that does not work (Python 3.9.8).
import typing
import asyncio
import contextvars
import concurrent.futures
import functools

class CustomThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
    def submit(
        self,
        function: typing.Callable,
        *args,
        **kwargs
    ) -> concurrent.futures.Future:
        context = contextvars.copy_context()
        return super().submit(
            context.run,
            functools.partial(function, *args, **kwargs)
        )

def function():
    print(var.get())

async def main():
    await asyncio.get_event_loop().run_in_executor(None, function)

if __name__ == '__main__':
    var = contextvars.ContextVar('variable')
    var.set('Message.')
    asyncio.get_event_loop().set_default_executor(CustomThreadPoolExecutor)
    asyncio.run(main())
You can use a wrapper function that takes copy_context().items(), sets the variables, and then calls your function. functools.partial will help you create the wrapped function for passing to run_in_executor. This is a working test for my decorators:
import asyncio
import contextvars
import functools
from concurrent import futures

def test_run_in_thread_pool_executor():
    def init(func, ctx_vars, *args, **kwargs):
        for var, value in ctx_vars:
            var.set(value)
        return func(*args, **kwargs)

    @async_add_headers('streaming')
    async def wrapper(f):
        loop = asyncio.get_event_loop()
        ctx = contextvars.copy_context()
        executor = futures.ThreadPoolExecutor(max_workers=5)
        return await loop.run_in_executor(executor, functools.partial(init, f, ctx.items()))

    @add_headers('client')
    def foo():
        assert caller_context_var.get() == 'streaming'

    async def main_test():
        await wrapper(foo)

    asyncio.run(main_test())
Here add_headers and async_add_headers set some contextvars according to the order in which the functions are called. Without the init function, caller_context_var.get() would be equal to 'client'.
Unfortunately this works only for ThreadPoolExecutor and not for ProcessPoolExecutor, because Context objects are not picklable (check the relevant section of PEP 567). There is also an example with an executor:
executor = ThreadPoolExecutor()
current_context = contextvars.copy_context()
executor.submit(current_context.run, some_function)
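Applied to the original snippet, a minimal sketch (standard library only) that runs the blocking function in a worker thread while preserving the caller's contextvars might look like this:
import asyncio
import contextvars
import functools

var = contextvars.ContextVar('variable')

def function():
    print(var.get())

async def main():
    var.set('Message.')
    loop = asyncio.get_running_loop()
    ctx = contextvars.copy_context()
    # ctx.run executes `function` inside the copied context, in the worker thread
    await loop.run_in_executor(None, functools.partial(ctx.run, function))

asyncio.run(main())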

Call an async function periodically?

I have the following function to call s(c) every 24 hours.
def schedule_next_sync():
    t = datetime.datetime.now()
    t = t.replace(hour=0) + datetime.timedelta(hours=24)

    def wrapper():
        s(c)
        schedule_next_sync()

    tornado.ioloop.IOLoop.current().add_timeout(datetime.datetime.timestamp(t), wrapper)
However, s() will be changed to an async function.
async def s(c):
How should schedule_next_sync be updated for an async function? Should it run s() synchronously, or should schedule_next_sync() become an async function itself?
Once s is async, you could use asyncio.sleep() instead of the lower-level add_timeout():
async def schedule_next_sync():
    async def call_forever():
        while True:
            await asyncio.sleep(1)
            await s(c)

    tornado.ioloop.IOLoop.current().create_task(call_forever())
If you really want to do it with timeouts, something like this should work:
def schedule_next_sync():
    t = datetime.datetime.now() + datetime.timedelta(seconds=1)

    def wrapper():
        loop = asyncio.get_running_loop()
        task = loop.create_task(s(c))
        task.add_done_callback(lambda _: schedule_next_sync())

    loop = tornado.ioloop.IOLoop.current()
    loop.add_timeout(datetime.datetime.timestamp(t), wrapper)
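For the original 24-hour cadence, the same sleep-in-a-loop pattern applies; a sketch, assuming s and c are defined by the surrounding application and that "every 24 hours from now" is the desired interval:
import asyncio
import datetime

async def sync_forever():
    while True:
        # compute the delay until the next run, then call the async s(c)
        next_run = datetime.datetime.now() + datetime.timedelta(hours=24)
        await asyncio.sleep((next_run - datetime.datetime.now()).total_seconds())
        await s(c)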

Copying contextvars.Context between tasks

I have a program (an ASGI server) that is structured roughly like this:
import asyncio
import contextvars

ctxvar = contextvars.ContextVar("ctx")

async def lifespan():
    ctxvar.set("spam")

async def endpoint():
    assert ctxvar.get() == "spam"

async def main():
    ctx = contextvars.copy_context()
    task = asyncio.create_task(lifespan())
    await task
    task = asyncio.create_task(endpoint())
    await task

asyncio.run(main())
Because the lifespan event / endpoints are run in tasks, they can't share contextvars.
This is by design: tasks copy the context before executing, so lifespan can't set ctxvar properly.
This is the desired behavior for endpoints, but I would like for execution to appear like this (from a user's perspective):
async def lifespan():
    ctxvar.set("spam")
    await endpoint()
In other words, the endpoints are executed in their own independent context, but within the context of the lifespan.
I tried to get this to work by using contextvars.copy_context():
import asyncio
import contextvars

ctxvar = contextvars.ContextVar("ctx")

async def lifespan():
    ctxvar.set("spam")
    print("set")

async def endpoint():
    print("get")
    assert ctxvar.get() == "spam"

async def main():
    ctx = contextvars.copy_context()
    task = ctx.run(asyncio.create_task, lifespan())
    await task
    endpoint_ctx = ctx.copy()
    task = endpoint_ctx.run(asyncio.create_task, endpoint())
    await task

asyncio.run(main())
As well as:
async def main():
    ctx = contextvars.copy_context()
    task = asyncio.create_task(ctx.run(lifespan))
    await task
    endpoint_ctx = ctx.copy()
    task = asyncio.create_task(endpoint_ctx.run(endpoint))
    await task
However it seems that contextvars.Context.run does not work this way (I guess the context is bound when the coroutine is created but not when it is executed).
Is there a simple way to achieve the desired behavior, without restructuring how the tasks are being created or such?
Here's what I came up with, inspired by PEP 555 and asgiref:
from contextvars import Context, ContextVar, copy_context
from typing import Any

def _set_cvar(cvar: ContextVar, val: Any):
    cvar.set(val)

class CaptureContext:
    def __init__(self) -> None:
        self.context = Context()

    def __enter__(self) -> "CaptureContext":
        self._outer = copy_context()
        return self

    def sync(self):
        final = copy_context()
        for cvar in final:
            if cvar not in self._outer:
                # new contextvar set
                self.context.run(_set_cvar, cvar, final.get(cvar))
            else:
                final_val = final.get(cvar)
                if self._outer.get(cvar) != final_val:
                    # value changed
                    self.context.run(_set_cvar, cvar, final_val)

    def __exit__(self, *args: Any):
        self.sync()

def restore_context(context: Context) -> None:
    """Restore `context` to the current Context"""
    for cvar in context.keys():
        try:
            cvar.set(context.get(cvar))
        except LookupError:
            cvar.set(context.get(cvar))
Usage:
import asyncio
import contextvars

ctxvar = contextvars.ContextVar("ctx")

async def lifespan(cap: CaptureContext):
    with cap:
        ctxvar.set("spam")

async def endpoint():
    assert ctxvar.get() == "spam"

async def main():
    cap = CaptureContext()
    await asyncio.create_task(lifespan(cap))
    restore_context(cap.context)
    task = asyncio.create_task(endpoint())
    await task

asyncio.run(main())
The sync() method is provided in case the task is long-running and you need to capture the context before it finishes. A somewhat contrived example:
import asyncio
import contextvars

ctxvar = contextvars.ContextVar("ctx")

async def lifespan(cap: CaptureContext, event: asyncio.Event):
    with cap:
        ctxvar.set("spam")
        cap.sync()
        event.set()
        await asyncio.sleep(float("inf"))

async def endpoint():
    assert ctxvar.get() == "spam"

async def main():
    cap = CaptureContext()
    event = asyncio.Event()
    asyncio.create_task(lifespan(cap, event))
    await event.wait()
    restore_context(cap.context)
    task = asyncio.create_task(endpoint())
    await task

asyncio.run(main())
I think it would still be much nicer if contextvars.Context.run worked with coroutines.
This feature will be supported in Python 3.11: https://github.com/python/cpython/issues/91150
You will be able to write:
async def main():
    ctx = contextvars.copy_context()
    task = asyncio.create_task(lifespan(), context=ctx)
    await task
    endpoint_ctx = ctx.copy()
    task = asyncio.create_task(endpoint(), context=endpoint_ctx)
    await task
In the meantime, in current Python versions you will need a backport of this feature. I can't think of a good one, but a bad one is here.
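For reference, one rough workaround on older versions (a sketch only, and an approximation: values set inside the child task are not propagated back into ctx) is to re-apply the captured values at the top of the child coroutine:
import asyncio
import contextvars

async def run_with_context(ctx: contextvars.Context, coro):
    # copy every captured value into this task's own context before running coro
    for var in ctx:
        var.set(ctx[var])
    return await coro

# usage inside main():
#     task = asyncio.create_task(run_with_context(ctx, endpoint()))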

Does calling thread.join() block the event loop in an asynchronous context?

I'm implementing a web API using aiohttp, deployed using gunicorn with uvloop enabled (--worker-class aiohttp.GunicornUVLoopWebWorker). Therefore, my code always runs in an asynchronous context. I had the idea of running parallel jobs while handling requests for better performance.
I'm not using asyncio for this part because I want parallelism, not concurrency.
I'm aware of multiprocessing and the GIL problem in Python, but joining a process also applies to my question.
Here is an example:
from aiohttp.web import middleware

@middleware
async def context_init(request, handler):
    request.context = {}
    request.context['threads'] = []
    ret = await handler(request)
    for thread in request.context['threads']:
        thread.join()
    return ret
Taking into account that thread.join() or process.join() blocks the current thread, this will block the event loop (as far as my knowledge goes). How can I join asynchronously? What I want can be represented figuratively as await thread.join() or await process.join().
Update:
Thanks to @user4815162342 I was able to write proper code for my project:
Middleware:
from aiohttp.web import middleware
from util.process_session import ProcessSession

@middleware
async def context_init(request, handler):
    request.context = {}
    request.context['process_session'] = ProcessSession()
    request.context['processes'] = {}
    ret = await handler(request)
    await request.context['process_session'].wait_for_all()
    return ret
Util:
import asyncio
import concurrent.futures
from functools import partial

class ProcessSession():
    def __init__(self):
        self.loop = asyncio.get_running_loop()
        self.pool = concurrent.futures.ProcessPoolExecutor()
        self.futures = []

    async def wait_for_all(self):
        await asyncio.wait(self.futures)

    def add_process(self, f, *args, **kwargs):
        ret = self.loop.run_in_executor(self.pool, partial(f, *args, **kwargs))
        self.futures.append(ret)
        return ret

class ProcessBase():
    def __init__(self, process_session, f, *args, **kwargs):
        self.future = process_session.add_process(f, *args, **kwargs)

    async def wait(self):
        await asyncio.wait([self.future])
        return self.future.result()
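For illustration, a hypothetical handler using this session might submit CPU-bound work to the pool and either await the result itself or leave it to the middleware's wait_for_all() (the names crunch and handler are made up for this sketch):
from aiohttp import web

def crunch(n):
    # CPU-bound work; runs in a worker process via the ProcessPoolExecutor
    return sum(i * i for i in range(n))

async def handler(request):
    future = request.context['process_session'].add_process(crunch, 10_000_000)
    result = await future  # or skip this await and let the middleware wait for it
    return web.json_response({'result': result})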
Answering your question: yes, it does block the event loop.
I found that ThreadPoolExecutor works pretty well in these situations.
import asyncio
import functools
from concurrent.futures.thread import ThreadPoolExecutor
from aiohttp.web import middleware

@middleware
async def context_init(request, handler):
    request.context = {}
    request.context['threads'] = []
    ret = await handler(request)
    with ThreadPoolExecutor(1) as executor:
        await asyncio.get_event_loop().run_in_executor(
            executor,
            functools.partial(join_threads, request.context['threads'])
        )
    return ret

def join_threads(threads):
    for t in threads:
        t.join()
I found a solution using multiple processes. It can be done using a Pool. The standard library provides some "async" methods (they are not really async, they just separate the initialization of the process from the retrieval of its output), such as apply_async.
Using a simple async wrapper, I managed to deliver what I wanted:
from multiprocessing import Pool
from async_converter import sync_to_async
import asyncio

def f(x):
    i = 0
    while i < 10000000 * x:
        i = i + 1
    print("Finished: " + str(x))
    return i

async def run():
    print("Started with run")
    with Pool(processes=4) as pool:  # start 4 worker processes
        result1 = pool.apply_async(f, (10,))  # evaluate "f(10)" asynchronously
        result2 = pool.apply_async(f, (2,))
        res1 = await sync_to_async(result1.get)()
        print(res1)
        res2 = await sync_to_async(result2.get)()
        print(res2)

async def dummy(output):
    print(output)

async def main():
    # schedule the calls *concurrently*
    await asyncio.gather(
        run(),
        dummy("Nice"),
        dummy("Async"),
        dummy("Loop"),
        dummy("Perfect"),
        dummy("Dummy1"),
        dummy("Dummy2"),
        dummy("Dummy3"),
        dummy("Dummy4"),
        dummy("Dummy5"),
        dummy("Dummy6"),
        dummy("Dummy7"),
        dummy("Dummy8"),
        dummy("Dummy9"),
        dummy("Dummy10"),
    )

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
outputs:
Perfect
Dummy6
Nice
Dummy1
Dummy7
Started with run
Dummy2
Dummy8
Dummy3
Dummy9
Async
Dummy4
Dummy10
Loop
Dummy5
Finished: 2
Finished: 10
100000000
20000000
Parallelism with asyncio :)
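For comparison, the same parallelism is available with only the standard library by pointing run_in_executor at a ProcessPoolExecutor, which avoids the separate sync-to-async wrapper; a minimal sketch:
import asyncio
import concurrent.futures

def f(x):
    i = 0
    while i < 10000000 * x:
        i = i + 1
    return i

async def run():
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor(max_workers=4) as pool:
        # both calls execute in parallel worker processes; the event loop stays free
        res1, res2 = await asyncio.gather(
            loop.run_in_executor(pool, f, 10),
            loop.run_in_executor(pool, f, 2),
        )
    print(res1, res2)

if __name__ == '__main__':
    asyncio.run(run())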

python asyncio add_done_callback with async def

I have two functions: the first one, def_a, is an asynchronous function, and the second one, def_b, is a regular function that is registered with add_done_callback as a callback and receives the result of def_a.
My code looks like this:
import asyncio

def def_b(result):
    next_number = result.result()
    # some work on the next_number
    print(next_number + 1)

async def def_a(number):
    await some_async_work(number)
    return number + 1

loop = asyncio.get_event_loop()
task = asyncio.ensure_future(def_a(1))
task.add_done_callback(def_b)
response = loop.run_until_complete(task)
loop.close()
And it works perfectly.
The problem began when the second function, def_b, also became asynchronous. Now it looks like this:
async def def_b(result):
    next_number = result.result()
    # some asynchronous work on the next_number
    print(next_number + 1)
But now I cannot pass it to add_done_callback, because it's not a regular function.
My question is: is it possible to pass def_b to add_done_callback when def_b is asynchronous, and if so, how?
add_done_callback is considered a "low level" interface. When working with coroutines, you can chain them in many ways, for example:
import asyncio

async def my_callback(result):
    print("my_callback got:", result)
    return "My return value is ignored"

async def coro(number):
    await asyncio.sleep(number)
    return number + 1

async def add_success_callback(fut, callback):
    result = await fut
    await callback(result)
    return result

loop = asyncio.get_event_loop()
task = asyncio.ensure_future(coro(1))
task = add_success_callback(task, my_callback)
response = loop.run_until_complete(task)
print("response:", response)
loop.close()
Keep in mind add_done_callback will still call the callback if your future raises an exception (but calling result.result() will raise it).
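If the future can fail, a variant of add_success_callback along the same lines (a sketch) can catch the exception before deciding whether to invoke the callback:
async def add_success_callback(fut, callback):
    try:
        result = await fut
    except Exception:
        # the awaited future raised; skip the callback and re-raise (or handle here)
        raise
    await callback(result)
    return result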
This only works for one future job; if you have multiple async jobs, they will block each other. A better way is to use asyncio.as_completed() to iterate over the future list:
import asyncio

async def __after_done_callback(future_result):
    # await for something...
    pass

async def __future_job(number):
    await some_async_work(number)
    return number + 1

async def main():
    tasks = [asyncio.ensure_future(__future_job(x)) for x in range(100)]  # create 100 future jobs
    for f in asyncio.as_completed(tasks):
        result = await f
        await __after_done_callback(result)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
You can try the aiodag library. It's a very lightweight wrapper around asyncio that abstracts away some of the async plumbing that you usually have to think about. From this example you won't be able to tell that things are running asynchronously since it's just 1 task that depends on another, but it is all running async.
import asyncio
from aiodag import task

@task
async def def_b(result):
    # some asynchronous work on the next_number
    print(result + 1)

@task
async def def_a(number):
    await asyncio.sleep(number)
    return number + 1

async def main():
    a = def_a(1)
    b = def_b(a)  # this makes task b depend on task a
    return await b

loop = asyncio.get_event_loop()
asyncio.set_event_loop(loop)
response = loop.run_until_complete(main())
