I have a program (an ASGI server) that is structured roughly like this:
import asyncio
import contextvars

ctxvar = contextvars.ContextVar("ctx")


async def lifespan():
    ctxvar.set("spam")


async def endpoint():
    assert ctxvar.get() == "spam"


async def main():
    ctx = contextvars.copy_context()
    task = asyncio.create_task(lifespan())
    await task
    task = asyncio.create_task(endpoint())
    await task


asyncio.run(main())
Because the lifespan event and the endpoints run in separate tasks, they can't share contextvars. This is by design: each task copies the current context before executing, so anything lifespan sets on ctxvar is invisible to the endpoint task.
This is the desired behavior for endpoints, but I would like execution to appear like this (from the user's perspective):
async def lifespan():
    ctxvar.set("spam")
    await endpoint()
In other words, the endpoints are executed in their own independent context, but within the context of the lifespan.
I tried to get this to work by using contextvars.copy_context():
import asyncio
import contextvars

ctxvar = contextvars.ContextVar("ctx")


async def lifespan():
    ctxvar.set("spam")
    print("set")


async def endpoint():
    print("get")
    assert ctxvar.get() == "spam"


async def main():
    ctx = contextvars.copy_context()
    task = ctx.run(asyncio.create_task, lifespan())
    await task
    endpoint_ctx = ctx.copy()
    task = endpoint_ctx.run(asyncio.create_task, endpoint())
    await task


asyncio.run(main())
As well as:
async def main():
    ctx = contextvars.copy_context()
    task = asyncio.create_task(ctx.run(lifespan))
    await task
    endpoint_ctx = ctx.copy()
    task = asyncio.create_task(endpoint_ctx.run(endpoint))
    await task
However it seems that contextvars.Context.run does not work this way (I guess the context is bound when the coroutine is created but not when it is executed).
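A quick self-contained sketch that illustrates this (var and show are throwaway names, not part of my app): ctx.run(...) only creates the coroutine object inside ctx, and the body later runs in whatever context the task copies when it is created.
import asyncio
import contextvars

var = contextvars.ContextVar("var", default="outer")


async def show():
    # Runs in the context copied by create_task, not in `ctx` below.
    print(var.get())  # prints "outer"


async def main():
    ctx = contextvars.copy_context()
    ctx.run(var.set, "inner")        # only visible inside ctx
    coro = ctx.run(show)             # coroutine object created inside ctx...
    await asyncio.create_task(coro)  # ...but its body runs in a copy of the current context


asyncio.run(main())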
Is there a simple way to achieve the desired behavior, without restructuring how the tasks are being created or such?
Here's what I came up with, inspired by PEP 555 and asgiref:
from contextvars import Context, ContextVar, copy_context
from typing import Any


def _set_cvar(cvar: ContextVar, val: Any):
    cvar.set(val)


class CaptureContext:
    def __init__(self) -> None:
        self.context = Context()

    def __enter__(self) -> "CaptureContext":
        self._outer = copy_context()
        return self

    def sync(self):
        final = copy_context()
        for cvar in final:
            if cvar not in self._outer:
                # new contextvar set
                self.context.run(_set_cvar, cvar, final.get(cvar))
            else:
                final_val = final.get(cvar)
                if self._outer.get(cvar) != final_val:
                    # value changed
                    self.context.run(_set_cvar, cvar, final_val)

    def __exit__(self, *args: Any):
        self.sync()


def restore_context(context: Context) -> None:
    """Restore `context` onto the current context."""
    for cvar in context.keys():
        try:
            if cvar.get() != context.get(cvar):
                cvar.set(context.get(cvar))
        except LookupError:
            # not set in the current context yet
            cvar.set(context.get(cvar))
Usage:
import asyncio
import contextvars

ctxvar = contextvars.ContextVar("ctx")


async def lifespan(cap: CaptureContext):
    with cap:
        ctxvar.set("spam")


async def endpoint():
    assert ctxvar.get() == "spam"


async def main():
    cap = CaptureContext()
    await asyncio.create_task(lifespan(cap))
    restore_context(cap.context)
    task = asyncio.create_task(endpoint())
    await task


asyncio.run(main())
The sync() method is provided in case the task is long-running and you need to capture the context before it finishes. A somewhat contrived example:
import asyncio
import contextvars

ctxvar = contextvars.ContextVar("ctx")


async def lifespan(cap: CaptureContext, event: asyncio.Event):
    with cap:
        ctxvar.set("spam")
        cap.sync()
        event.set()
        await asyncio.sleep(float("inf"))


async def endpoint():
    assert ctxvar.get() == "spam"


async def main():
    cap = CaptureContext()
    event = asyncio.Event()
    asyncio.create_task(lifespan(cap, event))
    await event.wait()
    restore_context(cap.context)
    task = asyncio.create_task(endpoint())
    await task


asyncio.run(main())
I think it would still be much nicer if contextvars.Context.run worked with coroutines.
This feature will be supported in Python 3.11: https://github.com/python/cpython/issues/91150
You will be able to write:
async def main():
    ctx = contextvars.copy_context()
    task = asyncio.create_task(lifespan(), context=ctx)
    await task
    endpoint_ctx = ctx.copy()
    task = asyncio.create_task(endpoint(), context=endpoint_ctx)
    await task
In the meantime, in current Python versions you will need a backport of this feature. I can't think of a good one, but a bad one is here.
Related
I need to listen for tasks on 2 queues, so I wrote the code below, but it has a problem. Currently it behaves like this: if the code starts when both queues are full, it works great. But if the queues were empty (or one of them was), the code reads messages but does not process them (does not send an ack, does not run the logic). The messages stay unacked until I stop the code; I don't see any reason for them to be unacked and unprocessed.
I can't understand what is wrong with the code. Maybe there is another way to aggregate 2 or more queues like this?
# task_processor.py
from aio_pika import IncomingMessage


class TaskProcessor:
    MAX_TASKS_PER_INSTANCE = 1

    def __init__(self):
        self._tasks = []

    def can_accept_new_task(self) -> bool:
        return len(self._tasks) < self.MAX_TASKS_PER_INSTANCE

    async def process(self, message: IncomingMessage):
        self._tasks.append(message)
        print(message.body)
        await message.ack()
        self._tasks.pop()
# main.py
import asyncio
from asyncio import QueueEmpty
from typing import Callable

import aio_pika
from aio_pika import RobustQueue
from dotenv import load_dotenv

load_dotenv()

from core.logger.logger import logger
from core.services.rabbitmq.task_processor.task_processor import TaskProcessor


async def get_single_task(queue: RobustQueue):
    while True:
        try:
            msg = await queue.get(timeout=3600)
            return msg
        except QueueEmpty:
            await asyncio.sleep(3)
        except asyncio.exceptions.TimeoutError:
            logger.warning('queue timeout error')
        except Exception as ex:
            logger.error(f"{queue} errored", exc_info=ex)


async def task_aggregator(queue1: RobustQueue, queue2: RobustQueue, should_take_new_task_cb: Callable):
    while True:
        if should_take_new_task_cb():
            queue2, queue1 = queue1, queue2
            gen1 = get_single_task(queue1)
            gen2 = get_single_task(queue2)
            done, _ = await asyncio.wait([gen1, gen2], return_when=asyncio.FIRST_COMPLETED)
            for item in done:
                result = item.result()
                yield result
        else:
            await asyncio.sleep(1)


async def tasks(queue1: RobustQueue, queue2: RobustQueue, should_take_new_task_cb: Callable):
    async for task in task_aggregator(queue1, queue2, should_take_new_task_cb):
        yield task


async def main():
    connection = await aio_pika.connect_robust(
        f"amqp://user:password@host:port/vhost?heartbeat={180}"
    )
    channel1 = connection.channel()
    channel2 = connection.channel()
    await channel1.initialize()
    await channel2.initialize()
    queue1 = await channel1.get_queue('queue1')
    queue2 = await channel2.get_queue('queue2')
    task_processor = TaskProcessor()
    task_generator = tasks(queue1, queue2, task_processor.can_accept_new_task)
    while True:
        if task_processor.can_accept_new_task():
            task = await anext(task_generator)
            await task_processor.process(task)
        else:
            await asyncio.sleep(1)


if __name__ == '__main__':
    asyncio.run(main())
I am making a discord bot that will grab a json using requests from time to time, and then send the relevant information to a specific channel.
I have the following classes:
Helper, the discord bot itself, which runs async from the start inside an asyncio.gather;
tasker, which controls the interval at which the request-making class is called. It runs in a different thread so it doesn't block the async Helper while it waits;
getInfo, which does the requests, stores the info, and should talk with Helper.
I am having 2 problems right now:
While tasker is on a different thread, every time I try to talk with Helper via getInfo it gives me the errors RuntimeError: no running event loop and RuntimeWarning: coroutine 'getInfo.discordmsg' was never awaited.
If I don't run it on a different thread, however, it does work with TestStatus: 1, but it makes Helper get stuck and stop running with TestStatus: 2.
Anyway, here is the code:
import requests
import asyncio
import discord
import dateutil.parser  # needed for converteHora's non-current branch
from discord.ext import commands, tasks
from datetime import datetime, timedelta
import threading


class Helper(discord.Client):
    async def on_ready(self):
        global discordbot, taskervar
        servername = 'ServerName'
        discordbot = self
        self.servidores = dict()
        self.canais = dict()
        for i in range(len(self.guilds)):
            self.servidores[self.guilds[i].name] = {}
            self.servidores[self.guilds[i].name]['guild'] = self.guilds[i]
            servidor = self.guilds[i]
            for k in range(len(servidor.channels)):
                canal = servidor.channels[k]
                self.canais[str(canal.name)] = canal
            if 'bottalk' not in self.canais.keys():
                newchan = await self.servidores[self.guilds[i].name]['guild'].create_text_channel('bottalk')
                self.canais[str(newchan.name)] = newchan
            self.servidores[self.guilds[i].name]['canais'] = self.canais
        self.bottalk = self.get_channel(self.servidores[servername]['canais']['bottalk'].id)
        await self.msg("Bot online: " + converteHora(datetime.now(), True))
        print(f'{self.user} has connected to Discord!')
        taskervar.startprocess()

    async def msg(self, msg):
        await self.bottalk.send(msg)

    async def on_message(self, message):
        if message.author == self.user:
            return
        else:
            print(message)


class tasker:
    def __init__(self):
        global discordbot, taskervar
        print('Tasker start')
        taskervar = self
        self.waiter = threading.Event()
        self.lastupdate = datetime.now()
        self.nextupdate = datetime.now()
        self.thread = threading.Thread(target=self.requests)

    def startprocess(self):
        if not self.thread.is_alive():
            self.waiter = threading.Event()
            self.interval = 60 * 5
            self.thread = threading.Thread(target=self.requests)
            self.thread.start()

    def requests(self):
        while not self.waiter.is_set():
            getInfo()
            self.lastupdate = datetime.now()
            self.nextupdate = datetime.now() + timedelta(seconds=self.interval)
            self.waiter.wait(self.interval)

    def stopprocess(self):
        self.waiter.set()


class getInfo:
    def __init__(self):
        global discordbot, taskervar
        self.requests()

    async def discordmsg(self, msg):
        await discordbot.msg(msg)

    def requests(self):
        jsondata = {"TestStatus": 1}
        if jsondata['TestStatus'] == 1:
            print('here')
            asyncio.create_task(self.discordmsg("SOMETHING WENT WRONG"))
            taskervar.stopprocess()
            return
        elif jsondata['TestStatus'] == 2:
            print('test')
            hora = converteHora(datetime.now(), True)
            asyncio.create_task(self.discordmsg(str("Everything is fine but not now: " + hora)))
            print('test2')


def converteHora(dateUTC, current=False):
    if current:
        response = dateUTC.strftime("%d/%m/%Y, %H:%M:%S")
    else:
        response = (dateutil.parser.isoparse(dateUTC) - timedelta(hours=3)).strftime("%d/%m/%Y, %H:%M:%S")
    return response


async def main():
    TOKEN = 'TOKEN GOES HERE'
    tasker()
    await asyncio.gather(
        await Helper().start(TOKEN)
    )


if __name__ == '__main__':
    asyncio.run(main())
Your primary problem is that you don't give your secondary thread access to the asyncio event loop. You can't just await and/or create_task a coroutine on a global object from another thread (one of many reasons to avoid using global objects in the first place). Here is how you could modify your code to accomplish that:
class tasker:
    def __init__(self):
        # ...
        self.loop = asyncio.get_running_loop()
        # ...


class getInfo:
    # ...
    def requests(self):
        # replace the create_task calls with this (note that it is handed a
        # coroutine object, not the bare method):
        asyncio.run_coroutine_threadsafe(self.discordmsg("SOMETHING WENT WRONG"), taskervar.loop)
This uses your global variables because I don't want to rewrite your entire program, but I still strongly recommend avoiding them and considering a re-write yourself.
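For reference, here is a minimal self-contained sketch of the run_coroutine_threadsafe pattern (the say and worker names are made up for this example); note that the call returns a concurrent.futures.Future that the worker thread can wait on:
import asyncio
import threading


async def say(msg):
    print(msg)


def worker(loop):
    # Schedule a coroutine onto the loop running in the main thread, then block
    # this worker thread (not the loop) until the coroutine has actually run.
    fut = asyncio.run_coroutine_threadsafe(say("hello from the worker thread"), loop)
    fut.result(timeout=10)


async def main():
    loop = asyncio.get_running_loop()
    thread = threading.Thread(target=worker, args=(loop,))
    thread.start()
    # Wait for the worker without blocking the loop, so say() can still be scheduled.
    await loop.run_in_executor(None, thread.join)


asyncio.run(main())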
All that being said, I suspect you will still have this bug:
If I don't run it on a different thread, however, it does work with TestStatus: 1, but it makes Helper get stuck and stop running with TestStatus: 2.
I can't tell what would cause this issue, and I'm running into trouble reproducing it on my machine. Your code is pretty hard to read and is missing some details needed for reproducibility; I imagine that is part of the reason why you didn't get an answer in the first place. I'm sure you're aware of this article, but it might be worth a re-visit for better practices in sharing code: https://stackoverflow.com/help/minimal-reproducible-example
Consider the following code:
import random
import asyncio


class RandomLife(object):
    def __init__(self, name: str):
        self.name = name
        self.coro = asyncio.sleep(random.randrange(0, 5))

    def __await__(self):
        return self.coro.__await__()


async def main():
    objects = [RandomLife("one"), RandomLife("two"), RandomLife("three")]
    finished, unfinished = await asyncio.wait(objects, return_when=asyncio.FIRST_COMPLETED)
    print(finished)
    await asyncio.wait(unfinished)


if __name__ == "__main__":
    asyncio.run(main())
After the first asyncio.wait I want to know which instance of RandomLife has completed. But the finished variable is a set of Tasks, rather than RandomLife instances. How do I convert such a task back to a RandomLife? Is it possible?
As the documentation warns:
Note wait() schedules coroutines as Tasks automatically and later returns those implicitly created Task objects in (done, pending) sets. Therefore the following code won’t work as expected:
async def foo():
    return 42

coro = foo()
done, pending = await asyncio.wait({coro})

if coro in done:
    # This branch will never be run!
Here is how the above snippet can be fixed:
async def foo():
    return 42

task = asyncio.create_task(foo())
done, pending = await asyncio.wait({task})

if task in done:
    # Everything will work as expected now.
We can employ the same trick. First, we wrap all the coroutines in tasks, and then set up a mapping from each task created to its RandomLife instance:
import random
import asyncio


class RandomLife(object):
    def __init__(self, name: str):
        self.name = name
        self.coro = asyncio.sleep(random.randrange(0, 5))

    def __await__(self):
        return self.coro.__await__()


async def main():
    objects = [RandomLife("one"), RandomLife("two"), RandomLife("three")]
    # Wrap all the coros in tasks, as the documentation suggests.
    tasks = [asyncio.create_task(o.coro) for o in objects]
    # Set up a mapping from the tasks created to the RandomLife instances.
    task2life = dict(zip(tasks, objects))
    finished, unfinished = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
    # Get the first task that finished.
    finished_task = list(finished)[0]
    # Map it back to its RandomLife instance.
    finished_life = task2life[finished_task]
    print(finished_life.name)
    await asyncio.wait(unfinished)


if __name__ == "__main__":
    asyncio.run(main())
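A small variation on the same idea, not from the snippet above but worth noting: because RandomLife defines __await__, each object is itself awaitable, so you can wrap the objects directly with asyncio.ensure_future (create_task only accepts coroutines) and keep the same mapping trick:
import random
import asyncio


class RandomLife:
    def __init__(self, name: str):
        self.name = name
        self.coro = asyncio.sleep(random.randrange(0, 5))

    def __await__(self):
        return self.coro.__await__()


async def main():
    objects = [RandomLife("one"), RandomLife("two"), RandomLife("three")]
    # ensure_future accepts any awaitable, so the objects themselves can be wrapped.
    tasks = [asyncio.ensure_future(o) for o in objects]
    task2life = dict(zip(tasks, objects))
    finished, unfinished = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
    print(task2life[next(iter(finished))].name)
    await asyncio.wait(unfinished)


if __name__ == "__main__":
    asyncio.run(main())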
I'm implementing a web API using aiohttp, deployed using gunicorn with uvloop enabled (--worker-class aiohttp.GunicornUVLoopWebWorker). Therefore, my code always runs in an asynchronous context. I had the idea of running parallel jobs while handling requests, for better performance.
I'm not using asyncio for this because I want parallelism, not concurrency.
I'm aware of multiprocessing and the GIL problem in Python. But joining a process also applies to my question.
Here is an example:
from aiohttp.web import middleware


@middleware
async def context_init(request, handler):
    request.context = {}
    request.context['threads'] = []
    ret = await handler(request)
    for thread in request.context['threads']:
        thread.join()
    return ret
Taking into account that thread.join() or process.join() blocks the current thread, this will block the event loop (as far as my knowledge goes). How can I join asynchronously? What I want can be represented figuratively like this: await thread.join() or await process.join().
Update:
Thanks to @user4815162342 I was able to write proper code for my project:
Middleware:
from aiohttp.web import middleware
from util.process_session import ProcessSession


@middleware
async def context_init(request, handler):
    request.context = {}
    request.context['process_session'] = ProcessSession()
    request.context['processes'] = {}
    ret = await handler(request)
    await request.context['process_session'].wait_for_all()
    return ret
Util:
import asyncio
import concurrent.futures
from functools import partial


class ProcessSession():
    def __init__(self):
        self.loop = asyncio.get_running_loop()
        self.pool = concurrent.futures.ProcessPoolExecutor()
        self.futures = []

    async def wait_for_all(self):
        await asyncio.wait(self.futures)

    def add_process(self, f, *args, **kwargs):
        ret = self.loop.run_in_executor(self.pool, partial(f, *args, **kwargs))
        self.futures.append(ret)
        return ret


class ProcessBase():
    def __init__(self, process_session, f, *args, **kwargs):
        self.future = process_session.add_process(f, *args, **kwargs)

    async def wait(self):
        await asyncio.wait([self.future])
        return self.future.result()
Answering your question: yes, it does block the event loop.
I found that ThreadPoolExecutor works pretty well in these situations.
import asyncio
import functools

from aiohttp.web import middleware
from concurrent.futures.thread import ThreadPoolExecutor


@middleware
async def context_init(request, handler):
    request.context = {}
    request.context['threads'] = []
    ret = await handler(request)
    with ThreadPoolExecutor(1) as executor:
        await asyncio.get_event_loop().run_in_executor(
            executor, functools.partial(join_threads, request.context['threads']))
    return ret


def join_threads(threads):
    for t in threads:
        t.join()
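If you only ever need to wait on one thread at a time, a slightly simpler sketch of the same idea is to hand thread.join directly to the default executor (blocking_work below is just a stand-in for real work):
import asyncio
import threading
import time


def blocking_work():
    time.sleep(1)


async def main():
    t = threading.Thread(target=blocking_work)
    t.start()
    loop = asyncio.get_running_loop()
    # Offload the blocking join() to the default executor so the event loop stays free.
    await loop.run_in_executor(None, t.join)
    print("thread finished")


asyncio.run(main())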
I found a solution using multiprocessing. It can be done using a Pool. The standard library provides some "async" methods (they're not really async; they just separate starting the process from retrieving its output), such as apply_async.
Using a simple async wrapper, I managed to deliver what I wanted:
from multiprocessing import Pool
from async_converter import sync_to_async
import asyncio


def f(x):
    i = 0
    while i < 10000000 * x:
        i = i + 1
    print("Finished: " + str(x))
    return i


async def run():
    print("Started with run")
    with Pool(processes=4) as pool:  # start 4 worker processes
        result1 = pool.apply_async(f, (10,))  # evaluate "f(10)" asynchronously
        result2 = pool.apply_async(f, (2,))
        res1 = await sync_to_async(result1.get)()
        print(res1)
        res2 = await sync_to_async(result2.get)()
        print(res2)


async def dummy(output):
    print(output)


async def main():
    # Schedule the calls *concurrently*:
    await asyncio.gather(
        run(),
        dummy("Nice"),
        dummy("Async"),
        dummy("Loop"),
        dummy("Perfect"),
        dummy("Dummy1"),
        dummy("Dummy2"),
        dummy("Dummy3"),
        dummy("Dummy4"),
        dummy("Dummy5"),
        dummy("Dummy6"),
        dummy("Dummy7"),
        dummy("Dummy8"),
        dummy("Dummy9"),
        dummy("Dummy10"),
    )


loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
outputs:
Perfect
Dummy6
Nice
Dummy1
Dummy7
Started with run
Dummy2
Dummy8
Dummy3
Dummy9
Async
Dummy4
Dummy10
Loop
Dummy5
Finished: 2
Finished: 10
100000000
20000000
Parallelism with asyncio :)
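The async_converter module above is the author's own helper and isn't shown; assuming sync_to_async is just a thin wrapper that pushes a blocking call into the default executor, a minimal sketch of it could look like this:
import asyncio
import functools


def sync_to_async(func):
    """Wrap a blocking callable so it can be awaited (it runs in the default executor)."""
    async def wrapper(*args, **kwargs):
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, functools.partial(func, *args, **kwargs))
    return wrapper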
I have 2 functions: the first one, def_a, is an asynchronous function, and the second one, def_b, is a regular function that is registered with add_done_callback to receive def_a's result.
My code looks like this:
import asyncio


def def_b(result):
    next_number = result.result()
    # some work on the next_number
    print(next_number + 1)


async def def_a(number):
    await some_async_work(number)
    return number + 1


loop = asyncio.get_event_loop()
task = asyncio.ensure_future(def_a(1))
task.add_done_callback(def_b)
response = loop.run_until_complete(task)
loop.close()
And it works perfectly.
The problem began when the second function, def_b, also became asynchronous. Now it looks like this:
async def def_b(result):
    next_number = result.result()
    # some asynchronous work on the next_number
    print(next_number + 1)
But now I cannot pass it to add_done_callback, because it's not a regular function.
My question is: is it possible to provide def_b to add_done_callback if def_b is asynchronous, and if so, how?
add_done_callback is considered a "low level" interface. When working with coroutines, you can chain them in many ways, for example:
import asyncio


async def my_callback(result):
    print("my_callback got:", result)
    return "My return value is ignored"


async def coro(number):
    await asyncio.sleep(number)
    return number + 1


async def add_success_callback(fut, callback):
    result = await fut
    await callback(result)
    return result


loop = asyncio.get_event_loop()
task = asyncio.ensure_future(coro(1))
task = add_success_callback(task, my_callback)
response = loop.run_until_complete(task)
print("response:", response)
loop.close()
Keep in mind add_done_callback will still call the callback if your future raises an exception (but calling result.result() will raise it).
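A minimal illustration of that behaviour (boom and on_done are made-up names): the callback still fires when the task raises, and calling result() inside the callback would re-raise the exception.
import asyncio


async def boom():
    raise ValueError("something went wrong")


def on_done(fut):
    # The callback runs even though the task failed ...
    print("callback ran, exception was:", fut.exception())
    # ... calling fut.result() here would re-raise the ValueError instead.


async def main():
    task = asyncio.ensure_future(boom())
    task.add_done_callback(on_done)
    try:
        await task
    except ValueError:
        pass


asyncio.run(main())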
The chaining approach above only works for a single future job. If you have multiple async jobs, they will block each other; a better way is to use asyncio.as_completed() to iterate over the future list:
import asyncio


async def __after_done_callback(future_result):
    # await for something...
    pass


async def __future_job(number):
    await some_async_work(number)
    return number + 1


async def main():
    tasks = [asyncio.ensure_future(__future_job(x)) for x in range(100)]  # create 100 future jobs
    for f in asyncio.as_completed(tasks):
        result = await f
        await __after_done_callback(result)


loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
You can try the aiodag library. It's a very lightweight wrapper around asyncio that abstracts away some of the async plumbing you usually have to think about. From this example you won't be able to tell that things are running asynchronously, since it's just one task that depends on another, but it is all running async.
import asyncio
from aiodag import task


@task
async def def_b(result):
    # some asynchronous work on the next_number
    print(result + 1)


@task
async def def_a(number):
    await asyncio.sleep(number)
    return number + 1


async def main():
    a = def_a(1)
    b = def_b(a)  # this makes task b depend on task a
    return await b


loop = asyncio.get_event_loop()
asyncio.set_event_loop(loop)
response = loop.run_until_complete(main())