cannot perform operation: another operation is in progress in pytest - python

I want to test some functions that work with asyncpg. If I run one test at a time, it works fine. But if I run several tests at a time, all tests except the first one crash with the error asyncpg.exceptions._base.InterfaceError: cannot perform operation: another operation is in progress.
Tests:
@pytest.mark.asyncio
async def test_project_connection(superuser_id, project_id):
    """A freshly created element is listed among its project's element ids."""
    data = element_data_random(project_id)
    element_id = (await resolve_element_create(data=data, user_id=superuser_id))["id"]
    # The lookup returns a mapping keyed by project id.
    project_elements = (await db_projects_element_ids_get([project_id]))[project_id]
    assert element_id in project_elements
@pytest.mark.asyncio
async def test_project_does_not_exist(superuser_id):
    """Creating an element for a random (unknown) project id must raise."""
    # A fresh UUID stands in for a project id that is not in the database.
    data = element_data_random(str(uuid.uuid4()))
    with pytest.raises(ObjectWithIdDoesNotExistError):
        await resolve_element_create(data=data, user_id=superuser_id)
All functions that work with the DB use the pool and look like this:
async def <some_db_func>(*args):
pool = await get_pool()
await pool.execute(...) # or fetch/fetchrow/fetchval
How I get the pool:
# Module-wide cache for the asyncpg pool; populated on the first get_pool() call.
db_pool = None


async def get_pool():
    """Return the shared asyncpg pool, creating it on first use.

    The host is 'postgres-docker' when that name resolves, otherwise
    'localhost'. Credentials are read from environment variables after
    loading dockerfiles/dev.env.

    NOTE(review): the pool is cached for the whole process, so it is created
    under whichever event loop runs the first call. The discussion around
    this snippet attributes the "another operation is in progress" error to
    reusing it from a different per-test loop.
    """
    global db_pool

    async def init(con):
        # Use ujson for both JSON column types on every new connection.
        await con.set_type_codec('jsonb', encoder=ujson.dumps, decoder=ujson.loads, schema='pg_catalog')
        await con.set_type_codec('json', encoder=ujson.dumps, decoder=ujson.loads, schema='pg_catalog')

    if db_pool is not None:
        return db_pool

    dockerfiles_dir = os.path.join(src_dir, 'dockerfiles')
    env_path = os.path.join(dockerfiles_dir, 'dev.env')
    try:
        # When code and DB inside docker containers
        host = 'postgres-docker'
        socket.gethostbyname(host)
    except socket.error:
        # When code on localhost, but DB inside docker container
        host = 'localhost'
    load_dotenv(dotenv_path=env_path)
    db_pool = await asyncpg.create_pool(
        database=os.getenv("POSTGRES_DBNAME"),
        user=os.getenv("POSTGRES_USER"),
        password=os.getenv("POSTGRES_PASSWORD"),
        host=host,
        init=init,
    )
    return db_pool
As far as I understand, under the hood asyncpg creates a new connection and runs the request inside that connection whenever you run a request through the pool, which means each request should have its own connection. However, this error occurs, and it is raised when one connection tries to handle two requests at the same time.

Okay, thanks to @Adelin I realized that I need to run each asynchronous test synchronously. I'm new to asyncio, so I didn't understand it right away, but I found a solution.
It was:
@pytest.mark.asyncio
async def test_...(*args):
    result = await <some_async_func>
    assert result == expected_result
It became:
def test_...(*args):
    async def inner():
        result = await <some_async_func>
        assert result == expected_result
    asyncio.get_event_loop().run_until_complete(inner())

The problem happens because each test function creates its own event loop, which confuses the asyncpg pool about which event loop it belongs to.
You can change the event-loop scope from "function" to "session" by adding the fixture below to conftest.py.
You don't need to run the tests sequentially.
import asyncio
import pytest
# pytest.yield_fixture is a deprecated alias; pytest.fixture supports yield directly.
@pytest.fixture(scope="session")
def event_loop(request):
    """Provide a single event loop shared by every test in the session.

    Overrides the per-function ``event_loop`` fixture so that loop-bound
    resources created in one test stay usable in the next. The loop is
    closed once the session's tests are done.
    """
    loop = asyncio.get_event_loop_policy().new_event_loop()
    yield loop
    loop.close()

Related

how to call functions/objects from another process

Summarize the problem
I have a flask server (with endpoints and socket events) and a discord bot. Both work independently, and I want to run them in parallel so I can trigger functions of the bot from a flask endpoint.
Describe what you have tried
For context, this is an example of the endpoint:
@app.route("/submit", methods=["POST"])
async def submit():
    """Forward the posted title/message to the configured user as a DM.

    Reads the JSON body, formats it as a code-fenced message, sends it via
    send_dm to the user id in the USER_ID environment variable, and echoes
    the body back.
    """
    data = request.json
    userid = int(os.getenv("USER_ID"))
    message = f"```Title: {data['title']}\nMessage: {data['message']}```"
    await send_dm(userid, message)
    return data
Where send_dm in its own package looks like this
# Note: this is a plain module-level coroutine. It is not a method, and it is
# not decorated with anything from the discord library; it only uses an
# instance of the commands.Bot class.
async def send_dm(userid: int, message: str):
    """Fetch the user by id, open a DM channel with them and send *message*."""
    user = await bot.fetch_user(userid)
    channel = await user.create_dm()
    await channel.send(message)
So to run them in parallel and let them communicate with each other I tried:
Attempt 1: multiprocessing module
def main():
    """Submit start_bot and start_server to a two-worker process pool.

    A new event loop schedules both callables on the executor and is then
    run forever to keep the parent process alive.
    """
    executor = ProcessPoolExecutor(2)
    loop = asyncio.new_event_loop()
    loop.run_in_executor(executor, start_bot)
    loop.run_in_executor(executor, start_server)
    loop.run_forever()


if __name__ == "__main__":
    # The entry point is main(); no function named run() is defined here.
    main()
When the function on the endpoint mentioned executes I get the following error AttributeError: '_MissingSentinel' object has no attribute 'is_set' on concurrent tasks
Attempt 2: threading module
# make them aware of each other
bot.flask_app = app
app.bot = bot
async def main():
task = threading.Thread(target=start_bot)
task.start()
start_server()
if __name__ == "__main__":
asyncio.run(main())
This approach brings two issues:
First, to run start_bot I use the .start() method instead of .run(), because according to this example .run() creates its own event loop, which would make it unreachable by other processes, and .start() is an async function, so when running this I get the error: RuntimeError: You cannot use AsyncToSync in the same thread as an async event loop - just await the async function directly.
Second, even when using the .run() function, the same issue arises when executing the mentioned endpoint.
Attempt 3: asyncio module
def main():
    """Run start_bot and start_server in a two-worker process pool.

    The futures returned by run_in_executor are kept in locals but are not
    awaited; the loop is simply run forever.
    """
    executor = ProcessPoolExecutor(2)
    loop = asyncio.new_event_loop()
    boo = loop.run_in_executor(executor, start_bot)
    baa = loop.run_in_executor(executor, start_server)
    loop.run_forever()


if __name__ == "__main__":
    main()
This time both processes actually execute, but I still cannot call the function I want from the flask endpoint.
I also tried
await asyncio.gather([start_server(), start_bot()])
But same issue as Attempt 2, and I already upgraded the flask[async] module so that is not the issue anymore.
Show some code
To reproduce what I have right now you can either check the full repo here that has only 4 files or this sample should be enough to reproduce.
from server import socketio, app
from bot import bot
from dotenv import load_dotenv
import os
import asyncio
import threading
# Join instead of concatenating: if dirname(__file__) is empty, "" + "/.env"
# would point at the filesystem root rather than the script's directory.
env_path = os.path.join(os.path.dirname(__file__), ".env")
load_dotenv(env_path)


def start_server():
    """Run the Flask app through socketio."""
    socketio.run(app)


def start_bot():
    """Run the discord bot with the token from the BOT_TOKEN variable."""
    token = os.getenv("BOT_TOKEN")
    bot.run(token)


async def main():
    """Call start_server() and then start_bot(), one after the other."""
    # this doesn't achieve what I want and is the main part of the problem
    start_server()
    start_bot()


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("Program exited")
Did I miss something? Point it out in the comments and I will add it in an edit.

Why is an asyncio task garbage collected when opening a connection inside it?

I am creating a server which needs to make an external request while responding. To handle concurrent requests I'm using Python's asyncio library. I have followed some examples from the standard library. It seems however that some of my tasks are destroyed, printing Task was destroyed but it is pending! to my terminal. After some debugging and research I found a stackoverflow answer which seemed to explain why.
I have created a minimal example demonstrating this effect below. My question is in what way should one counteract this effect? Storing a hard reference to the task, by for example storing asyncio.current_task() in a global variable mitigates the issue. It also seems to work fine if I wrap the future remote_read.read() as await asyncio.wait_for(remote_read.read(), 5). However I do feel like these solutions are ugly.
# run and visit http://localhost:8080/ in your browser
import asyncio
import gc
async def client_connected_cb(reader, writer):
    """Open an outbound TLS connection for each client and read from it.

    This is the handler whose task the question reports as being destroyed
    while still pending.
    """
    remote_read, remote_write = await asyncio.open_connection("google.com", 443, ssl=True)
    await remote_read.read()


async def cleanup():
    """Force a garbage-collection pass once per second, forever."""
    while True:
        gc.collect()
        await asyncio.sleep(1)


async def main():
    """Serve on localhost:8080 while cleanup() runs alongside."""
    server = await asyncio.start_server(client_connected_cb, "localhost", 8080)
    await asyncio.gather(server.serve_forever(), cleanup())


# Guarded so that importing this module does not start the server.
if __name__ == "__main__":
    asyncio.run(main())
I am running Python 3.10 on macOS 10.15.7.
It looks like, for the time being, the only way is to actually keep
a reference manually.
Maybe a decorator is something more convenient than having
to manually add the code in each async function.
I opted for the class design, so that a class attribute
can hold the hard-references while the tasks run. (A
local variable in the wrapper function would be part
of the task-reference cycle, and the garbage collection
would trigger all the same):
# run and visit http://localhost:8080/ in your browser
import asyncio
import gc
from functools import wraps
import weakref
class Shielded:
    """Decorator that holds a strong reference to the task running a coroutine.

    While the wrapped coroutine function is awaited, the current task is
    stored in the class-level ``registry`` and removed again when the call
    finishes, whether it returns or raises.
    """

    # Shared by all instances: tasks currently executing a wrapped coroutine.
    registry = set()

    def __init__(self, func):
        self.func = func

    async def __call__(self, *args, **kw):
        """Await ``func(*args, **kw)`` with the current task pinned in ``registry``."""
        task = asyncio.current_task()
        self.registry.add(task)
        try:
            return await self.func(*args, **kw)
        finally:
            # discard() cannot raise, so cleanup never masks the real outcome.
            self.registry.discard(task)
def _Shielded(func):
    """Debug stand-in for ``Shielded`` that only reports task destruction.

    Used along with the print sequence to confirm the task really is
    destroyed: it holds no strong reference to the task and prints
    "task destroyed" when the task object is finalized.
    """
    @wraps(func)
    async def wrapper(*args, **kwargs):
        # A finalize object keeps itself alive until its referent is
        # collected, so the return value does not need to be stored.
        weakref.finalize(asyncio.current_task(), lambda: print("task destroyed"))
        return await func(*args, **kwargs)
    return wrapper
@Shielded
async def client_connected_cb(reader, writer):
    """Connection handler wrapped by Shielded; prints its progress."""
    print("at task start")
    # I've connected this to a socket in an interactive session, which I'd explicitly .close() for debugging:
    remote_read, remote_write = await asyncio.open_connection("localhost", 8060, ssl=False)
    print("comensing remote read")
    await remote_read.read()
    print("task complete")


async def cleanup():
    """Force a garbage-collection pass once per second, forever."""
    while True:
        gc.collect()
        await asyncio.sleep(1)


async def main():
    """Serve on localhost:8080 while cleanup() runs alongside."""
    server = await asyncio.start_server(client_connected_cb, "localhost", 8080)
    await asyncio.gather(server.serve_forever(), cleanup())


# Guarded so that importing this module does not start the server.
if __name__ == "__main__":
    asyncio.run(main())
Moreover, I wanted to "really see it", so I created a "fake" _Shielded
decorator that would just log something when the underlying task
got deleted: "task complete" is never printed with it, indeed.

FastAPI asynchronous background tasks blocks other requests?

I want to run a simple background task in FastAPI, which involves some computation before dumping it into the database. However, the computation would block it from receiving any more requests.
from fastapi import BackgroundTasks, FastAPI
app = FastAPI()
db = Database()
async def task(data):
    """Fetch rows, run the long computation inline, then store the result."""
    otherdata = await db.fetch("some sql")
    newdata = somelongcomputation(data, otherdata)  # this blocks other requests
    await db.execute("some sql", newdata)
@app.post("/profile")
async def profile(data: Data, background_tasks: BackgroundTasks):
    """Queue task(data) as a background task and return an empty body."""
    background_tasks.add_task(task, data)
    return {}
What is the best way to solve this issue?
Your task is defined as async, which means fastapi (or rather starlette) will run it in the asyncio event loop.
And because somelongcomputation is synchronous (i.e. not waiting on some IO, but doing computation) it will block the event loop as long as it is running.
I see a few ways of solving this:
Use more workers (e.g. uvicorn main:app --workers 4). This will allow up to 4 somelongcomputation in parallel.
Rewrite your task to not be async (i.e. define it as def task(data): ... etc). Then starlette will run it in a separate thread.
Use fastapi.concurrency.run_in_threadpool, which will also run it in a separate thread. Like so:
from fastapi.concurrency import run_in_threadpool
async def task(data):
    """Same task, with the computation moved off the event loop to a thread."""
    otherdata = await db.fetch("some sql")
    # run_in_threadpool forwards positional arguments, so no lambda is needed.
    newdata = await run_in_threadpool(somelongcomputation, data, otherdata)
    await db.execute("some sql", newdata)
Or use asyncio's run_in_executor directly (which run_in_threadpool uses under the hood):
import asyncio
async def task(data):
    """Same task, with the computation run on the loop's default executor."""
    otherdata = await db.fetch("some sql")
    loop = asyncio.get_running_loop()
    # None selects the default executor; extra arguments are passed on to the callable.
    newdata = await loop.run_in_executor(None, somelongcomputation, data, otherdata)
    await db.execute("some sql", newdata)
You could even pass in a concurrent.futures.ProcessPoolExecutor as the first argument to run_in_executor to run it in a separate process.
Spawn a separate thread / process yourself. E.g. using concurrent.futures.
Use something more heavy-handed like celery. (Also mentioned in the fastapi docs here).
If your task is CPU bound you could use multiprocessing; there is a way to do that with a background task in FastAPI:
https://stackoverflow.com/a/63171013
Although you should consider to use something like Celery if there are lot of cpu-heavy tasks.
Read this issue.
Also in the example below, my_model.function_b could be any blocking function or process.
TL;DR
from starlette.concurrency import run_in_threadpool
@app.get("/long_answer")
async def long_answer():
    """Run the blocking my_model.function_b in a worker thread and return its result."""
    rst = await run_in_threadpool(my_model.function_b, arg_1, arg_2)
    return rst
This is an example of a background task in FastAPI:
from fastapi import FastAPI
import asyncio
app = FastAPI()
x = [1]  # a global variable x


@app.get("/")
def hello():
    """Return a greeting together with the current value of the global x."""
    return {"message": "hello", "x": x}
async def periodic():
    """Increment x[0] and print it every three seconds, forever."""
    while True:
        # code to run periodically starts here
        x[0] += 1
        print(f"x is now {x}")
        # code to run periodically ends here
        # sleep for 3 seconds after running above code
        await asyncio.sleep(3)
# Strong references to background tasks. The event loop itself only keeps
# weak references, so an unreferenced task can be collected mid-execution.
_background_tasks = set()


@app.on_event("startup")
async def schedule_periodic():
    """Start periodic() as a background task when the application starts."""
    # This runs inside the server's loop, so create_task targets that loop.
    task = asyncio.create_task(periodic())
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app)

How to fix this async generator object is not an iterator issue in Python?

I'm trying to mock a websockets data stream and I'm getting this error: 'async_generator' object is not an iterator
This is my generator code:
from time import sleep
mock_sf_record = '{"payload": ...}'


async def generateMessages():
    """Async generator that yields the mock record once, after a 5 s pause."""
    # NOTE: sleep here is time.sleep, a blocking call made inside an async generator.
    sleep(5)
    yield mock_sf_record
and the code that calls this code:
async def subscribe(subscription):
    """Receive messages in a loop: mock data in 'dev', the websocket otherwise."""
    global RECEIVED_MESSAGES_CACHE
    ...
    while True:
        # NOTE: in 'dev' this is the line reported to fail with
        # "'async_generator' object is not an iterator".
        messageStream = await(next(generateMessages())) if ENV == 'dev' else await websocket.recv()
What can I do? What am I doing wrong? I'm basically using the generateMessages() generator to create a stream of messages, but this isn't working...
The code that is calling subscribe:
# Schedule one subscribe() task per subscription type, then run the loop.
for subscription in SUBSCRIPTION_TYPES:
    loop.create_task(subscribe(subscription))
loop.run_forever()
More importantly, if I change the code to use a synchronous generator, it only generates messages for a single subscription and I never seem to generate messages for any other subscription... it seems to block on a single thread. Why is this?
messageStream = (next(generateMessages())) if ENV == 'dev' else await websocket.recv()
and
# generator that generates mock SF data
from asyncio import sleep
mock_sf_record = '{"payload": ...}'


def generateMessages():
    """Synchronous generator that yields the mock record once."""
    # NOTE: sleep here is asyncio.sleep; calling it without await only creates
    # a coroutine object, so no pause actually happens.
    sleep(5)
    yield mock_sf_record
Why does the synchronous generator cause problems?
The right way:
async def subscribe(subscription):
    """Read one message: from the async mock generator in 'dev', else the websocket."""
    global RECEIVED_MESSAGES_CACHE
    ...
    gen = generateMessages()  # init async generator
    # An async generator is advanced by awaiting __anext__(), not by next().
    messageStream = (await gen.__anext__()) if ENV == 'dev' else (await websocket.recv())
https://www.python.org/dev/peps/pep-0525/#support-for-asynchronous-iteration-protocol

Python asyncio, aioamqp callbacks and pytest

I'm trying to write some tests for some asynchronous Python code using the aioamqp message broker, but pytest and callbacks fail me.
Simply put, when the aioamqp basic_consume() function receives a message and calls the assigned asynchronous callback, inside the callback I can do whatever I like -- reference unassigned variables, assert something outrageous -- and pytest happily passes the test. Clearly an exception gets raised under the hood and the test is interrupted, since the callback function never runs further than the first failing line, but the failure never rises all the way to pytest.
Here's a code snippet to demonstrate:
import aioamqp
import asyncio
import pytest
MQ_HOST = '0.0.0.0'
MQ_PORT = 5672
MQ_LOGIN = 'login'
MQ_PASSWORD = 'password'
class MockMQ:
    """Thin aioamqp wrapper used by the test: connect, publish, consume, close."""

    def __init__(self):
        self.loop = asyncio.get_event_loop()
        self.transport = None
        self.protocol = None
        # Set by connect(); initialised here so the attribute always exists.
        self.channel = None

    async def connect(self):
        """Open the AMQP connection and a channel on it.

        If the connection is closed during setup, a message is printed and
        the method returns without raising.
        """
        try:
            self.transport, self.protocol = await aioamqp.connect(
                host=MQ_HOST, port=MQ_PORT, login=MQ_LOGIN, password=MQ_PASSWORD
            )
            self.channel = await self.protocol.channel()
        except aioamqp.AmqpClosedConnection:
            print('closed connection')
            return

    async def close(self):
        """Close the protocol, then the underlying transport."""
        await self.protocol.close()
        self.transport.close()

    async def publish(self, data, queue_name, exchange='', properties=None):
        """Declare *queue_name* and publish *data* to it via *exchange*."""
        # The declare result is not needed; the call only ensures the queue exists.
        await self.channel.queue_declare(queue_name)
        await self.channel.publish(data, exchange, queue_name, properties=properties)

    async def consume(self, callback, queue_name):
        """Register *callback* as the consumer for *queue_name*."""
        await self.channel.basic_consume(callback, queue_name=queue_name)
@pytest.mark.asyncio
async def test_mq():
    """Basic ping-pong test for RabbitMQ."""
    QUEUE_NAME = 'my_queue'

    # No pytest mark on the callback: it is handed to consume() below rather
    # than being a test function of its own.
    async def callback(channel, body, envelope, properties):
        """This is the callback called when a MQ message is consumed."""
        print('we are here')
        await channel.basic_client_ack(envelope.delivery_tag)
        print(body)  # this gets printed as well
        foo = bar * 2  # this is where we fail
        assert body == b'bar'
        print('we never arrive here')

    mq = MockMQ()
    await mq.connect()
    await mq.consume(callback, QUEUE_NAME)
    await mq.publish(b'foo', QUEUE_NAME)
    # Wait one second after publishing before closing the connection.
    await asyncio.sleep(1.0)
    await mq.close()
# Run the test coroutine directly, without pytest, when executed as a script.
if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(test_mq())
Running this via the main program with IPython results correctly in an exception, since it doesn't get swallowed by pytest.
What is the proper way of writing tests for pytest in this case? pytest-asyncio does not seem to affect this issue in the least.
EDIT: I might as well add that my dev environment uses Django and pytest-django, but removing it doesn't change the result either.

Categories

Resources