Summarize the problem
I have a Flask server (with endpoints and socket events) and a Discord bot. Both work independently; I want to run them in parallel so I can trigger functions of the bot from a Flask endpoint.
Describe what you have tried
For context, this is an example of the endpoint:
@app.route("/submit", methods=["POST"])
async def submit():
    data = request.json
    userid = int(os.getenv("USER_ID"))
    message = f"```Title: {data['title']}\nMessage: {data['message']}```"
    await send_dm(userid, message)
    return data
Where send_dm, defined in its own package, looks like this:
# notice that this is not a method of a class
# nor a function decorated with properties from the discord library;
# it just uses an instance of the commands.Bot class
async def send_dm(userid: int, message: str):
    user = await bot.fetch_user(userid)
    channel = await user.create_dm()
    await channel.send(message)
So, to run them in parallel and let them communicate with each other, I tried:
Attempt 1: multiprocessing module
def main():
    executor = ProcessPoolExecutor(2)
    loop = asyncio.new_event_loop()
    loop.run_in_executor(executor, start_bot)
    loop.run_in_executor(executor, start_server)
    loop.run_forever()

if __name__ == "__main__":
    main()
When the endpoint function mentioned above executes, I get the following error on concurrent tasks: AttributeError: '_MissingSentinel' object has no attribute 'is_set'.
Attempt 2: threading module
# make them aware of each other
bot.flask_app = app
app.bot = bot

async def main():
    task = threading.Thread(target=start_bot)
    task.start()
    start_server()

if __name__ == "__main__":
    asyncio.run(main())
This approach brings two issues:
The first is that to run start_bot I use the .start() method instead of .run(), because according to this example .run() creates its own event loop, which would make it unreachable by other processes. However, .start() is an async function, so when running this I get the error: RuntimeError: You cannot use AsyncToSync in the same thread as an async event loop - just await the async function directly.
The second is that even when using the .run() method, the same issue arises when executing the endpoint mentioned above.
Attempt 3: asyncio module
def main():
    executor = ProcessPoolExecutor(2)
    loop = asyncio.new_event_loop()
    boo = loop.run_in_executor(executor, start_bot)
    baa = loop.run_in_executor(executor, start_server)
    loop.run_forever()

if __name__ == "__main__":
    main()
This time I actually get both processes to run, but I still cannot call the function I want from the Flask endpoint.
I also tried
await asyncio.gather([start_server(), start_bot()])
But I get the same issue as in Attempt 2, and I have already installed flask[async], so that is not the issue anymore.
Show some code
To reproduce what I have right now, you can either check the full repo here (it has only 4 files), or this sample should be enough to reproduce it:
from server import socketio, app
from bot import bot
from dotenv import load_dotenv
import os
import asyncio
import threading

env_path = os.path.dirname(__file__) + "/.env"
load_dotenv(env_path)

def start_server():
    socketio.run(app)

def start_bot():
    token = os.getenv("BOT_TOKEN")
    bot.run(token)

async def main():
    # this doesn't achieve what I want and is the main part of the problem
    start_server()
    start_bot()

if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("Program exited")
Did I miss something? Point it out in the comments and I will add it in an edit.
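For reference, the pattern I understand should make this possible (a minimal sketch, untested against the repo above; the bot_loop name and the synchronous endpoint body are my own assumptions) is to give the bot its own event loop in a background thread and schedule coroutines onto that loop from Flask with asyncio.run_coroutine_threadsafe:

import asyncio
import threading

# sketch: run the bot on a dedicated loop in a background thread
bot_loop = asyncio.new_event_loop()

def start_bot():
    asyncio.set_event_loop(bot_loop)
    token = os.getenv("BOT_TOKEN")
    bot_loop.run_until_complete(bot.start(token))  # bot.start() is the coroutine form of bot.run()

threading.Thread(target=start_bot, daemon=True).start()

@app.route("/submit", methods=["POST"])
def submit():
    # this runs in Flask's thread, so hand the coroutine to the bot's loop
    data = request.json
    future = asyncio.run_coroutine_threadsafe(
        send_dm(int(os.getenv("USER_ID")), data["message"]), bot_loop
    )
    future.result(timeout=10)  # block until the DM is sent (or an exception is raised)
    return data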
Related
I want to test some functions that work with asyncpg. If I run one test at a time, it works fine, but if I run several tests at once, all tests except the first crash with the error asyncpg.exceptions._base.InterfaceError: cannot perform operation: another operation is in progress.
Tests:
@pytest.mark.asyncio
async def test_project_connection(superuser_id, project_id):
    data = element_data_random(project_id)
    element_id = (await resolve_element_create(data=data, user_id=superuser_id))["id"]
    project_elements = (await db_projects_element_ids_get([project_id]))[project_id]
    assert element_id in project_elements

@pytest.mark.asyncio
async def test_project_does_not_exist(superuser_id):
    data = element_data_random(str(uuid.uuid4()))
    with pytest.raises(ObjectWithIdDoesNotExistError):
        await resolve_element_create(data=data, user_id=superuser_id)
All functions that work with the DB use the pool and look like:
async def <some_db_func>(*args):
    pool = await get_pool()
    await pool.execute(...)  # or fetch/fetchrow/fetchval
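For illustration, a concrete function of this shape might look like the following (the table and column names are hypothetical, not from the real project):

async def db_element_name_get(element_id):
    pool = await get_pool()
    # fetchval acquires a connection from the pool, runs the query, and releases it
    return await pool.fetchval("SELECT name FROM elements WHERE id = $1", element_id)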
How I get the pool:
import os
import socket

import asyncpg
import ujson
from dotenv import load_dotenv

db_pool = None

async def get_pool():
    global db_pool

    async def init(con):
        await con.set_type_codec('jsonb', encoder=ujson.dumps, decoder=ujson.loads, schema='pg_catalog')
        await con.set_type_codec('json', encoder=ujson.dumps, decoder=ujson.loads, schema='pg_catalog')

    if not db_pool:
        dockerfiles_dir = os.path.join(src_dir, 'dockerfiles')
        env_path = os.path.join(dockerfiles_dir, 'dev.env')
        try:
            # When the code and the DB are inside docker containers
            host = 'postgres-docker'
            socket.gethostbyname(host)
        except socket.error:
            # When the code is on localhost, but the DB is inside a docker container
            host = 'localhost'
        load_dotenv(dotenv_path=env_path)
        db_pool = await asyncpg.create_pool(
            database=os.getenv("POSTGRES_DBNAME"),
            user=os.getenv("POSTGRES_USER"),
            password=os.getenv("POSTGRES_PASSWORD"),
            host=host,
            init=init
        )
    return db_pool
As far as I understand, under the hood asyncpg takes a connection and runs the request on that connection when you run the request through the pool, which suggests that each request should have its own connection. However, this error occurs, and it is raised when one connection tries to handle two requests at the same time.
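For example, my understanding is that running a statement through the pool is roughly equivalent to acquiring a connection just for that statement (a sketch):

async def run_one_statement():
    pool = await get_pool()
    # await pool.execute("some sql") is roughly equivalent to:
    async with pool.acquire() as con:   # take a free connection from the pool
        await con.execute("some sql")   # the statement runs on that one connection
    # the connection is released back to the pool when the block exits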
Okay, thanks to @Adelin I realized that I need to run each asynchronous test synchronously. I'm new to asyncio, so I didn't understand it right away, but I found a solution.
It was:
@pytest.mark.asyncio
async def test_...(*args):
    result = await <some_async_func>
    assert result == expected_result
It became:
def test_...(*args):
    async def inner():
        result = await <some_async_func>
        assert result == expected_result
    asyncio.get_event_loop().run_until_complete(inner())
The problem happens because each test function creates its own event loop, which confuses the asyncpg pool about which event loop it belongs to.
You can change the event-loop scope from "function" to "session" by adding the fixture below to conftest.py.
You don't need to run the tests sequentially.
import asyncio
import pytest

@pytest.fixture(scope="session")
def event_loop(request):
    loop = asyncio.get_event_loop_policy().new_event_loop()
    yield loop
    loop.close()
I am writing a bot and I need to implement the following functionality: once every 10 minutes (for example), the bot parses a certain URL, and if there were changes since the previous call, it writes to the chat.
Since the bot is also busy with other things, I decided to loop the parsing in a function with a sleep at the end. If there are changes, I try to send a message to the chat, but then a problem happens.
Since this is not triggered by an event in the chat, I can't pull the "entity" from the "event" for the "send_message" function. Therefore, I have to go through the "get_entity" function with a link to the chat as a parameter, but for some reason this does not work from another thread. Below is simplified code:
import threading, queue
from time import sleep
import asyncio
from telethon.sync import TelegramClient, events
import config as cfg

bot = TelegramClient('Bot', cfg.api_id, cfg.api_hash)

@bot.on(events.NewMessage(pattern=r'^(?i)(idchat){1}$'))
async def echoidchat(event):
    channelaa = await bot.get_entity('https://t.me/elvistest')
    await bot.send_message(channelaa, 'reply')

def parseurls():
    for x in range(10):
        q.put(x)

async def pre_sendmsg():
    while True:
        try:
            msg = q.get_nowait()
        except Exception as e:
            await asyncio.sleep(1.0)
        else:
            await sendmsg(msg)
            q.task_done()

async def sendmsg(msg):
    channel = await bot.get_entity('https://t.me/elvistest')
    await bot.send_message(channel, f'reply from another thread {msg}')

if __name__ == '__main__':
    q = queue.Queue()
    parseurls()
    bot.start(bot_token=cfg.bot_token)
    threading.Thread(target=asyncio.run, daemon=True, args=(pre_sendmsg(),)).start()
    bot.run_until_disconnected()
The thing is that on the "bot.get_entity" line nothing happens. The script execution gets lost somewhere and does not go further; that is, the next line with "bot.send_message" is simply not executed. However, "echoidchat" keeps working during this time.
Well done! This works the way I want:
import random
import threading, queue
from time import sleep
import asyncio
from telethon import TelegramClient, events
import config as cfg

bot = TelegramClient('Bot', cfg.api_id, cfg.api_hash)

@bot.on(events.NewMessage(pattern=r'^(?i)(idchat){1}$'))
async def echoidchat(event):
    await bot.send_message(event.chat, 'reply')

async def parseurls():
    while True:
        ts = abs(int(random.random() * 10))
        print(f'parseurls({ts})')
        await sendmsg(ts)
        await asyncio.sleep(ts)

async def sendmsg(msg):
    print(f'sendmsg({msg}) - start')
    channel = await bot.get_entity('https://t.me/elvistest')
    await bot.send_message(channel, f'reply from another thread {msg}')
    print(f'sendmsg({msg}) - done')

def main():
    bot.start(bot_token=cfg.bot_token)
    loop = asyncio.get_event_loop()
    tasks = [
        loop.create_task(parseurls()),
        loop.create_task(bot.run_until_disconnected()),
    ]
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()

if __name__ == '__main__':
    main()
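If the parsing really had to stay in a separate thread (for example, because it makes blocking calls), my understanding is that the thread should hand each coroutine back to the loop the Telethon client runs on, e.g. with asyncio.run_coroutine_threadsafe. A sketch under that assumption (blocking_worker is a hypothetical helper):

def blocking_worker(loop):
    # runs in a plain thread; schedules sendmsg on the client's event loop
    for x in range(10):
        future = asyncio.run_coroutine_threadsafe(sendmsg(x), loop)
        future.result()  # wait until the message has actually been sent

# inside main(), after bot.start(...):
# loop = asyncio.get_event_loop()
# threading.Thread(target=blocking_worker, args=(loop,), daemon=True).start()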
I want to run a simple background task in FastAPI, which involves some computation before dumping the result into the database. However, the computation blocks it from receiving any more requests.
from fastapi import BackgroundTasks, FastAPI

app = FastAPI()
db = Database()

async def task(data):
    otherdata = await db.fetch("some sql")
    newdata = somelongcomputation(data, otherdata)  # this blocks other requests
    await db.execute("some sql", newdata)

@app.post("/profile")
async def profile(data: Data, background_tasks: BackgroundTasks):
    background_tasks.add_task(task, data)
    return {}
What is the best way to solve this issue?
Your task is defined as async, which means fastapi (or rather starlette) will run it in the asyncio event loop.
And because somelongcomputation is synchronous (i.e. not waiting on some IO, but doing computation) it will block the event loop as long as it is running.
I see a few ways of solving this:
Use more workers (e.g. uvicorn main:app --workers 4). This will allow up to 4 somelongcomputation calls in parallel.
Rewrite your task to not be async (i.e. define it as def task(data): ... etc). Then starlette will run it in a separate thread.
Use fastapi.concurrency.run_in_threadpool, which will also run it in a separate thread. Like so:
from fastapi.concurrency import run_in_threadpool

async def task(data):
    otherdata = await db.fetch("some sql")
    newdata = await run_in_threadpool(lambda: somelongcomputation(data, otherdata))
    await db.execute("some sql", newdata)
Or use asyncio's run_in_executor directly (which run_in_threadpool uses under the hood):
import asyncio

async def task(data):
    otherdata = await db.fetch("some sql")
    loop = asyncio.get_running_loop()
    newdata = await loop.run_in_executor(None, lambda: somelongcomputation(data, otherdata))
    await db.execute("some sql", newdata)
You could even pass in a concurrent.futures.ProcessPoolExecutor as the first argument to run_in_executor to run it in a separate process (see the sketch after this list).
Spawn a separate thread / process yourself. E.g. using concurrent.futures.
Use something more heavy-handed like celery. (Also mentioned in the fastapi docs here).
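A sketch of the ProcessPoolExecutor variant mentioned above (it assumes somelongcomputation is a module-level, picklable function; a lambda cannot be sent to another process):

import asyncio
from concurrent.futures import ProcessPoolExecutor

process_pool = ProcessPoolExecutor()  # create once and reuse across requests

async def task(data):
    otherdata = await db.fetch("some sql")
    loop = asyncio.get_running_loop()
    # run the CPU-bound function in a separate process so the event loop stays responsive
    newdata = await loop.run_in_executor(process_pool, somelongcomputation, data, otherdata)
    await db.execute("some sql", newdata)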
If your task is CPU bound, you could use multiprocessing; there is a way to do that with background tasks in FastAPI:
https://stackoverflow.com/a/63171013
Although you should consider using something like Celery if there are a lot of CPU-heavy tasks.
Read this issue.
Also in the example below, my_model.function_b could be any blocking function or process.
TL;DR
from starlette.concurrency import run_in_threadpool

@app.get("/long_answer")
async def long_answer():
    rst = await run_in_threadpool(my_model.function_b, arg_1, arg_2)
    return rst
This is an example of a background task in FastAPI:
from fastapi import FastAPI
import asyncio

app = FastAPI()
x = [1]  # a global variable x

@app.get("/")
def hello():
    return {"message": "hello", "x": x}

async def periodic():
    while True:
        # code to run periodically starts here
        x[0] += 1
        print(f"x is now {x}")
        # code to run periodically ends here
        # sleep for 3 seconds after running above code
        await asyncio.sleep(3)

@app.on_event("startup")
async def schedule_periodic():
    loop = asyncio.get_event_loop()
    loop.create_task(periodic())

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app)
There are two things that need to be done: host the website and send notifications. So I used the following approach to solve this problem:
from aiohttp import web
import asyncio

async def _send_proactive_message():
    ...

async def pre_init():
    await asyncio.sleep(20)
    await _send_proactive_message()

APP = web.Application()
APP.router.add_post("/api/messages", messages)
APP.router.add_get("/api/notify", notify)

if __name__ == '__main__':
    event_loop = asyncio.get_event_loop()
    try:
        event_loop.create_task(pre_init())
        web.run_app(APP, host="localhost", port=CONFIG.PORT)
    finally:
        event_loop.close()
Because there is another event loop inside web.run_app, I don't understand which one runs first and how to control each event loop.
Your way of creating a task before starting the event loop is OK, but only if run_app doesn't set and use another event loop.
A better way is to create tasks and other async objects once the event loop has started. This way you make sure the created objects are attached to the active, running event loop.
The best way to do it in your case is to use the on_startup hook:
async def pre_init(app):
    await _send_proactive_message()

async def make_app():
    app = web.Application()
    app.router.add_post("/api/messages", messages)
    app.router.add_get("/api/notify", notify)
    app.on_startup.append(pre_init)
    return app

web.run_app(make_app())
I have a manager that caches some user settings. I want to clean it up every hour for inactive users (10 seconds in my example). I try to use aiojobs for this, and I spawn the same job again from inside the job coroutine.
from aiohttp import web
from aiojobs.aiohttp import setup, get_scheduler
import asyncio

async def cleanup(scheduler):
    await asyncio.sleep(10)
    print('do cleanup')
    await scheduler.spawn(cleanup(scheduler))

async def handler(request):
    if not request.app['init']:
        scheduler = get_scheduler(request)
        await scheduler.spawn(cleanup(scheduler))
        request.app['init'] = True
    return web.Response(text='ok')

def main():
    app = web.Application()
    app.router.add_get('/', handler)
    setup(app)
    app['init'] = False
    web.run_app(app, host='127.0.0.1', port=8000)

main()
Is it a good solution? Should I create my own scheduler because my job does not relate to a request?
I want some background tasks to run in the same loop as the aiohttp web server, even before any HTTP requests arrive. It looks like aiojobs doesn't help me here, so I'm using something like the code below. I'm using a janus queue because my real application makes blocking calls from another thread. I don't know asyncio well, so this may be the blind leading the blind.
import asyncio

from aiohttp import web
from aiojobs.aiohttp import setup
import janus

async def ticket_maker(q: janus.Queue):
    counter = 1
    while True:
        print(f'Made ticket {counter}')
        await q.async_q.put(counter)
        await asyncio.sleep(1)
        counter += 1

async def handler(request):
    q: janus.Queue = request.app.get('Q')
    ticket = await q.async_q.get()
    return web.Response(text=f'You got ticket {ticket}')

def main():
    q = janus.Queue()
    app = web.Application()
    asyncio.get_event_loop().create_task(ticket_maker(q))
    app.router.add_get('/', handler)
    app['Q'] = q
    setup(app)
    web.run_app(app, port=8080)

if __name__ == '__main__':
    main()
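Following the on_startup advice given earlier in this thread, an alternative sketch (my adaptation, not tested) is to create the queue and the task only once run_app's loop is actually running:

def main():
    app = web.Application()
    app.router.add_get('/', handler)
    setup(app)

    async def start_ticket_maker(app):
        # created on the running loop, so both the queue and the task are bound to it
        app['Q'] = janus.Queue()
        app['ticket_task'] = asyncio.create_task(ticket_maker(app['Q']))

    async def stop_ticket_maker(app):
        app['ticket_task'].cancel()  # stop the background task on shutdown

    app.on_startup.append(start_ticket_maker)
    app.on_cleanup.append(stop_ticket_maker)
    web.run_app(app, port=8080)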