Implementing Twisted style local multiple deferred callbacks in Celery - python

I am quite new to using Celery and was wondering how Twisted-style multiple deferred callbacks can be implemented in Celery.
My Twisted code uses Perspective Broker and is as follows. I have a Handler (server) which handles some events and returns the result. The Dispatcher (client) prints the returned result using a deferred callback.
Handler.py (Server)
from twisted.application import service, internet
from twisted.internet import reactor, task
from twisted.spread import pb

from Dispatcher import Event
from Dispatcher import CopyEvent

class ReceiverEvent(pb.RemoteCopy, Event):
    pass

pb.setUnjellyableForClass(CopyEvent, ReceiverEvent)

class Handler(pb.Root):
    def remote_eventEnqueue(self, pond):
        d = task.deferLater(reactor, 5, handle_event, sender=self)
        return d

def handle_event(sender):
    print "Do Something"
    return "did something"

if __name__ == '__main__':
    h = Handler()
    reactor.listenTCP(8739, pb.PBServerFactory(h))
    reactor.run()
Now the Dispatcher.py (Client)
from twisted.spread import pb, jelly
from twisted.python import log
from twisted.internet import reactor

from Event import Event

class CopyEvent(Event, pb.Copyable):
    pass

class Dispatcher:
    def __init__(self, event):
        self.event = event

    def dispatch_event(self, remote):
        d = remote.callRemote("eventEnqueue", self.event)
        d.addCallback(self.printMessage)

    def printMessage(self, text):
        print text

def main():
    from Handler import CopyEvent
    event = CopyEvent()
    d = Dispatcher(event)
    factory = pb.PBClientFactory()
    reactor.connectTCP("localhost", 8739, factory)
    deferred = factory.getRootObject()
    deferred.addCallback(d.dispatch_event)
    reactor.run()

if __name__ == '__main__':
    main()
I tried implementing this in Celery.
Handler.py (Server)
from celery import Celery

app = Celery('tasks', backend='amqp', broker='amqp://guest@localhost//')

@app.task
def handle_event():
    print "Do Something"
    return "did something"
Dispatcher.py (Client)
from Handler import handle_event
from datetime import datetime

def print_message(text):
    print text

t = handle_event.apply_async(countdown=10, link=print_message.s('Done'))  ## HOWTO?
My exact question is: how can one implement Twisted-style deferred callbacks on local functions like print_message in Celery? When the handle_event method finishes, it returns a result on which I would like to have another callback method (print_message), which is LOCAL.
Is there any other possible design workflow to do this in Celery?
Thanks
JR

OK, so I finally figured it out. It is not possible to add callbacks directly in the Celery client the way Twisted does it, but Celery supports task-monitoring functionality that enables the client to monitor different kinds of worker events and attach callbacks to them.
A simple task monitor (Task_Monitor.py) would look something like this. (Details can be found in the Celery real-time processing documentation: http://docs.celeryproject.org/en/latest/userguide/monitoring.html#real-time-processing)
Task_Monitor.py
from celery import Celery

def task_monitor(app):
    state = app.events.State()

    def announce_completed_tasks(event):
        state.event(event)
        task = state.tasks.get(event['uuid'])
        print('TASK SUCCEEDED: %s[%s] %s' % (task.name, task.uuid, task.info(), ))

    with app.connection() as connection:
        recv = app.events.Receiver(connection, handlers={
            'task-succeeded': announce_completed_tasks,
        })
        recv.capture(limit=None, timeout=None, wakeup=True)

if __name__ == '__main__':
    app = Celery(broker='amqp://guest@REMOTEHOST//')
    task_monitor(app)
Task_Monitor.py has to be run as a separate process (client side). Besides that, the Celery application (server side) needs to be configured using
app.conf.CELERY_SEND_EVENTS = True
or by passing the -E option when running the celery worker, so that it sends the events needed for the worker to be monitored.
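For example, a worker for the Handler.py module above could be started with events enabled like this (the -A module name is assumed from the example):
celery -A Handler worker -E --loglevel=info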

I would recommend using chains or one of the similar mechanisms from the Celery Canvas docs.
Example taken from the docs:
>>> from celery import chain
>>> from proj.tasks import add, mul

>>> # (4 + 4) * 8 * 10
>>> res = chain(add.s(4, 4), mul.s(8), mul.s(10))
>>> res
proj.tasks.add(4, 4) | proj.tasks.mul(8) | proj.tasks.mul(10)
>>> res.apply_async()
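As a hedged sketch of how this applies to the original question: the link callback passed to apply_async must itself be a registered task (it executes on a worker, not locally in the client), so print_message would need the @app.task decorator. Assuming the same Handler.py module from the question:
from celery import Celery

app = Celery('tasks', backend='amqp', broker='amqp://guest@localhost//')

@app.task
def handle_event():
    print "Do Something"
    return "did something"

@app.task
def print_message(text):
    print text

# The parent task's return value ("did something") is passed
# to the linked callback as its first argument.
t = handle_event.apply_async(countdown=10, link=print_message.s())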

Related

How to create only 1 class instance when using Gunicorn and multiple workers?

I have a simple Python backend using falcon and websockets. If a client makes a call to an endpoint (e.g., to submit data), all other connected clients are notified via their respective websocket connections, i.e., the backend broadcasts to all currently connected clients. In general, this works just fine. Here's the minimal script for the falcon app:
import falcon

from db.dbmanager import DBManager
from ws.wsserver import WebSocketServer
from api.resources.liveqa import DemoResource

dbm = DBManager()  # PostgreSQL connection pool; works fine with multiple workers
wss = WebSocketServer()  # Works only with 1 worker

app = falcon.App()

demo_resource = DemoResource(dbm, wss)
app.add_route('/api/v1/demo', demo_resource)
And here is the code for the websockets server, which I instantiate and pass to the resource class:
import json
import asyncio
import websockets
import threading

class WebSocketServer:
    def __init__(self):
        self.clients = {}
        self.start_server()

    async def handler(self, ws, path):
        session_id = path.split('/')[-1]
        if session_id in self.clients:
            self.clients[session_id].add(ws)
        else:
            self.clients[session_id] = {ws}
        try:
            async for msg in ws:
                pass  # The clients are not supposed to send anything
        except websockets.ConnectionClosedError:
            pass
        finally:
            self.clients[session_id].remove(ws)

    async def send(self, client, msg):
        await client.send(msg)

    def broadcast(self, session_id, msg):
        if session_id not in self.clients:
            return
        for client in self.clients[session_id]:
            try:
                asyncio.run(self.send(client, json.dumps(msg)))
            except:
                pass

    def start_server(self):
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        start_server = websockets.serve(self.handler, host='111.111.111.111', port=5555)
        asyncio.get_event_loop().run_until_complete(start_server)
        threading.Thread(target=asyncio.get_event_loop().run_forever).start()
I use Gunicorn as the server for the backend, and it works if I use just 1 worker. However, if I try --workers 2 I get the error that port 5555 is already in use. I guess this makes sense, as each worker is trying to create a WebSocketServer instance using the same IP/port pair.
What is the best / cleanest / most Pythonic way to address this? I assume that I have to ensure that only one WebSocketServer instance is created. But how?
On a side note, I assume that a DBManager instance gets created for each worker as well. While it doesn't throw an error, as there can be multiple connection pools, I guess ensuring a single instance of DBManager is also the preferred way.
First of all, even running with one worker is potentially problematic, because Gunicorn is primarily a pre-forking server, and forking a process with threads is, in general, unsafe and may lead to unpredictable results.
One way to solve this is to use Gunicorn's server hooks to only start a thread (in this case a WebSocket server) in one of the workers, and only do that after forking. For instance,
import logging
import os
import threading

import falcon
import gunicorn.app.base

logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(message)s', level=logging.INFO)

class HelloWorld:
    def on_get(self, req, resp):
        resp.media = {'message': 'Hello, World!'}

def do_something(fork_nr):
    pid = os.getpid()
    logging.info(f'in a thread, {pid=}')
    if fork_nr == 1:
        logging.info('we could start a WebSocket server...')
    else:
        logging.info('not the first worker, not starting any servers')

class HybridApplication(gunicorn.app.base.BaseApplication):
    forks = 0

    @classmethod
    def pre_fork(cls, server, worker):
        logging.info(f'about to fork a new worker #{cls.forks}')
        cls.forks += 1

    @classmethod
    def post_fork(cls, server, worker):
        thread = threading.Thread(
            target=do_something, args=(cls.forks,), daemon=True)
        thread.start()

    def __init__(self):
        self.options = {
            'bind': '127.0.0.1:8000',
            'pre_fork': self.pre_fork,
            'post_fork': self.post_fork,
            'workers': 4,
        }
        self.application = falcon.App()
        self.application.add_route('/hello', HelloWorld())
        super().__init__()

    def load_config(self):
        config = {key: value for key, value in self.options.items()
                  if key in self.cfg.settings and value is not None}
        for key, value in config.items():
            self.cfg.set(key.lower(), value)

    def load(self):
        return self.application

if __name__ == '__main__':
    HybridApplication().run()
This simplistic prototype is not infallible, as we should also handle server reloads, the worker getting killed, etc. Speaking of which, you should probably use another worker type than sync for potentially long running requests, or set a long timeout, because otherwise the worker can get killed, taking the WebSocket thread with it. Specifying a number of threads should automatically change your worker type into gthread.
Note that here I implemented a custom Gunicorn application, but you could achieve the same effect by specifying hooks via a configuration file.
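For reference, a hedged sketch of the configuration-file equivalent (pre_fork and post_fork are Gunicorn's standard server hooks; the counter-based logic mirrors the class above, and my_app:app is a placeholder for your WSGI module):
# gunicorn.conf.py
import logging
import threading

bind = '127.0.0.1:8000'
workers = 4

forks = 0

def background_task():
    # placeholder for starting a WebSocket server, etc.
    logging.info('we could start a WebSocket server...')

def pre_fork(server, worker):
    # runs in the master just before each fork
    global forks
    forks += 1

def post_fork(server, worker):
    # runs in the freshly forked worker; only the first one
    # starts the background thread
    if forks == 1:
        threading.Thread(target=background_task, daemon=True).start()
You would then run it as: gunicorn -c gunicorn.conf.py my_app:app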
Another option is to use the ASGI flavour of Falcon, and implement even the WebSocket part inside your app:
import asyncio
import logging

import falcon.asgi

logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(message)s', level=logging.INFO)

class HelloWorld:
    async def on_get(self, req, resp):
        resp.media = {'message': 'Hello, World!'}

    async def on_websocket(self, req, ws):
        await ws.accept()
        logging.info(f'WS accepted {req.path=}')
        try:
            while True:
                await ws.send_media({'message': 'hi'})
                await asyncio.sleep(10)
        finally:
            logging.info(f'WS disconnected {req.path=}')

app = falcon.asgi.App()
app.add_route('/hello', HelloWorld())
Note that Gunicorn itself does not "speak" ASGI, so you would either need to use an ASGI app server, or use Gunicorn as a process manager for Uvicorn workers.
For instance, assuming your file is called test.py, you could run Uvicorn directly as:
pip install uvicorn[standard]
uvicorn test:app
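Alternatively, to use Gunicorn as the process manager (same assumed test:app module), uvicorn ships a Gunicorn worker class for exactly this purpose:
pip install uvicorn[standard] gunicorn
gunicorn test:app --workers 4 --worker-class uvicorn.workers.UvicornWorker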
However, if you went the ASGI route, you would need to implement your responders as coroutine functions (async def on_get(...) etc), or run your synchronous DB code in a threadpool executor.

How to avoid ReactorNotRestartable in Autobahn Python

I have a Python-based page which receives data by POST, which is then forwarded to the Crossbar server using Autobahn (WAMP). It works well the first 1-2 times, but when it's called again after that it throws ReactorNotRestartable.
Now, I need this to work whichever way possible, either by reusing this "Reactor" based on a conditional check or by stopping it properly after every run. (The first one would be preferable because it might reduce the execution time)
Thanks for your help!
Edit:
This is in a webpage (Django View) so it needs to run as many times as the page is loaded/data is sent to it via POST.
from twisted.internet import reactor
from twisted.internet.defer import inlineCallbacks
from twisted.internet.endpoints import TCP4ClientEndpoint
from twisted.application.internet import ClientService
from autobahn.wamp.types import ComponentConfig
from autobahn.twisted.wamp import ApplicationSession, WampWebSocketClientFactory

class MyAppSession(ApplicationSession):
    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def onConnect(self):
        self.join(self.config.realm)

    def onChallenge(self, challenge):
        pass

    @inlineCallbacks
    def onJoin(self, details):
        yield self.call('receive_data', data=message)
        yield self.leave()

    def onLeave(self, details):
        self.disconnect()

    def onDisconnect(self):
        reactor.stop()

message = "data from POST[]"
session = MyAppSession(ComponentConfig('realm_1', {}))
transport = WampWebSocketClientFactory(session, url='ws://127.0.0.1:8080')
endpoint = TCP4ClientEndpoint(reactor, '127.0.0.1', 8080)
service = ClientService(endpoint, transport)
service.startService()
reactor.run()
I figured out a probably hacky-and-not-so-good way by using multiprocessing and putting reactor.stop() inside onJoin() right after the function call. This way I don't have to bother with the "twisted running in the main thread" thing because its process gets killed as soon as my work is done.
Is there a better way?
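For completeness, a minimal sketch of that multiprocessing approach (the publish_to_crossbar helper name is hypothetical; it reuses the MyAppSession class and the session/factory setup from the snippet above):
from multiprocessing import Process

def run_wamp_client(message):
    # Imported inside the child process so every POST gets a fresh,
    # never-before-started reactor.
    from twisted.internet import reactor
    from twisted.internet.endpoints import TCP4ClientEndpoint
    from twisted.application.internet import ClientService
    from autobahn.wamp.types import ComponentConfig
    from autobahn.twisted.wamp import WampWebSocketClientFactory

    # MyAppSession as defined in the snippet above
    session = MyAppSession(ComponentConfig('realm_1', {}))
    transport = WampWebSocketClientFactory(session, url='ws://127.0.0.1:8080')
    endpoint = TCP4ClientEndpoint(reactor, '127.0.0.1', 8080)
    service = ClientService(endpoint, transport)
    service.startService()
    reactor.run()  # reactor.stop() in onDisconnect ends the child cleanly

def publish_to_crossbar(message):
    # Hypothetical helper for the Django view: one short-lived
    # process per POST, so ReactorNotRestartable never occurs.
    proc = Process(target=run_wamp_client, args=(message,))
    proc.start()
    proc.join()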

Python - How to use FastAPI and uvicorn.run without blocking the thread?

I'm looking for a way to use uvicorn.run() with a FastAPI app, but without uvicorn.run() blocking the thread. I have already tried to use processes, subprocesses, and threads, but nothing worked.
My problem is that I want to start the server from another process that should go on with other tasks after starting the server. Additionally, I have problems closing the server like this from another process.
Has anyone an idea how to use uvicorn.run() non blocking and how to stop it from another process?
The approach given by @HadiAlqattan will not work because uvicorn.run expects to be run in the main thread. Errors such as "signal only works in main thread" will be raised.
Correct approach is:
import contextlib
import time
import threading

import uvicorn

class Server(uvicorn.Server):
    def install_signal_handlers(self):
        pass

    @contextlib.contextmanager
    def run_in_thread(self):
        thread = threading.Thread(target=self.run)
        thread.start()
        try:
            while not self.started:
                time.sleep(1e-3)
            yield
        finally:
            self.should_exit = True
            thread.join()

config = uvicorn.Config("example:app", host="127.0.0.1", port=5000, log_level="info")
server = Server(config=config)

with server.run_in_thread():
    # Server is started.
    ...
    # Server will be stopped once code put here is completed
    ...

# Server stopped.
Very handy to run a live test server locally using a pytest fixture:
# conftest.py
import pytest

@pytest.fixture(scope="session")
def server():
    server = ...
    with server.run_in_thread():
        yield
Credits: uvicorn#742 by florimondmanca
This is an alternate version which works and was inspired by Aponace uvicorn#1103. The uvicorn maintainers want more community engagement with this issue, so if you are experiencing it, please join the conversation.
Example conftest.py file.
import multiprocessing

import pytest
from fastapi.testclient import TestClient
from uvicorn import Config, Server

from app.main import app

class UvicornServer(multiprocessing.Process):
    def __init__(self, config: Config):
        super().__init__()
        self.server = Server(config=config)
        self.config = config

    def stop(self):
        self.terminate()

    def run(self, *args, **kwargs):
        self.server.run()

@pytest.fixture(scope="session")
def server():
    config = Config("app.main:app", host="127.0.0.1", port=5000, log_level="debug")
    instance = UvicornServer(config=config)
    instance.start()
    yield instance
    instance.stop()

@pytest.fixture(scope="module")
def mock_app(server):
    client = TestClient(app)
    yield client
Example test_app.py file.
def test_root(mock_app):
    response = mock_app.get("")
    assert response.status_code == 200
When I set reload to False, FastAPI will start a multi-process web service; if it is True, there will be only one process for the web service.
import os
from multiprocessing import cpu_count

import uvicorn
from fastapi import FastAPI, APIRouter

router = APIRouter()
app = FastAPI()

@router.post("/test")
async def detect_img():
    print("pid:{}".format(os.getpid()))
    return os.getpid()

if __name__ == '__main__':
    app.include_router(router)
    print("CPU count: {}".format(cpu_count()))
    workers = 2 * cpu_count() + 1
    print("workers: {}".format(workers))
    reload = False
    # reload = True
    uvicorn.run("__main__:app", host="0.0.0.0", port=8082, reload=reload, workers=workers,
                timeout_keep_alive=5, limit_concurrency=100)
According to the Uvicorn documentation there is no programmatic way to stop the server;
instead, you can stop the server only by pressing Ctrl+C (officially).
But I have a trick to solve this problem programmatically, using the multiprocessing standard lib with these three simple functions:
A run function to run the server.
A start function to start a new process (start the server).
A stop function to join the process (stop the server).
from multiprocessing import Process

import uvicorn

# global process variable
proc = None

def run():
    """Run the configured uvicorn server."""
    uvicorn.run(app=app, host=host, port=port)

def start():
    """Start a new process (start the server)."""
    global proc
    # create a process instance and set the target to the run function.
    # use daemon mode to stop the process whenever the program stops.
    proc = Process(target=run, args=(), daemon=True)
    proc.start()

def stop():
    """Join (stop) the process (stop the server)."""
    global proc
    # check that the process is not None
    if proc:
        # join (stop) the process with a timeout set to 0.25 seconds.
        # using the timeout (the optional arg) is important
        # in order to force the server to stop.
        proc.join(0.25)
With the same idea you can:
use threading standard lib instead of using multiprocessing standard lib.
refactor these functions into a class.
Example of usage:
from time import sleep

if __name__ == "__main__":
    # to start the server, call the start function.
    start()
    # run some code ....
    sleep(3)
    # to stop the server, call the stop function.
    stop()
You can read more about:
Uvicorn server.
multiprocessing standard lib.
threading standard lib.
Concurrency to know more about multi processing and threading in python.

Creating a processing queue in Tornado

I'm using a Tornado web server to queue up items that need to be processed outside of the request/response cycle.
In my simplified example below, every time a request comes in, I add a new string to a list called queued_items. I want to create something that will watch that list and process the items as they show up in it.
(In my real code, the items are processed and sent over a TCP socket which may or may not be connected when the web request arrives. I want the web server to keep queuing up items regardless of the socket connection)
I'm trying to keep this code simple and not use external queues/programs like Redis or Beanstalk. It's not going to have very high volume.
What's a good way using Tornado idioms to watch the client.queued_items list for new items and process them as they arrive?
import time

import tornado.ioloop
import tornado.gen
import tornado.web

class Client():
    def __init__(self):
        self.queued_items = []

    @tornado.gen.coroutine
    def watch_queue(self):
        # I have no idea what I'm doing
        items = yield client.queued_items
        # go_do_some_thing_with_items(items)

class IndexHandler(tornado.web.RequestHandler):
    def get(self):
        client.queued_items.append("%f" % time.time())
        self.write("Queued a new item")

if __name__ == "__main__":
    client = Client()

    # Watch the queue for when new items show up
    client.watch_queue()

    # Create the web server
    application = tornado.web.Application([
        (r'/', IndexHandler),
    ], debug=True)
    application.listen(8888)

    tornado.ioloop.IOLoop.instance().start()
There is a library called toro, which provides synchronization primitives for tornado. [Update: As of tornado 4.2, toro has been merged into tornado.]
Sounds like you could just use a toro.Queue (or tornado.queues.Queue in tornado 4.2+) to handle this:
import time

import toro
import tornado.ioloop
import tornado.gen
import tornado.web

class Client():
    def __init__(self):
        self.queued_items = toro.Queue()

    @tornado.gen.coroutine
    def watch_queue(self):
        while True:
            items = yield self.queued_items.get()
            # go_do_something_with_items(items)

class IndexHandler(tornado.web.RequestHandler):
    @tornado.gen.coroutine
    def get(self):
        yield client.queued_items.put("%f" % time.time())
        self.write("Queued a new item")

if __name__ == "__main__":
    client = Client()

    # Watch the queue for when new items show up
    tornado.ioloop.IOLoop.current().add_callback(client.watch_queue)

    # Create the web server
    application = tornado.web.Application([
        (r'/', IndexHandler),
    ], debug=True)
    application.listen(8888)
    tornado.ioloop.IOLoop.current().start()
There are a few tweaks required, aside from switching the data structure from a list to a toro.Queue:
We need to schedule watch_queue to run inside the IOLoop using add_callback, rather than trying to call it directly outside of an IOLoop context.
IndexHandler.get needs to be converted to a coroutine, because toro.Queue.put is a coroutine.
I also added a while True loop to watch_queue, so that it will run forever, rather than just processing one item and then exiting.
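For tornado 4.2+, a hedged sketch of the same answer using the built-in tornado.queues.Queue, so the toro dependency can be dropped (the queue exposes the same coroutine-based get()/put() interface):
import time

import tornado.gen
import tornado.ioloop
import tornado.queues
import tornado.web

class Client():
    def __init__(self):
        # drop-in replacement for toro.Queue in tornado 4.2+
        self.queued_items = tornado.queues.Queue()

    @tornado.gen.coroutine
    def watch_queue(self):
        while True:
            item = yield self.queued_items.get()
            # go_do_something_with_item(item)

class IndexHandler(tornado.web.RequestHandler):
    @tornado.gen.coroutine
    def get(self):
        yield client.queued_items.put("%f" % time.time())
        self.write("Queued a new item")

if __name__ == "__main__":
    client = Client()
    tornado.ioloop.IOLoop.current().add_callback(client.watch_queue)
    application = tornado.web.Application([
        (r'/', IndexHandler),
    ], debug=True)
    application.listen(8888)
    tornado.ioloop.IOLoop.current().start()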

How do I write a python HTTP server to listen on multiple ports?

I'm writing a small web server in Python, using BaseHTTPServer and a custom subclass of BaseHTTPServer.BaseHTTPRequestHandler. Is it possible to make this listen on more than one port?
What I'm doing now:
class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    def do_GET(self):
        [...]

class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
    pass

server = ThreadingHTTPServer(('localhost', 80), MyRequestHandler)
server.serve_forever()
Sure; just start two different servers on two different ports in two different threads that each use the same handler. Here's a complete, working example that I just wrote and tested. If you run this code then you'll be able to get a Hello World webpage at both http://localhost:1111/ and http://localhost:2222/
from threading import Thread
from SocketServer import ThreadingMixIn
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler

class Handler(BaseHTTPRequestHandler):
    def do_GET(self):
        self.send_response(200)
        self.send_header("Content-type", "text/plain")
        self.end_headers()
        self.wfile.write("Hello World!")

class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
    daemon_threads = True

def serve_on_port(port):
    server = ThreadingHTTPServer(("localhost", port), Handler)
    server.serve_forever()

Thread(target=serve_on_port, args=[1111]).start()
serve_on_port(2222)
update:
This also works with Python 3 but three lines need to be slightly changed:
from socketserver import ThreadingMixIn
from http.server import HTTPServer, BaseHTTPRequestHandler
and
self.wfile.write(bytes("Hello World!", "utf-8"))
Not easily. You could have two ThreadingHTTPServer instances and write your own serve_forever() function (don't worry, it's not a complicated function).
The existing function:
def serve_forever(self, poll_interval=0.5):
    """Handle one request at a time until shutdown.

    Polls for shutdown every poll_interval seconds. Ignores
    self.timeout. If you need to do periodic tasks, do them in
    another thread.
    """
    self.__serving = True
    self.__is_shut_down.clear()
    while self.__serving:
        # XXX: Consider using another file descriptor or
        # connecting to the socket to wake this up instead of
        # polling. Polling reduces our responsiveness to a
        # shutdown request and wastes cpu at all other times.
        r, w, e = select.select([self], [], [], poll_interval)
        if r:
            self._handle_request_noblock()
    self.__is_shut_down.set()
So our replacement would be something like:
def serve_forever(server1, server2):
    while True:
        r, w, e = select.select([server1, server2], [], [], 0)
        if server1 in r:
            server1.handle_request()
        if server2 in r:
            server2.handle_request()
I would say that threading for something this simple is overkill. You're better off using some form of asynchronous programming.
Here is an example using Twisted:
from twisted.internet import reactor
from twisted.web import resource, server

class MyResource(resource.Resource):
    isLeaf = True

    def render_GET(self, request):
        return 'gotten'

site = server.Site(MyResource())

reactor.listenTCP(8000, site)
reactor.listenTCP(8001, site)

reactor.run()
I also think it looks a lot cleaner to have each port be handled in the same way, instead of having the main thread handle one port and an additional thread handle the other. Arguably that can be fixed in the thread example, but then you're using three threads.
