How to run discord.py tasks within celery?

I tried to create a Celery task that returns the roles from my Discord guild (proof of concept):
...
from asgiref.sync import async_to_sync
from celery import shared_task, Task
from discord.http import HTTPClient
from django.conf import settings  # assumed: BOT_TOKEN lives in Django settings

class ChatTask(Task):
    _client = None

    @property
    def client(self) -> HTTPClient:
        if self._client is None:
            client = HTTPClient()
            async_to_sync(
                lambda: client.static_login(settings.BOT_TOKEN, bot=True)
            )()
            self._client = client
        return self._client

@shared_task(base=ChatTask, bind=True)
def get_roles(self):
    # Still testing!
    client: HTTPClient = self.client
    print(client)  # Seems to be fine: `<discord.http.HTTPClient object at 0x7fcc6728dbb0>`
    roles = async_to_sync(lambda: async_get_roles(client))()  # ERROR!: `RuntimeError('Event loop is closed')`
    print(roles)
    return roles
However I keep getting RuntimeError('Event loop is closed').
Comment: I do know that celery v5 will support async/await. But I do not want to wait until the release at the end of the year. ;)
How can I fix this code to get rid of the error and actually see the roles printed out to the console?
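One workaround sometimes suggested for this class of error (not part of the original post, so treat it as a hedged sketch): async_to_sync creates and closes a fresh event loop on every call from a sync context, while the aiohttp session that HTTPClient opens during static_login stays bound to the loop it was created on, so the next call finds that loop closed. Keeping one long-lived loop per worker process avoids the mismatch:

import asyncio
import threading

# Hypothetical helper: one persistent loop per worker process, so the
# HTTPClient's session and every later coroutine run on the same loop.
_loop = asyncio.new_event_loop()
threading.Thread(target=_loop.run_forever, daemon=True).start()

def run_sync(coro):
    # Submit the coroutine to the persistent loop and block for its result.
    return asyncio.run_coroutine_threadsafe(coro, _loop).result()

# In ChatTask.client and get_roles, each async_to_sync(...)() call would become:
#   run_sync(client.static_login(settings.BOT_TOKEN, bot=True))
#   roles = run_sync(async_get_roles(client))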

Related

How to make each client get its own state if there is a class instance on the grpc-python server side?

I want to use grpc-python in the following scenario, but I don't know how to realize it.
The scenario is that, in the Python server, a class calculates and updates the instance's state, then sends that state to the corresponding client; on the client side, more than one client needs to communicate with the server, each getting its own result without interference from the others.
Specifically, suppose there is a class with initial value self.i = 0; each time a client calls the class's update function, it does self.i = self.i + 1 and returns self.i. Two clients may call this update function simultaneously, e.g. client1 calls update for the third time while client2 calls it for the first time.
I think this might be solved by creating a thread for each client to avoid conflicts: if a new client calls, a new thread is created; if an existing client calls, its existing thread is used. But I don't know how to implement it.
Hope you can help me. Thanks in advance.
I think I solved this problem myself. If you have a better solution, you can post it here.
I edited the helloworld example from the grpc-python introduction to explain my aim.
For helloworld.proto
syntax = "proto3";

option java_multiple_files = true;
option java_package = "io.grpc.examples.helloworld";
option java_outer_classname = "HelloWorldProto";
option objc_class_prefix = "HLW";

package helloworld;

// The greeting service definition.
service Greeter {
  // Sends a greeting
  rpc SayHello (HelloRequest) returns (HelloReply) {}
  rpc Unsubscribe (HelloRequest) returns (HelloReply) {}
}

// The request message containing the user's name.
message HelloRequest {
  string name = 1;
}

// The response message containing the greetings
message HelloReply {
  string message = 1;
}
I added an Unsubscribe method to allow a specific client to disconnect from the server.
In hello_server.py
import grpc
import helloworld_pb2
import helloworld_pb2_grpc
import threading
from threading import RLock
import time
from concurrent import futures
import logging

class Calculate:
    def __init__(self):
        self.i = 0

    def add(self):
        self.i += 1
        return self.i

class PeerSet(object):
    def __init__(self):
        self._peers_lock = RLock()
        self._peers = {}
        self.instances = {}

    def connect(self, peer):
        with self._peers_lock:
            if peer not in self._peers:
                print("Peer {} connecting".format(peer))
                self._peers[peer] = 1
                a = Calculate()
                self.instances[peer] = a
                output = a.add()
                return output
            else:
                self._peers[peer] += 1
                a = self.instances[peer]
                output = a.add()
                return output

    def disconnect(self, peer):
        print("Peer {} disconnecting".format(peer))
        with self._peers_lock:
            if peer not in self._peers:
                raise RuntimeError("Tried to disconnect peer '{}' but it was never connected.".format(peer))
            del self._peers[peer]
            del self.instances[peer]

    def peers(self):
        with self._peers_lock:
            return self._peers.keys()

class Greeter(helloworld_pb2_grpc.GreeterServicer):
    def __init__(self):
        self._peer_set = PeerSet()

    def _record_peer(self, context):
        return self._peer_set.connect(context.peer())

    def SayHello(self, request, context):
        output = self._record_peer(context)
        print("[thread {}] Peers: {}, output: {}".format(
            threading.currentThread().ident, self._peer_set.peers(), output))
        time.sleep(1)
        return helloworld_pb2.HelloReply(message='Hello, {}, {}!'.format(request.name, output))

    def Unsubscribe(self, request, context):
        self._peer_set.disconnect(context.peer())
        return helloworld_pb2.HelloReply(message='{} disconnected!'.format(context.peer()))

def serve():
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    helloworld_pb2_grpc.add_GreeterServicer_to_server(Greeter(), server)
    server.add_insecure_port('[::]:50051')
    server.start()
    server.wait_for_termination()

if __name__ == '__main__':
    logging.basicConfig()
    serve()
The use of context.peer() is adapted from Richard Belleville's answer in this post. You can change the add() function to any other function that updates the instance's state.
In hello_client.py
from __future__ import print_function
import logging

import grpc
import helloworld_pb2
import helloworld_pb2_grpc

def run():
    # NOTE(gRPC Python Team): .close() is possible on a channel and should be
    # used in circumstances in which the with statement does not fit the needs
    # of the code.
    with grpc.insecure_channel('localhost:50051') as channel:
        stub = helloworld_pb2_grpc.GreeterStub(channel)
        response = stub.SayHello(helloworld_pb2.HelloRequest(name='you'))
        print("Greeter client received: " + response.message)
        response = stub.SayHello(helloworld_pb2.HelloRequest(name='Tom'))
        print("Greeter client received: " + response.message)
        response = stub.SayHello(helloworld_pb2.HelloRequest(name='Jerry'))
        print("Greeter client received: " + response.message)
        stub.Unsubscribe(helloworld_pb2.HelloRequest(name="end"))

if __name__ == '__main__':
    logging.basicConfig()
    run()
If we run several hello_client.py instances simultaneously, the server can distinguish the different clients and send the correct corresponding info to each.

How to send the progress of an operation in a FastAPI app?

I have deployed a FastAPI endpoint:
from fastapi import FastAPI, UploadFile
from typing import List

app = FastAPI()

@app.post('/work/test')
async def testing(files: List[UploadFile]):
    for i in files:
        .......
        # do a lot of operations on each file

        # after that I am just writing that processed data into mysql database
        # cur.execute(...)
        # cur.commit()
        .......

    # just returning "OK" to confirm data is written into mysql
    return {"response": "OK"}
I can request output from the API endpoint and it works perfectly fine for me.
Now, the biggest challenge for me is to know how much time each iteration takes, because in the UI part (those who are accessing my API endpoint) I want to show them a progress bar (TIME TAKEN) for each iteration/file being processed.
Is there any possible way for me to achieve it? If so, please help me out on how I can proceed further.
Thank you.
Approaches
Polling
The preferred approach to track the progress of a task is polling:

After receiving a request to start a task on the backend:
1. Create a task object in storage (e.g. in-memory, Redis, etc.). The task object must contain the following data: task ID, status (pending, completed), result, and others.
2. Run the task in the background (coroutines, threading, multiprocessing, or a task queue like Celery, arq, aio-pika, dramatiq, etc.).
3. Respond immediately with 202 (Accepted), returning the previously created task ID.

Update the task status:
- This can be done from within the task itself, if it knows about the task store and has access to it: the task periodically updates information about itself.
- Or use a task monitor (observer, producer-consumer pattern) that watches the status of the task and its result, and updates the information in the storage.

On the client side (front end), start a polling cycle for the task status at the endpoint /task/{ID}/status, which reads information from the task storage. A minimal client-side sketch follows.
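A hedged client-side sketch of that polling cycle (the helper name is hypothetical; the endpoint path and the status/progress fields follow the asyncio demo below, and the requests package is assumed to be available):

import time
import requests

def wait_for_task(base_url, task_id, interval=1.0):
    # Poll the status endpoint until the job leaves the in_progress state.
    while True:
        job = requests.get("{}/task/{}/status".format(base_url, task_id)).json()
        print("progress:", job.get("progress"))  # drive a progress bar from this
        if job.get("status") != "in_progress":
            return job
        time.sleep(interval)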
Streaming response
Streaming is a less convenient way to get the status of request processing periodically: the server gradually pushes responses without closing the connection. It has significant disadvantages; for example, if the connection is broken, you can lose information. A streaming API is a different approach from a REST API.
Websockets
You can also use websockets for real-time notifications and bidirectional communication.
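A hedged sketch of that idea in FastAPI (the endpoint path is hypothetical, and the jobs store is assumed to be the same one a background task updates, as in the demos below): the server pushes progress over the socket instead of being polled.

import asyncio
from fastapi import FastAPI, WebSocket

app = FastAPI()
jobs = {}  # assumed: background tasks update Job objects in here

@app.websocket("/ws/task/{uid}")
async def task_progress(websocket: WebSocket, uid: str):
    await websocket.accept()
    # Push progress updates to the client until the job finishes.
    while (job := jobs.get(uid)) is not None and job.status == "in_progress":
        await websocket.send_json({"progress": job.progress})
        await asyncio.sleep(1)
    await websocket.send_json({"status": job.status if job else "unknown"})
    await websocket.close()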
Links:
Examples of the polling approach for a progress bar, and a more detailed description for Django + Celery, can be found at these links:
https://www.dangtrinh.com/2013/07/django-celery-display-progress-bar-of.html
https://buildwithdjango.com/blog/post/celery-progress-bars/
I have provided simplified examples of running background tasks in FastAPI using multiprocessing here:
https://stackoverflow.com/a/63171013/13782669
Old answer:
You could run a task in the background, return its ID, and provide a /status endpoint that the front end would periodically call. In the status response, you could return the current state of your task (for example, pending, with the number of the currently processed file). I provided a few simple examples here.
Demo
Polling
Demo of the approach using asyncio tasks (single worker solution):
import asyncio
from http import HTTPStatus
from typing import Dict, Optional
from uuid import UUID, uuid4

import uvicorn
from fastapi import BackgroundTasks, FastAPI
from pydantic import BaseModel, Field

class Job(BaseModel):
    uid: UUID = Field(default_factory=uuid4)
    status: str = "in_progress"
    progress: int = 0
    result: Optional[int] = None

app = FastAPI()
jobs: Dict[UUID, Job] = {}  # Dict as job storage

async def long_task(queue: asyncio.Queue, param: int):
    for i in range(1, param):  # do work and return our progress
        await asyncio.sleep(1)
        await queue.put(i)
    await queue.put(None)

async def start_new_task(uid: UUID, param: int) -> None:
    queue = asyncio.Queue()
    task = asyncio.create_task(long_task(queue, param))
    while progress := await queue.get():  # monitor task progress
        jobs[uid].progress = progress
    jobs[uid].status = "complete"

@app.post("/new_task/{param}", status_code=HTTPStatus.ACCEPTED)
async def task_handler(background_tasks: BackgroundTasks, param: int):
    new_task = Job()
    jobs[new_task.uid] = new_task
    background_tasks.add_task(start_new_task, new_task.uid, param)
    return new_task

@app.get("/task/{uid}/status")
async def status_handler(uid: UUID):
    return jobs[uid]
Adapted example for the loop from the question
The background processing function is defined with plain def (not async def), so FastAPI runs it on a thread pool.
import time
from http import HTTPStatus
from typing import Dict, List
from uuid import UUID, uuid4

from fastapi import BackgroundTasks, FastAPI, File, UploadFile
from pydantic import BaseModel, Field

class Job(BaseModel):
    uid: UUID = Field(default_factory=uuid4)
    status: str = "in_progress"
    processed_files: List[str] = Field(default_factory=list)

app = FastAPI()
jobs: Dict[UUID, Job] = {}

def process_files(task_id: UUID, files: List[UploadFile]):
    for i in files:
        time.sleep(5)  # pretend long task
        # ...
        # do a lot of operations on each file
        # then append the processed file to a list
        # ...
        jobs[task_id].processed_files.append(i.filename)
    jobs[task_id].status = "completed"

@app.post('/work/test', status_code=HTTPStatus.ACCEPTED)
async def work(background_tasks: BackgroundTasks, files: List[UploadFile] = File(...)):
    new_task = Job()
    jobs[new_task.uid] = new_task
    background_tasks.add_task(process_files, new_task.uid, files)
    return new_task

@app.get("/work/{uid}/status")
async def status_handler(uid: UUID):
    return jobs[uid]
Streaming
from fastapi.responses import StreamingResponse  # needed for StreamingResponse below

async def process_files_gen(files: List[UploadFile]):
    for i in files:
        time.sleep(5)  # pretend long task
        # ...
        # do a lot of operations on each file
        # then append the processed file to a list
        # ...
        yield f"{i.filename} processed\n"
    yield f"OK\n"

@app.post('/work/stream/test', status_code=HTTPStatus.ACCEPTED)
async def work(files: List[UploadFile] = File(...)):
    return StreamingResponse(process_files_gen(files))
Below is a solution which uses unique identifiers and a globally available dictionary which holds information about the jobs:
NOTE: The code below is safe to use as long as you use dynamic key values (a uuid is used in the sample) and keep the application within a single process.
To start the app:
1. Put the code below in a file main.py.
2. Run uvicorn main:app --reload.
3. Create a job entry by accessing http://127.0.0.1:8000/.
4. Repeat step 3 to create multiple jobs.
5. Go to http://127.0.0.1:8000/status to see the statuses of all jobs.
6. Go to http://127.0.0.1:8000/status/{identifier} to see the progression of a job by its id.
Code of the app:
from fastapi import FastAPI, UploadFile
import uuid
from typing import List
import asyncio

context = {'jobs': {}}

app = FastAPI()

async def do_work(job_key, files=None):
    iter_over = files if files else range(100)
    for file_number, file in enumerate(iter_over):  # enumerate yields (index, item)
        jobs = context['jobs']
        job_info = jobs[job_key]
        job_info['iteration'] = file_number
        job_info['status'] = 'inprogress'
        await asyncio.sleep(1)
    context['jobs'][job_key]['status'] = 'done'

@app.post('/work/test')
async def testing(files: List[UploadFile]):
    identifier = str(uuid.uuid4())
    context['jobs'][identifier] = {}
    asyncio.run_coroutine_threadsafe(do_work(identifier, files), loop=asyncio.get_running_loop())
    return {"identifier": identifier}

@app.get('/')
async def get_testing():
    identifier = str(uuid.uuid4())
    context['jobs'][identifier] = {}
    asyncio.run_coroutine_threadsafe(do_work(identifier), loop=asyncio.get_running_loop())
    return {"identifier": identifier}

@app.get('/status')
def status():
    return {
        'all': list(context['jobs'].values()),
    }

@app.get('/status/{identifier}')
async def status_by_id(identifier):  # renamed to avoid shadowing status()
    return {
        "status": context['jobs'].get(identifier, 'job with that identifier is undefined'),
    }

Sending a message to the client using Django Channels from Celery tasks.py

I'm trying to use channels (v2.1.7) in Django to send messages from the server to the client. When I execute the Celery task below, my message is not fetched in consumers.py (so it is not sent to the client) and, surprisingly, no error occurs.
I'm able to send messages from the consumer to the client directly, but I couldn't manage to send from outside the consumer using async_to_sync().
(I tried to use the async_to_sync method in a standard Django views.py and had the same problem.)
wololo/tasks.py
import os
from asgiref.sync import async_to_sync  # imports needed by this task

@app.task(name='wololo.tasks.upgrade_building')
def upgrade_building(user_id):
    os.environ['DJANGO_SETTINGS_MODULE'] = 'DjangoFirebaseProject.settings'
    from channels.layers import get_channel_layer
    channel_layer = get_channel_layer()
    print(channel_layer, "wololo")
    async_to_sync(channel_layer.send)('chat', {
        'type': 'hello.message',
        'message': 'hadiInsss',
    })
    return True
wololo/consumers.py
from channels.generic.websocket import WebsocketConsumer
import json
from asgiref.sync import async_to_sync

class ChatConsumer(WebsocketConsumer):
    def connect(self):
        async_to_sync(self.channel_layer.group_add)("chat", self.channel_name)
        self.accept()

    def disconnect(self, close_code):
        async_to_sync(self.channel_layer.group_discard)("chat", self.channel_name)

    def hello_message(self, event):
        print("U MUST SEE THAT MSG")
        # Send a message down to the client
        self.send(text_data=json.dumps(event['message']))
The result I get in the Celery terminal shows the channel layer being printed with no error (screenshot omitted).
Thanks in advance
It looks like you are using the channel_layer.send method, but I think you actually want to use channel_layer.group_send instead.
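A minimal sketch of that change in the task body (the group name "chat" and the hello.message type come from the consumer above):

from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer

channel_layer = get_channel_layer()
# group_send fans out to every consumer that joined the "chat" group via
# group_add; the 'type' key routes the event to ChatConsumer.hello_message.
async_to_sync(channel_layer.group_send)('chat', {
    'type': 'hello.message',
    'message': 'hadiInsss',
})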

Handling async streaming requests in grpc python

I am trying to understand how to handle a grpc api with bidirectional streaming (using the Python API).
Say I have the following simple server definition:
syntax = "proto3";

package simple;

service TestService {
  rpc Translate(stream Msg) returns (stream Msg){}
}

message Msg
{
  string msg = 1;
}
Say that the messages sent from the client come asynchronously (as a consequence of the user selecting some UI elements).
The generated Python stub for the client will contain a method Translate that accepts a generator function and returns an iterator.
What is not clear to me is how I would write the generator function that returns messages as they are created by the user. Sleeping on the thread while waiting for messages doesn't sound like the best solution.
This is a bit clunky right now, but you can accomplish your use case as follows:
#!/usr/bin/env python

from __future__ import print_function

import time
import random
import collections
import threading

from concurrent import futures
from concurrent.futures import ThreadPoolExecutor
import grpc

from translate_pb2 import Msg
from translate_pb2_grpc import TestServiceStub
from translate_pb2_grpc import TestServiceServicer
from translate_pb2_grpc import add_TestServiceServicer_to_server


def translate_next(msg):
    return ''.join(reversed(msg))


class Translator(TestServiceServicer):
    def Translate(self, request_iterator, context):
        for req in request_iterator:
            print("Translating message: {}".format(req.msg))
            yield Msg(msg=translate_next(req.msg))


class TranslatorClient(object):
    def __init__(self):
        self._stop_event = threading.Event()
        self._request_condition = threading.Condition()
        self._response_condition = threading.Condition()
        self._requests = collections.deque()
        self._last_request = None
        self._expected_responses = collections.deque()
        self._responses = {}

    def _next(self):
        with self._request_condition:
            while not self._requests and not self._stop_event.is_set():
                self._request_condition.wait()
            if len(self._requests) > 0:
                return self._requests.popleft()
            else:
                raise StopIteration()

    def next(self):
        return self._next()

    def __next__(self):
        return self._next()

    def add_response(self, response):
        with self._response_condition:
            request = self._expected_responses.popleft()
            self._responses[request] = response
            self._response_condition.notify_all()

    def add_request(self, request):
        with self._request_condition:
            self._requests.append(request)
            with self._response_condition:
                self._expected_responses.append(request.msg)
            self._request_condition.notify()

    def close(self):
        self._stop_event.set()
        with self._request_condition:
            self._request_condition.notify()

    def translate(self, to_translate):
        self.add_request(to_translate)
        with self._response_condition:
            while True:
                self._response_condition.wait()
                if to_translate.msg in self._responses:
                    return self._responses[to_translate.msg]


def _run_client(address, translator_client):
    with grpc.insecure_channel(address) as channel:  # use the address argument
        stub = TestServiceStub(channel)
        responses = stub.Translate(translator_client)
        for resp in responses:
            translator_client.add_response(resp)


def main():
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    add_TestServiceServicer_to_server(Translator(), server)
    server.add_insecure_port('[::]:50054')
    server.start()
    translator_client = TranslatorClient()
    client_thread = threading.Thread(
        target=_run_client, args=('localhost:50054', translator_client))
    client_thread.start()

    def _translate(to_translate):
        return translator_client.translate(Msg(msg=to_translate)).msg

    translator_pool = futures.ThreadPoolExecutor(max_workers=4)
    to_translate = ("hello", "goodbye", "I", "don't", "know", "why",)
    translations = translator_pool.map(_translate, to_translate)
    print("Translations: {}".format(list(zip(to_translate, translations))))  # list() so py3 prints the pairs

    translator_client.close()
    client_thread.join()
    server.stop(None)


if __name__ == "__main__":
    main()
The basic idea is to have an object called TranslatorClient running on a separate thread, correlating requests and responses. It expects that responses will return in the order that requests were sent out. It also implements the iterator interface so that you can pass it directly to an invocation of the Translate method on your stub.
We spin up a thread running _run_client which pulls responses out of TranslatorClient and feeds them back in the other end with add_response.
The main function I included here is really just a strawman since I don't have the particulars of your UI code. I'm running _translate in a ThreadPoolExecutor to demonstrate that, even though translator_client.translate is synchronous, it yields, allowing you to have multiple in-flight requests at once.
We recognize that this is a lot of code to write for such a simple use case. Ultimately, the answer will be asyncio support. We have plans for this in the not-too-distant future. But for the moment, this sort of solution should keep you going whether you're running python 2 or python 3.
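For comparison, here is a hedged sketch of the same client against the asyncio API that gRPC later shipped as grpc.aio (not part of the original answer; an asyncio.Queue of outgoing strings, terminated by None, stands in for the condition-variable machinery above):

import asyncio
import grpc

from translate_pb2 import Msg
from translate_pb2_grpc import TestServiceStub

async def run(outgoing: asyncio.Queue):
    async with grpc.aio.insecure_channel('localhost:50054') as channel:
        call = TestServiceStub(channel).Translate()  # open the bidi stream

        async def sender():
            # Feed user-generated messages to the stream; None ends it.
            while (msg := await outgoing.get()) is not None:
                await call.write(Msg(msg=msg))
            await call.done_writing()

        send_task = asyncio.create_task(sender())
        async for response in call:  # responses arrive as the server yields them
            print("Translated:", response.msg)
        await send_task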

Async not working when using pykafka and asyncio

I am trying to call multiple pykafka consumer functions using async. However, the first pykafka consumer function blocks the others from working.
The QueueConsumer lib:
import json
from pykafka import KafkaClient
import configparser
import asyncio

class QueueConsumer(object):
    def __init__(self):
        config = configparser.ConfigParser()
        config.read('config.ini')
        self.config = config

    async def test(self):
        defaultTopic = 'test'
        client = KafkaClient(hosts=self.config['kafka']['host'])
        topic = client.topics[defaultTopic.encode('utf-8')]
        consumer = topic.get_simple_consumer()
        # msg = next(consumer)
        for message in consumer:
            print(defaultTopic+' '+message.value.decode("utf-8"))

    async def coba(self):
        defaultTopic = 'coba'
        client = KafkaClient(hosts=self.config['kafka']['host'])
        topic = client.topics[defaultTopic.encode('utf-8')]
        consumer = topic.get_simple_consumer()
        # msg = next(consumer)
        for message in consumer:
            print(defaultTopic+' '+message.value.decode("utf-8"))
Then I call those functions using:

import asyncio

queueConsumer = QueueConsumer()
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.gather(
    queueConsumer.test(),
    queueConsumer.coba(),
))
loop.close()
The result only returns queue messages from topic 'test'.
Edit:
I tried to add another function:

async def factorial(self, name, number):
    f = 1
    for i in range(2, number+1):
        print("Task %s: Compute factorial(%s)..." % (name, i))
        await asyncio.sleep(1)
        f *= i
    print("Task %s: factorial(%s) = %s" % (name, number, f))
And then called them like:

queueConsumer.test(),
queueConsumer.coba(),
queueConsumer.factorial('a', 3),
queueConsumer.factorial('b', 5),
queueConsumer.factorial('c', 7),

Some prints from the factorial function are executed, but as soon as printing from either test or coba starts, it just stops the others.
SimpleConsumer.consume is a blocking call, so you'll need to adjust your code to periodically poll for new messages while relinquishing control between polls for other asynchronous operations to take over. One way this could be accomplished is with the use_greenlets=True kwarg on KafkaClient, relying on gevent to handle control flow between multiple asynchronous operations.
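For reference, a minimal sketch of the polling alternative (assuming the same config.ini layout as in the question): SimpleConsumer.consume(block=False) returns None when no message is ready, and awaiting asyncio.sleep between polls hands control back to the event loop so the other coroutines can run.

import asyncio
from pykafka import KafkaClient

async def poll_topic(config, topic_name):
    client = KafkaClient(hosts=config['kafka']['host'])
    topic = client.topics[topic_name.encode('utf-8')]
    consumer = topic.get_simple_consumer()
    while True:
        message = consumer.consume(block=False)  # non-blocking: None if nothing is ready
        if message is not None:
            print(topic_name + ' ' + message.value.decode('utf-8'))
        else:
            await asyncio.sleep(0.1)  # yield control to the other coroutines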
