I am building a webapp using flask and using celery to send mails periodically.
The problem is that whenever there is a new entry in the database, Celery doesn't see it and continues to use the old entries. I have to restart the Celery worker each time to make it work properly. Celery beat is running and I am using Redis as the broker.
Celery related functions:
from datetime import datetime, timedelta

@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    sender.add_periodic_task(30.0, appointment_checkout, name='appointment_checkout')

@celery.task(name='app.Blueprints.periods.appointment_checkout')
def appointment_checkout():
    from app.Blueprints.api.routes import fetchAllAppointments, fetch_user_email, fetch_user_phone
    from app.Blueprints import db

    dt = datetime.now() + timedelta(minutes=10)
    # fa = Appointment.query.filter_by(date = dt.strftime("%Y-%m-%d"))
    fa = fetchAllAppointments()

    for i in fa:
        # send emails to clients and counsellors
        try:
            if (str(i.date.year) != dt.strftime("%Y") or str(i.date.month) != dt.strftime("%m")
                    or str(i.date.day) != dt.strftime("%d")):
                continue
        except Exception:
            continue

        if i.reminderFlag == 1:
            continue

        if int(dt.strftime("%H")) == int(i.time.hour) and int(dt.strftime("%M")) == int(i.time.minute):
            client = fetch_user_email(i.user)
            counsellor = fetch_user_email(i.counsellor)
            client_phone = fetch_user_phone(i.user)
            counsellor_phone = fetch_user_phone(i.counsellor)

            i.reminderFlag = 1
            db.session.add(i)
            db.session.commit()

            # client email
            subject = "appointment notification"
            msg = "<h1>Greetings</h1><p>This is to notify you that your appointment is about to begin soon.</p>"
            sendmail.delay(subject, msg, client)
            sendmail.delay(subject, msg, counsellor)
            sendmsg.delay(client_phone, msg)
            sendmsg.delay(counsellor_phone, msg)
When I add something to the appointment table, Celery doesn't see the new entry. After restarting the Celery worker it sees it.
I am running beat and worker using the following commands:
celery -A periods beat --loglevel=INFO
celery -A periods worker --loglevel=INFO --concurrency=2
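A common cause of this pattern (assuming the Flask-SQLAlchemy setup implied above) is that the long-running worker keeps reusing one open session/transaction, so queries keep returning the snapshot from when the transaction started rather than newly inserted rows. Below is a minimal sketch of how to rule that out; the task body is abbreviated and the fix shown (resetting the session at the start of each run) is an assumption, not a confirmed diagnosis:

from app.Blueprints import db

@celery.task(name='app.Blueprints.periods.appointment_checkout')
def appointment_checkout():
    # Discard any session state left over from the previous run so the next
    # query starts a fresh transaction and sees rows committed since then.
    db.session.remove()

    from app.Blueprints.api.routes import fetchAllAppointments
    fa = fetchAllAppointments()
    for i in fa:
        ...  # existing reminder logic from the question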
Related
I'm trying to create background task for my django app to translate some text.
I've managed to write code that works when I execute it directly from a .py file:
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r"xxxxx\GoogleCloudKey.json"
client = translate.TranslationServiceClient()

def translate_text(source_lang, target_lang, text):
    output = client.translate_text(
        contents=[text],
        target_language_code=target_lang,
        source_language_code=source_lang,
        parent='projects/xxxx',
    )
    translated_text = ''
    for translation in output.translations:
        translated_text += translation.translated_text
    print(translated_text)
    return translated_text

def translate_table(target_lang):
    print('Received task!')
    print('Target lang :', target_lang)
    data = ['Wood and Wood Residuals', 'test application']
    # data = ModelXXX.objects.all()
    for row in data:
        if row:
            row = translate_text('en-US', target_lang, row)
but when I add the @shared_task decorator to the translate_table function and execute it from my Django app (using translate_table.delay(target_lang)), it throws google.api_core.exceptions.ServiceUnavailable: 503 Deadline Exceeded after Making request: POST https://oauth2.googleapis.com/token. Do you have any idea what can cause this error?
I'm starting the Celery background worker using celery -A responsiblee worker -l info -P eventlet --loglevel=DEBUG. I have also tried another pool: celery -A responsiblee worker -l info -P gevent --loglevel=DEBUG
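One thing worth ruling out (this is an assumption, not a confirmed diagnosis): with the eventlet/gevent pools, monkey-patching can interfere with the gRPC channel that the Translation client opens at module import time. Below is a minimal sketch that builds the client inside the task instead, which can then also be tried under the default prefork pool (celery -A responsiblee worker -l info):

import os
from celery import shared_task
from google.cloud import translate_v3 as translate

@shared_task
def translate_table(target_lang):
    # Create credentials and the gRPC client inside the task, not at import time.
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r"xxxxx\GoogleCloudKey.json"
    client = translate.TranslationServiceClient()

    data = ['Wood and Wood Residuals', 'test application']
    for row in data:
        if row:
            output = client.translate_text(
                contents=[row],
                target_language_code=target_lang,
                source_language_code='en-US',
                parent='projects/xxxx',
            )
            print(''.join(t.translated_text for t in output.translations))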
I have a Flask app which sends emails/SMSs to users at a specific time using Celery's ETA/countdown feature, with Redis as the broker. The issue is that the email and SMS tasks duplicate randomly: sometimes users get 10 emails/SMSs, sometimes 20+, even though each task is only supposed to run once. The data flow:
The initial function schedule_event_main calls the ETA tasks with the notifications:
date_event = datetime.combine(day, time.max)
schedule_ratings_email.apply_async([str(event[0])], eta=date_event)
schedule_ratings_sms.apply_async([str(event[0])], eta=date_event)
Inside the schedule_ratings_email and schedule_ratings_sms tasks, a .delay call creates the individual Celery tasks that send out the emails and SMSs to the various guests for an event.
@app.task(bind=True)
def schedule_ratings_email(self, event_id):
    """ Fetch feed of URLs to crawl and queue up a task to grab and process
    each url. """
    try:
        url = SITE_URL + 'user/dashboard'
        guests = db.session.query(EventGuest).filter(EventGuest.event_id == int(event_id)).all()
        event_details = db.session.query(Event).filter(Event.id == event_id).first()
        if guests:
            if event_details.status == "archived":
                for guest in guests:
                    schedule_individual_ratings_emails.delay(guest.user.first_name, guest.event.host.first_name, guest.user.email, url)
    except Exception as e:
        log.error("Error processing ratings email for %s" % event_id, exc_info=e)
        # self.retry()
This is the final .delay individual task for sending the notifications:
@app.task()
def schedule_individual_ratings_emails(guest_name, host, guest, url):
    try:
        email_rating(guest_name, host, guest, url)
    except Exception as e:
        log.error("Error processing ratings email for %s" % guest, exc_info=e)
I've tried multiple SO answers and tweaked a lot of variables, including Celery settings, but the notifications are still duplicating. It's only the ETA/countdown tasks, and only the ones that hit 3rd party servers; I have other ETA tasks that write to the DB and those don't have any issues.
This is an issue both locally and on Heroku (production). Current tech stack:
Flask==1.0.2
celery==4.1.0
Redis 4.0.9
Celery startup: worker: celery worker --app openseat.tasks --beat --concurrency 1 --loglevel info
Celery config details:
CELERY_ACKS_LATE = True
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TIMEZONE = 'Africa/Johannesburg'
CELERY_ENABLE_UTC = True
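One well-documented cause of duplicated ETA/countdown tasks with the Redis broker is the transport's visibility_timeout: if a task's ETA lies further in the future than the visibility timeout (one hour by default), Redis assumes the reserved message was lost and redelivers it, and every redelivery produces another copy of the email/SMS. A sketch of the relevant setting, in the same old-style uppercase form as the config above (the 24-hour value is an assumption; it just needs to exceed your farthest ETA):

# Raise the Redis visibility timeout above the longest ETA/countdown you schedule.
BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 60 * 60 * 24}  # 24 hours, as an example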
Here is the setup: a Django project with Celery and a CloudAMQP RabbitMQ instance doing the message brokering.
My Celery/RabbitMQ settings:
# RabbitMQ & Celery settings
BROKER_URL = 'amqp://guest:guest@localhost:5672/'  # Understandably fake
BROKER_POOL_LIMIT = 1
BROKER_CONNECTION_TIMEOUT = 30
BROKER_HEARTBEAT = 30
CELERY_SEND_EVENTS = False
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
A docker container running celery with the following command:
bash -c 'cd django && celery -A pkm_main worker -E -l info --concurrency=3'
The shared_task definition:
from __future__ import absolute_import
from celery import shared_task
#shared_task
def push_notification(user_id, message):
logging.critical('Push notifications sent')
return {'status': 'success'}
And here is where I actually call it when something happens (I have omitted some of the code because it does not seem to be relevant):
from notifications.tasks import push_notification

def like_this(self, **args):
    # Do like stuff and then call .delay()
    push_notification.delay(media.user.id, request.user.username + ' has liked your item')
So when this is run, everything seems fine and dandy; the output looks like so:
worker_1 | [2016-03-25 09:03:34,888: INFO/MainProcess] Received task: notifications.tasks.push_notification[8443bd88-fa02-4ea4-9bff-8fbec8c91516]
worker_1 | [2016-03-25 09:03:35,333: CRITICAL/Worker-1] Push notifications sent
worker_1 | [2016-03-25 09:03:35,336: INFO/MainProcess] Task notifications.tasks.push_notification[8443bd88-fa02-4ea4-9bff-8fbec8c91516] succeeded in 0.444933412999s: {'status': 'success'}
So from what I gather the task has been received and executed properly, so the messages should stop and RabbitMQ should go quiet.
But in my RabbitMQ Management I see messages getting published and delivered non-stop:
So what I'm gathering from this is that RabbitMQ is trying to send some sort of confirmation and failing and retrying? Is there a way to actually turn this behavior off?
All help and advice is warmly welcomed.
EDIT: Forgot to mention something important: until I call push_notification.delay() the message tab is empty except for the heartbeat that comes and goes every 30 seconds. Only after I have called .delay() does this happen.
EDIT 2: CELERYBEAT_SCHEDULE settings (I've tried running with and without them - there was no difference but adding them just in case)
CELERYBEAT_SCHEDULE = {
    "minutely_process_all_notifications": {
        'task': 'transmissions.tasks.process_all_notifications',
        'schedule': crontab(minute='*')
    }
}
EDIT 3: Added View code. Also I'm not using the CELERYBEAT_SCHEDULE. I'm just keeping the config in the code for future scheduled tasks
from notifications.tasks import push_notification

class MediaLikesView(BaseView):
    def post(self, request, media_id):
        media = self.get_object(media_id)
        data = {}
        data['media'] = media.id
        data['user'] = request.user.id
        serializer = MediaLikeSerializer(data=data)
        if serializer.is_valid():
            like = serializer.save()
            push_notification.delay(media.user.id, request.user.username + ' has liked your item')
            serializer = MediaGetLikeSerializer(like)
            return self.get_mocked_pagination_response(status=status.HTTP_204_NO_CONTENT)
        return self.get_mocked_pagination_response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
It's Celery's mingle and gossip traffic. Disable it by adding --without-gossip --without-mingle --without-heartbeat to the command line arguments.
Also don't forget to set BROKER_HEARTBEAT = None when you've disabled heartbeats on the command line, otherwise you'll be disconnected after 30s. It's usually better to rely on TCP keepalive than on AMQP heartbeats, or, even worse, Celery's own heartbeats.
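Put together with the startup command from the question, that would look roughly like this:

# Worker command from the question, with gossip, mingle and heartbeats disabled:
#   bash -c 'cd django && celery -A pkm_main worker -E -l info --concurrency=3 \
#       --without-gossip --without-mingle --without-heartbeat'

# Matching Django setting, so kombu does not expect heartbeats it will never get:
BROKER_HEARTBEAT = None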
So our use case might be out of the remit of what Celery can do, but I thought I'd ask...
Use Case
We are planning on using a hosted/managed RabbitMQ cluster which Celery will be using as its broker.
We want to ensure that our app has 0 downtime (obviously) so we're trying to figure out how we can handle the event when our upstream cluster has a catastrophic failure whereby the entire cluster is unavailable.
Our thought is to have a standby Rabbit cluster so that when the connection drops, we can automatically switch Celery to use that connection instead.
In the meantime, Celery is determining whether the master cluster is up and running and when it is, all of the publishers reconnect to the master, the workers drain the backup cluster and when empty, switch back onto the master.
The issue
What I'm having difficulty with is capturing the connection failure: it seems to happen deep within Celery, and the exception doesn't bubble up to the app.
I can see that Celery has a BROKER_FAILOVER_STRATEGY configuration property, which would handle the initial swap, but it (seemingly) is only utilised when failover occurs, which doesn't fit our use case of swapping back to the master when it is back up.
I've also come across Celery's "bootsteps", but these are applied after Celery's own "Connection" bootstep which is where the exception is being thrown.
I have a feeling this approach is probably not the best one given the limitations I've been finding, but has anyone got any ideas on how I'd go about overriding the default Connection bootstep or achieving this via a different means?
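For reference, the built-in failover mechanism referred to above is driven by giving the broker URL as a list; a minimal sketch (the hostnames and app name are placeholders):

# Sketch: Celery/kombu fails over between the brokers listed in broker_url.
from celery import Celery

app = Celery('myapp')
app.conf.broker_url = [
    'amqp://user:pass@primary-rabbit:5672//',
    'amqp://user:pass@standby-rabbit:5672//',
]
# 'round-robin' is one of the strategies kombu ships with; a callable can be
# supplied instead, as the BROKER_FAILOVER_STRATEGY docs describe.
app.conf.broker_failover_strategy = 'round-robin'

As the question notes, this only governs how the next broker is picked when a connection fails; switching back to the master when it recovers still has to be handled by the application.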
It's quite an old question, but maybe this is useful to someone. I'm using FastAPI with Celery 5.2.
run_api.py file:
import uvicorn

if __name__ == "__main__":
    port = 8893
    print("Starting API server on port {}".format(port))
    uvicorn.run("endpoints:app", host="localhost", port=port, access_log=False)
endpoints.py file:
import threading
import time
import os
from celery import Celery
from fastapi import FastAPI
import itertools
import random

# Create object for FastAPI
app = FastAPI()

# Create and configure Celery to manage queues
# ----
celery = Celery(__name__)
celery.conf.broker_url = ["redis://localhost:6379"]
celery.conf.result_backend = "redis://localhost:6379"
celery.conf.task_track_started = True
celery.conf.task_serializer = "pickle"
celery.conf.result_serializer = "pickle"
celery.conf.accept_content = ["pickle"]

def random_failover_strategy(servers):
    # The next lines are necessary for this to work, even if you don't use them:
    it = list(servers)  # don't modify caller's list
    shuffle = random.shuffle
    for _ in itertools.repeat(None):
        # Do whatever action is required here to obtain the new url.
        # As an example, a random port is generated below.
        ra = random.randint(0, 100)
        it = [f"redis://localhost:{str(ra)}"]
        celery.conf.result_backend = it[0]
        shuffle(it)
        yield it[0]

celery.conf.broker_failover_strategy = random_failover_strategy

# Start the celery worker. I start it in a separate thread, so fastapi can run in parallel
worker = celery.Worker()

def start_worker():
    worker.start()

ce = threading.Thread(target=start_worker)
ce.start()
# ----

@app.get("/", tags=["root"])
def root():
    return {"message": ""}

@app.post("/test")
def test(num: int):
    task = test_celery.delay(num)
    print(f'task id: {task.id}')
    return {
        "task_id": task.id,
        "task_status": "PENDING"}

@celery.task(name="test_celery", bind=True)
def test_celery(self, num):
    self.update_state(state='PROGRESS')
    print("ENTERED PROCESS", num)
    time.sleep(100)
    print("EXITING PROCESS", num)
    return {'number': num}

@app.get("/result")
def result(id: str):
    task_result = celery.AsyncResult(id)
    if task_result.status == "SUCCESS":
        return {
            "task_status": task_result.status,
            "task_num": task_result.result['number']
        }
    else:
        return {
            "task_status": task_result.status,
            "task_num": None
        }
Place both files in the same folder. Run python3 run_api.py.
Enjoy!
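A quick client-side check, assuming the requests library is installed and the API above is running on port 8893 (the num value is arbitrary):

import time
import requests

# Queue a task; num is a query parameter of the /test endpoint defined above.
resp = requests.post("http://localhost:8893/test", params={"num": 7})
task_id = resp.json()["task_id"]

# Poll the /result endpoint; it reports PENDING/PROGRESS until the 100 s sleep ends.
time.sleep(2)
print(requests.get("http://localhost:8893/result", params={"id": task_id}).json())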
I want to implement task cancellation with cleanup process on Celery + RabbitMQ broker.
How can I get the "REVOKED" status of the current task inside a Celery worker?
# tasks.py -- celery worker
import time
from celery import Celery

app = Celery('tasks', broker='amqp://guest@localhost//')

@app.task
def add(x, y):
    for i in range(0, 10):
        time.sleep(1)
        # I want to check here for cleanup.
    return x + y

# caller.py
from tasks import add

result = add.delay(4, 4)
result.revoke()
Celery supports abortable tasks, but they only work with the database result backend.
Python 3.4.1 / Celery 3.1.17 / RabbitMQ 3.4.4
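For reference, this is roughly what the abortable-task approach mentioned above looks like; it is only a sketch, and as noted it needs a database result backend rather than plain RabbitMQ/AMQP results:

import time
from celery import Celery
from celery.contrib.abortable import AbortableTask

app = Celery('tasks', broker='amqp://guest@localhost//')

@app.task(bind=True, base=AbortableTask)
def add(self, x, y):
    for i in range(0, 10):
        time.sleep(1)
        if self.is_aborted():
            # Run cleanup here, then stop.
            return None
    return x + y

# Caller side: AbortableAsyncResult(task_id).abort() sets the flag the loop polls,
# instead of result.revoke().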
Felippe Da Motta Raposo's suggestion works inside my custom task:
from celery import Task
from celery.task.control import inspect

WORKER_NAME = "celery@server"
inspector = inspect([WORKER_NAME])

class CustomTask(Task):
    def _is_revoked(self):
        revoked_list = inspector.revoked()
        return revoked_list and self.task_id in revoked_list[WORKER_NAME]

    def run(self, *args, **kwargs):
        self.task_id = self.request.id
        # ... call self._is_revoked() periodically in the task loop and clean up if it returns True
Take a look at Celery's inspect API; you can ask Celery which tasks a worker knows about and whether yours is scheduled to run.
E.g.:
import celery
celery_inspect = celery.current_app.control.inspect()
celery_inspect.registered_tasks()
This method returns a dict, keyed by worker, of the tasks registered on each worker.
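A rough sketch of how one might combine that with the revoked list to check a specific task id (the exact payload shapes are from memory, so treat them as assumptions):

import celery

def task_status_snapshot(task_id):
    insp = celery.current_app.control.inspect()
    revoked = insp.revoked() or {}      # {worker_name: [task_id, ...]}
    scheduled = insp.scheduled() or {}  # {worker_name: [{'eta': ..., 'request': {'id': ...}}, ...]}

    is_revoked = any(task_id in ids for ids in revoked.values())
    is_scheduled = any(
        entry.get('request', {}).get('id') == task_id
        for entries in scheduled.values()
        for entry in entries
    )
    return is_revoked, is_scheduled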