How to implement autoretry for Celery tasks - python

In Celery, you can retry any task in case of an exception. You can do it like so:
@task(max_retries=5)
def div(a, b):
    try:
        return a / b
    except ZeroDivisionError as exc:
        raise div.retry(exc=exc)
In this case, if you try to divide by zero, the task will be retried five times. But you have to check for errors in your code explicitly; the task will not be retried if you skip the try-except block.
I want my functions to look like:
@celery.task(autoretry_on=ZeroDivisionError, max_retries=5)
def div(a, b):
    return a / b

Celery (since version 4.0) has exactly what you were looking for:
@app.task(autoretry_for=(SomeException,))
def my_task():
    ...
See: http://docs.celeryproject.org/en/latest/userguide/tasks.html#automatic-retry-for-known-exceptions
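As a slightly fuller sketch (the task name and exception choice are illustrative; retry_kwargs and retry_backoff are the documented companion options, with retry_backoff available from Celery 4.2):

@app.task(autoretry_for=(ConnectionError,),
          retry_kwargs={'max_retries': 5},
          retry_backoff=True)
def fetch_data(url):
    # Any ConnectionError raised here triggers an automatic retry,
    # up to 5 times, with exponential backoff between attempts.
    ...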

I searched this issue for a while, but found only this feature request.
I decided to write my own decorator for doing auto-retries:
def task_autoretry(*args_task, **kwargs_task):
    def real_decorator(func):
        @task(*args_task, **kwargs_task)
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                func(*args, **kwargs)
            except kwargs_task.get('autoretry_on', Exception) as exc:
                wrapper.retry(exc=exc)
        return wrapper
    return real_decorator
With this decorator I can rewrite my previous task:
@task_autoretry(autoretry_on=ZeroDivisionError, max_retries=5)
def div(a, b):
    return a / b

I've modified your answer to work with the existing Celery API (currently 3.1.17)
class MyCelery(Celery):
    def task(self, *args_task, **opts_task):
        def real_decorator(func):
            sup = super(MyCelery, self).task

            @sup(*args_task, **opts_task)
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                try:
                    func(*args, **kwargs)
                except opts_task.get('autoretry_on', Exception) as exc:
                    logger.info('Yo! We did it!')
                    wrapper.retry(exc=exc, args=args, kwargs=kwargs)
            return wrapper
        return real_decorator
Then, in your tasks:
app = MyCelery()
app.config_from_object('django.conf:settings')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)

@app.task(autoretry_on=Exception)
def mytask():
    raise Exception('Retrying!')
This allows you to add the autoretry_on functionality to your tasks without having to use a separate decorator to define tasks.
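With this subclass in place, the spelling the question asked for works directly; a sketch (max_retries is simply passed through to the underlying task decorator):

@app.task(autoretry_on=ZeroDivisionError, max_retries=5)
def div(a, b):
    return a / b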

Here is an improved version of the existing answers.
This fully implements the Celery 4.2 behaviour (as documented here) but for Celery 3.1.25.
It also doesn't break the different task decorator forms (with/without parentheses) and returns/raises properly.
import functools
import random

from celery.app.base import Celery as BaseCelery


def get_exponential_backoff_interval(factor, retries, maximum, full_jitter=False):
    """
    Calculate the exponential backoff wait time.

    (taken from Celery 4 `celery/utils/time.py`)
    """
    # Will be zero if factor equals 0
    countdown = factor * (2 ** retries)
    # Full jitter according to
    # https://www.awsarchitectureblog.com/2015/03/backoff.html
    if full_jitter:
        countdown = random.randrange(countdown + 1)
    # Adjust according to maximum wait time and account for negative values.
    return max(0, min(maximum, countdown))


class Celery(BaseCelery):

    def task(self, *args, **opts):
        """
        Overridden to add a back-port of Celery 4's `autoretry_for` task args.
        """
        super_method = super(Celery, self).task

        def inner_create_task_cls(*args_task, **opts_task):
            # http://docs.celeryproject.org/en/latest/userguide/tasks.html#Task.autoretry_for
            autoretry_for = tuple(opts_task.get('autoretry_for', ()))  # Tuple[Type[Exception], ...]
            retry_backoff = int(opts_task.get('retry_backoff', False))  # multiplier, default if True: 1
            retry_backoff_max = int(opts_task.get('retry_backoff_max', 600))  # seconds
            retry_jitter = opts_task.get('retry_jitter', True)  # bool
            retry_kwargs = opts_task.get('retry_kwargs', {})

            def real_decorator(func):
                @super_method(*args_task, **opts_task)
                @functools.wraps(func)
                def wrapper(*func_args, **func_kwargs):
                    try:
                        return func(*func_args, **func_kwargs)
                    except autoretry_for as exc:
                        if retry_backoff:
                            retry_kwargs['countdown'] = get_exponential_backoff_interval(
                                factor=retry_backoff,
                                retries=wrapper.request.retries,
                                maximum=retry_backoff_max,
                                full_jitter=retry_jitter,
                            )
                        raise wrapper.retry(exc=exc, **retry_kwargs)
                return wrapper
            return real_decorator

        # handle both `@task` and `@task(...)` decorator forms
        if len(args) == 1:
            if callable(args[0]):
                return inner_create_task_cls(**opts)(*args)
            raise TypeError('argument 1 to @task() must be a callable')
        if args:
            raise TypeError(
                '@task() takes exactly 1 argument ({0} given)'.format(
                    sum([len(args), len(opts)])))
        return inner_create_task_cls(**opts)
I have also written some unit tests for this, as I am using it in my project.
They can be found in this gist, but note they are not easily runnable - treat them more as documentation of how the above feature works (and validation that it works properly).
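For reference, a minimal usage sketch of the patched class (the app name, task body, and choice of IOError are illustrative):

app = Celery('myapp')

@app.task(autoretry_for=(IOError,),
          retry_backoff=2,           # countdowns of 2, 4, 8, ... seconds
          retry_backoff_max=300,
          retry_kwargs={'max_retries': 5})
def fetch(url):
    ...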

Related

Composing decorator with parameters in python

I want to use a decorator (composer) that receives n decorators as parameters; these decorators will be used to decorate a function. I also want to pass in some parameters from two origins: a parameter named "SKIP" in the composer, and another parameter named "parameter" sent by the parameter_sender decorator. Here's what I tried:
def compose(*decorators, SKIP=None):
    def something(func):
        @wraps(func)
        def func_wrap(parameter=None, **kwargs):
            try:
                if SKIP:
                    print("I'm here")
                    return func(parameter=parameter, **kwargs)
                else:
                    for decorator in reversed(decorators):
                        func = decorator(func, parameter=parameter, **kwargs)  # --- This line is producing the error ---
                    return func
                raise exception
            except Exception as e:
                print(e)
                raise exception
        return func_wrap
    return something
And here is an example of where I want to use it. In this example I want to skip the composing of all the decorators if the variable SKIP is true.
@application.route("/function/<id_something>", methods=['GET'])
@parameter_sender
@compose(decorator_1, decorator_2, SKIP=True)
def function(id_something, **kwargs):
    try:
        # TODO:
        return jsonify("ok")
    except Exception as e:
        print(e)
But I've got an error that says this:
>>I'm here
>>local variable 'func' referenced before assignment
Even though the if statement is working. PS: it works without the line indicated in the composer.
The following code should do the thing.
You were trying to assign to a variable from an outer scope. In my example I used a separate temp variable, composition.
from functools import wraps

def compose(*decorators, SKIP=None):
    def something(func):
        @wraps(func)
        def func_wrap(*args, **kwargs):
            try:
                if SKIP:
                    print("I'm here")
                    return func(*args, **kwargs)
                else:
                    composition = func
                    for decorator in reversed(decorators):
                        composition = decorator(composition)
                    return composition(*args, **kwargs)
            except Exception as e:
                print(e)
                raise
        return func_wrap
    return something
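A quick usage sketch with two toy decorators (every name here is illustrative):

from functools import wraps

def decorator_1(func):
    @wraps(func)
    def inner(*args, **kwargs):
        print("decorator_1 applied")
        return func(*args, **kwargs)
    return inner

def decorator_2(func):
    @wraps(func)
    def inner(*args, **kwargs):
        print("decorator_2 applied")
        return func(*args, **kwargs)
    return inner

@compose(decorator_1, decorator_2, SKIP=False)
def greet(name):
    return "hello " + name

print(greet("world"))  # prints both decorator messages, then "hello world"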

how to partially drop functools.lru_cache by one argument?

I have a get(bid, mid, pid) function. It is decorated with lru_cache. I want to drop all cache entries with bid == 105, for example.
I was thinking of closures that return decorated functions. Then I'd get a separate cache for each bid, plus a non-cached function with a dict of these closures that acts as a router. But maybe there is a more Pythonic way to do this?
upd: I came up with something like this, and it seems to work:
getters = {}

def facade(bid, mid, pid):
    global getters  # not very good, better to use class
    if bid not in getters:
        def create_getter(bid):
            @functools.lru_cache(maxsize=None)
            def get(mid, pid):
                print('cache miss')
                return bid + mid + pid
            return get
        getters[bid] = create_getter(bid)
    return getters[bid](mid, pid)

val = facade(bid, mid, pid)  # ability to read like before
if need_to_drop:
    getters[bid].cache_clear()  # ability to flush entries with specified bid
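A minimal class-based sketch of the same idea (as the comment above suggests; the names and the placeholder arithmetic are illustrative):

import functools

class BidCache:
    def __init__(self):
        self._getters = {}

    def get(self, bid, mid, pid):
        if bid not in self._getters:
            @functools.lru_cache(maxsize=None)
            def getter(mid, pid, _bid=bid):
                print('cache miss')
                return _bid + mid + pid  # placeholder computation
            self._getters[bid] = getter
        return self._getters[bid](mid, pid)

    def drop(self, bid):
        # Flush only the entries cached for this bid.
        if bid in self._getters:
            self._getters[bid].cache_clear()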
Maybe wrap functools.lru_cache and filter the parameters? (Note that this bypasses the cache for filtered calls rather than evicting entries that are already stored.)
from functools import lru_cache

def filtered_lru(filter_func: callable, maxsize: int):
    def wrapper(f):
        cached = lru_cache(maxsize=maxsize)(f)
        def wrapped(*args, **kwargs):
            if filter_func(*args, **kwargs):
                print('Using cache')
                return cached(*args, **kwargs)
            else:
                print('Not using cache')
                return f(*args, **kwargs)
        return wrapped
    return wrapper

def _get_filter(*args, **kwargs):
    return args[0] != 0

@filtered_lru(_get_filter, maxsize=100)
def get(num):
    print('Calculating...')
    return 2 * num

if __name__ == '__main__':
    print(get(1))
    print(get(1))
    print(get(1))
    print(get(0))
    print(get(0))
output:
Using cache
Calculating...
2
Using cache
2
Using cache
2
Not using cache
Calculating...
0
Not using cache
Calculating...
0

Python decorator to time recursive functions

I have a simple decorator to track the runtime of a function call:
import time

def timed(f):
    def caller(*args):
        start = time.time()
        res = f(*args)
        end = time.time()
        return res, end - start
    return caller
This can be used as follows, and returns a tuple of the function result and the execution time.
@timed
def test(n):
    for _ in range(n):
        pass
    return 0

print(test(900))  # prints (0, 2.69e-05)
Simple enough. But now I want to apply this to recursive functions. Applying the above wrapper to a recursive function results in nested tuples with the times of each recursive call, as is expected.
@timed
def rec(n):
    if n:
        return rec(n - 1)
    else:
        return 0

print(rec(3))  # Prints ((((0, 1.90e-06), 8.10e-06), 1.28e-05), 1.90e-05)
What's an elegant way to write the decorator so that it handles recursion properly? Obviously, you could wrap the call in a timed function:
@timed
def wrapper():
    return rec(3)
This will give a tuple of the result and the time, but I want all of it to be handled by the decorator so that the caller does not need to worry about defining a new function for every call. Ideas?
The problem here isn't really the decorator. The problem is that rec needs rec to be a function that behaves one way, but you want rec to be a function that behaves differently. There's no clean way to reconcile that with a single rec function.
The cleanest option is to stop requiring rec to be two things at once. Instead of using decorator notation, assign timed(rec) to a different name:
def rec(n):
    ...

timed_rec = timed(rec)
If you don't want two names, then rec needs to be written to understand the actual value that the decorated rec will return. For example,
@timed
def rec(n):
    if n:
        val, runtime = rec(n - 1)
        return val
    else:
        return 0
I prefer the other answers so far (particularly user2357112's answer), but you can also make a class-based decorator that detects whether the function has been activated, and if so, bypasses the timing:
import time

class fancy_timed(object):
    def __init__(self, f):
        self.f = f
        self.active = False

    def __call__(self, *args):
        if self.active:
            return self.f(*args)
        start = time.time()
        self.active = True
        res = self.f(*args)
        end = time.time()
        self.active = False
        return res, end - start

@fancy_timed
def rec(n):
    if n:
        time.sleep(0.01)
        return rec(n - 1)
    else:
        return 0

print(rec(3))
(class written with (object) so that this is compatible with py2k and py3k).
Note that to really work properly, the outermost call should use try and finally. Here's the fancied up fancy version of __call__:
def __call__(self, *args):
    if self.active:
        return self.f(*args)
    try:
        start = time.time()
        self.active = True
        res = self.f(*args)
        end = time.time()
        return res, end - start
    finally:
        self.active = False
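The same re-entrancy guard also works as a plain function decorator; a sketch using a closure flag (Python 3 only because of nonlocal):

import time
from functools import wraps

def timed_outermost(f):
    active = False
    @wraps(f)
    def caller(*args):
        nonlocal active
        if active:  # inner recursive call: don't time, just delegate
            return f(*args)
        active = True
        start = time.time()
        try:
            return f(*args), time.time() - start
        finally:
            active = False
    return caller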
You could structure your timer in a different way by *ahem* abusing the contextmanager and function attribute a little...
from contextlib import contextmanager
import time

@contextmanager
def timed(func):
    timed.start = time.time()
    try:
        yield func
    finally:
        timed.duration = time.time() - timed.start

def test(n):
    for _ in range(n):
        pass
    return n

def rec(n):
    if n:
        time.sleep(0.05)  # extra delay to notice the difference
        return rec(n - 1)
    else:
        return n

with timed(rec) as r:
    print(r(10))
    print(r(20))
print(timed.duration)

with timed(test) as t:
    print(t(555555))
    print(t(666666))
print(timed.duration)
Results:
# recursive
0
0
1.5130000114440918
# non-recursive
555555
666666
0.053999900817871094
If this is deemed a bad hack I'll gladly accept your criticism.
Although it is not a general solution to the problem of integrating recursion with decorators, for the problem of timing only, I have verified that the last element of the nested tuple is the overall runtime, as it comes from the outermost recursive call. Thus if you had
@timed
def rec():
    ...
to get the overall runtime given the original function definitions you could simply do
    rec()[1]
Getting the result of the call, on the other hand, would then require recursing through the nested tuple:
def get(tup):
    if isinstance(tup, tuple):
        return get(tup[0])
    else:
        return tup
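For example, using the nested tuple printed earlier:

nested = rec(3)     # ((((0, t0), t1), t2), t3)
print(get(nested))  # 0, the actual result
print(nested[1])    # the overall runtime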
This might be too complicated to simply get the result of your function.
I encountered the same issue when trying to profile a simple quicksort implementation.
The main issue is that decorators are executed on each function call, and we need something that can keep state so we can sum all the calls at the end. Plain decorators are not the right tool for the job.
However, one idea is to abuse the fact that functions are objects and can have attributes. This is explored below with a simple decorator. Something that must be understood is that, by using the decorator's syntax sugar (@), the function will always be accumulating its timings.
from typing import Any, Callable
from time import perf_counter

class timeit:
    def __init__(self, func: Callable) -> None:
        self.func = func
        self.timed = []

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        start = perf_counter()
        res = self.func(*args, **kwds)
        end = perf_counter()
        self.timed.append(end - start)
        return res

# usage
@timeit
def rec(n):
    ...

if __name__ == "__main__":
    result = rec(4)  # rec result
    print(f"Took {sum(rec.timed):.2f} seconds")
    # Out: Took 3.39 seconds

    result = rec(4)  # rec result
    # timings between calls are accumulated
    # Out: Took 6.78 seconds
Which brings us to a solution inspired by @r.ook: below is a simple context manager that stores each run's timing and prints their sum at the end (__exit__). Notice that, because each timing requires a with statement, this will not accumulate different runs.
from typing import Any, Callable
from time import perf_counter

class timeit:
    def __init__(self, func: Callable) -> None:
        self.func = func
        self.timed = []

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        start = perf_counter()
        res = self.func(*args, **kwds)
        end = perf_counter()
        self.timed.append(end - start)
        return res

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # TODO: report `exc_*` if an exception gets raised
        print(f"Took {sum(self.timed):.2f} seconds")
        return

# usage
def rec(n):
    ...

if __name__ == "__main__":
    with timeit(rec) as f:
        result = f(4)  # rec result
    # Out: Took 3.39 seconds

python decorator TypeError missing 1 required positional argument

I'm trying to write a decorator that retries a failing function N times, sleeping for increasingly long intervals between attempts. This is my attempt so far:
def exponential_backoff(seconds=10, attempts=10):
    def our_decorator(func):
        def function_wrapper(*args, **kwargs):
            for s in range(0, seconds * attempts, attempts):
                sleep(s)
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    print(e)
        return function_wrapper
    return our_decorator
@exponential_backoff
def test():
    for a in range(100):
        if a - random.randint(0, 1) == 0:
            print('success count: {}'.format(a))
            pass
        else:
            print('error count {}'.format(a))
            'a' + 1

test()
I keep getting the error:
TypeError: our_decorator() missing 1 required positional argument: 'func'
Understand what a decorator is:
@exponential_backoff
def test():
    pass

is equivalent to:

def test():
    pass

test = exponential_backoff(test)

In this case, test is bound to our_decorator, which still expects func as its argument. That's why you get a TypeError when calling test().
So further:

@exponential_backoff()
def test():
    pass

is equivalent to:

def test():
    pass

test = exponential_backoff()(test)

In this case, test is now what you need.
Further, functools.wraps helps you copy all the properties of the original function onto the decorated function, such as the function's name and docstring:
from functools import wraps

def exponential_backoff(func):
    # @wraps(func)
    def function_wrapper(*args, **kwargs):
        pass
    return function_wrapper

@exponential_backoff
def test():
    pass

print(test)  # <function exponential_backoff.<locals>.function_wrapper at 0x7fcc343a4268>

# uncomment the `@wraps(func)` line:
print(test)  # <function test at 0x7fcc343a4400>
You should be using:
@exponential_backoff()
def test():
    ...
The overall decorator is not designed to have arguments be optional, so you must provide () when using it.
If you want an example of how to make a decorator with an optional argument list, see:
https://wrapt.readthedocs.io/en/latest/decorators.html#decorators-with-optional-arguments
You might also consider using the wrapt package to make your decorators easier and more robust.
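For illustration, a sketch of the optional-arguments pattern described in those wrapt docs, with the backoff loop elided (this assumes the wrapt package is installed; the retry logic itself is not shown):

import functools
import wrapt

def exponential_backoff(wrapped=None, seconds=10, attempts=10):
    # Support both @exponential_backoff and @exponential_backoff(seconds=5):
    # when called with arguments only, return a partial awaiting the function.
    if wrapped is None:
        return functools.partial(exponential_backoff,
                                 seconds=seconds, attempts=attempts)

    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        # The retry/backoff loop would go here.
        return wrapped(*args, **kwargs)

    return wrapper(wrapped)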
Either you go for the solution provided by @Graham Dumpleton, or you can just modify your decorator like so:
from functools import wraps, partial

def exponential_backoff(func=None, seconds=10, attempts=10):
    if func is None:
        return partial(exponential_backoff, seconds=seconds, attempts=attempts)

    @wraps(func)
    def function_wrapper(*args, **kwargs):
        for s in range(0, seconds * attempts, attempts):
            sleep(s)
            try:
                return func(*args, **kwargs)
            except Exception as e:
                print(e)
    return function_wrapper
@exponential_backoff
def test():
    for a in range(100):
        if a - random.randint(0, 1) == 0:
            print('success count: {}'.format(a))
            pass
        else:
            print('error count {}'.format(a))
            'a' + 1

test()
EDIT
My answer was not entirely correct; please see @GrahamDumpleton's answer, which shows how to make my attempted solution viable (i.e. this link). Fixed it now, thank you @GrahamDumpleton!

Using decorators with MapReduce mapper/reducer functions?

I'm trying to wrap my mapper/reducer functions with something like:
def log_exceptions_to_sentry(sentry_id, raise_exception):
    def decorator(fn):
        def wrapper(*args, **kwargs):
            try:
                return fn(*args, **kwargs)
            except Exception as e:
                client = Client(sentry_id)
                client.captureException(
                    exc_info=sys.exc_info())
                if raise_exception:
                    raise e
        return wrapper
    return decorator
and so my mapper/reducer functions look like:
@log_exceptions_to_sentry(SENTRY_ID, False)
def my_mapper_fn(item):
    logging.info(item)
But it doesn't seem to work. Without the decorator, I'd find INFO logs of item. But with the decorator, it seems the mapper/reducer functions don't get called at all.
I was hoping to make it easy to log any errors my functions might have so I can fix them, as trying to track down MapReduce errors via AppEngine's logs is almost impossible.
I could wrap the entire function body in a try ... except block, but a decorator would be cleaner.
I believe you have an issue with the decorator structure. In particular, I think you want to replace
try:
    return fn(*args, **kwargs)

with

try:
    fn(*args, **kwargs)
I'm missing some of the functions to test this, but you can see simplified decorator examples here if you want to run one: http://simeonfranklin.com/blog/2012/jul/1/python-decorators-in-12-steps/
Try something like this to make sure your code works, then try the more complicated parameterized version after:
sentry_id = id
raise_exception = 1

def basic_decorator(function):
    global sentry_id, raise_exception
    def wrapper(*args, **kwargs):
        try:
            function(*args, **kwargs)
        except Exception as e:
            client = Client(sentry_id)
            client.captureException(exc_info=sys.exc_info())
            if raise_exception:
                raise
    return wrapper

@basic_decorator
def my_mapper_fn(item):
    logging.info(item)
To parameterize sentry_id and raise_exception, wrap the decorator inside another decorator. The idea is that when the basic decorator is defined, sentry_id, raise_exception, and function will be defined ahead of time and enclosed within its scope. This should look something like
def log_exceptions_to_sentry(sentry_id, raise_exception=1):
    def basic_decorator(function):
        def wrapper(*args, **kwargs):
            try:
                function(*args, **kwargs)
            except Exception as e:
                client = Client(sentry_id)
                client.captureException(exc_info=sys.exc_info())
                if raise_exception:
                    raise
        return wrapper
    return basic_decorator

@log_exceptions_to_sentry(SENTRY_ID, RAISE_EXCEPTION)
def my_mapper_fn(item):
    logging.info(item)
I don't know what SENTRY_ID or Client is, since you didn't post it. So I made up my own. Using your code exactly, everything appears to work as expected. I'm not sure what you're seeing that isn't working right.
import logging
import sys

SENTRY_ID = 1

class Client(object):
    def __init__(self, sentry_id):
        pass
    def captureException(self, **kwargs):
        print('captureException, ', kwargs['exc_info'])

def log_exceptions_to_sentry(sentry_id, raise_exception):
    def decorator(fn):
        def wrapper(*args, **kwargs):
            try:
                return fn(*args, **kwargs)
            except Exception as e:
                client = Client(sentry_id)
                client.captureException(
                    exc_info=sys.exc_info())
                if raise_exception:
                    raise e
        return wrapper
    return decorator

def fn(item):
    logging.debug(item)
    logging.info(item)
    logging.error(item)

@log_exceptions_to_sentry(SENTRY_ID, False)
def my_mapper_fn(item):
    logging.debug(item)
    logging.info(item)
    logging.error(item)
    return 1

@log_exceptions_to_sentry(SENTRY_ID, False)
def my_mapper_fn2(item):
    raise Exception()

logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s:%(name)s:%(message)s',
    # format='%(message)s',
)

x = fn({'a': 1})
print(x)
x = my_mapper_fn({'b': 2})
print(x)
x = my_mapper_fn2({'c': 3})
print(x)
Output:
INFO:root:{'a': 1}
ERROR:root:{'a': 1}
None
INFO:root:{'b': 2}
ERROR:root:{'b': 2}
1
captureException, (<type 'exceptions.Exception'>, Exception(), <traceback object at 0x1813cf8>)
None
