Python decorator to time recursive functions

I have a simple decorator to track the runtime of a function call:
import time

def timed(f):
    def caller(*args):
        start = time.time()
        res = f(*args)
        end = time.time()
        return res, end - start
    return caller
This can be used as follows, and returns a tuple of the function result and the execution time.
@timed
def test(n):
    for _ in range(n):
        pass
    return 0

print(test(900))  # prints (0, 2.69e-05)
Simple enough. But now I want to apply this to recursive functions. Applying the above wrapper to a recursive function results in nested tuples with the times of each recursive call, as is expected.
@timed
def rec(n):
    if n:
        return rec(n - 1)
    else:
        return 0

print(rec(3))  # prints ((((0, 1.90e-06), 8.10e-06), 1.28e-05), 1.90e-05)
What's an elegant way to write the decorator so that it handles recursion properly? Obviously, you could wrap the call in another timed function:
@timed
def wrapper():
    return rec(3)
This will give a tuple of the result and the time, but I want all of it to be handled by the decorator so that the caller does not need to worry about defining a new function for every call. Ideas?

The problem here isn't really the decorator. The problem is that rec needs rec to be a function that behaves one way, but you want rec to be a function that behaves differently. There's no clean way to reconcile that with a single rec function.
The cleanest option is to stop requiring rec to be two things at once. Instead of using decorator notation, assign timed(rec) to a different name:
def rec(n):
    ...

timed_rec = timed(rec)
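For illustration, a minimal sketch of how the two names then behave (reusing the timed decorator from the question; the printed time is made up):
import time

def timed(f):
    def caller(*args):
        start = time.time()
        res = f(*args)
        end = time.time()
        return res, end - start
    return caller

def rec(n):
    return rec(n - 1) if n else 0  # recursive calls resolve to the plain rec

timed_rec = timed(rec)

print(rec(3))        # 0 -- plain result, no timing
print(timed_rec(3))  # (0, 1.9e-06) -- one flat (result, time) tuple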
If you don't want two names, then rec needs to be written to understand the actual value that the decorated rec will return. For example,
@timed
def rec(n):
    if n:
        val, runtime = rec(n - 1)  # unpack the (result, time) tuple of the inner call
        return val
    else:
        return 0
The outermost call then returns a single flat (result, time) tuple covering the whole recursion.

I prefer the other answers so far (particularly user2357112's answer), but you can also make a class-based decorator that detects whether the function has been activated, and if so, bypasses the timing:
import time

class fancy_timed(object):
    def __init__(self, f):
        self.f = f
        self.active = False

    def __call__(self, *args):
        if self.active:  # nested (recursive) call: skip the timing
            return self.f(*args)
        start = time.time()
        self.active = True
        res = self.f(*args)
        end = time.time()
        self.active = False
        return res, end - start
@fancy_timed
def rec(n):
    if n:
        time.sleep(0.01)
        return rec(n - 1)
    else:
        return 0

print(rec(3))  # e.g. (0, 0.0303) -- a single flat tuple covering all three sleeps
(class written with (object) so that this is compatible with py2k and py3k).
Note that to really work properly, the outermost call should use try and finally. Here's the fancied up fancy version of __call__:
def __call__(self, *args):
    if self.active:
        return self.f(*args)
    try:
        start = time.time()
        self.active = True
        res = self.f(*args)
        end = time.time()
        return res, end - start
    finally:
        self.active = False

You could structure your timer in a different way by *ahem* abusing contextmanager and function attributes a little...
from contextlib import contextmanager
import time

@contextmanager
def timed(func):
    timed.start = time.time()
    try:
        yield func
    finally:
        timed.duration = time.time() - timed.start

def test(n):
    for _ in range(n):
        pass
    return n

def rec(n):
    if n:
        time.sleep(0.05)  # extra delay to notice the difference
        return rec(n - 1)
    else:
        return n

with timed(rec) as r:
    print(r(10))
    print(r(20))
    print(timed.duration)

with timed(test) as t:
    print(t(555555))
    print(t(666666))
    print(timed.duration)
Results:
# recursive
0
0
1.5130000114440918
# non-recursive
555555
666666
0.053999900817871094
If this is deemed a bad hack I'll gladly accept your criticism.

Although it is not an overall solution to the problem of integrating recursion with decorators, for timing alone it works out: the last element of the nested tuple is the overall run time, since it comes from the outermost recursive call. So if you had
@timed
def rec():
    ...
then to get the overall runtime, given the original function definitions, you could simply do
rec()[1]
Getting the result of the call, on the other hand, would then require recursing through the nested tuple:
def get(tup):
    if isinstance(tup, tuple):
        return get(tup[0])
    else:
        return tup
This might be too complicated to simply get the result of your function.
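Putting the two tricks together on the nested output from the question (assuming the decorated rec and the get helper above; the times are illustrative):
res = rec(3)     # ((((0, 1.9e-06), 8.1e-06), 1.28e-05), 1.9e-05)
print(res[1])    # 1.9e-05 -- the overall runtime, from the outermost call
print(get(res))  # 0 -- the actual result, unwrapped by get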

I encountered the same issue when trying to profile a simple quicksort implementation.
The main issue is that decorators are executed on each function call, and we need something that can keep state across those calls so we can sum them all at the end. Decorators alone are not the right tool for the job.
However, one idea is to abuse the fact that functions are objects and can have attributes. This is explored below with a simple class-based decorator. Something that must be understood is that, by using the decorator syntax sugar (@), the function will keep accumulating its timings across calls.
from typing import Any, Callable
from time import perf_counter

class timeit:
    def __init__(self, func: Callable) -> None:
        self.func = func
        self.timed = []

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        start = perf_counter()
        res = self.func(*args, **kwds)
        end = perf_counter()
        self.timed.append(end - start)
        return res
# usage
@timeit
def rec(n):
    ...

if __name__ == "__main__":
    result = rec(4)  # rec result
    print(f"Took {sum(rec.timed):.2f} seconds")
    # Out: Took 3.39 seconds
    result = rec(4)  # rec result
    print(f"Took {sum(rec.timed):.2f} seconds")
    # timings between calls are accumulated
    # Out: Took 6.78 seconds
Which brings us to a solution inspired by @r.ook: below is a simple context manager that stores each run's timing and prints the sum at the end (__exit__). Notice that, because each timing requires its own with statement, different runs are not accumulated.
from typing import Any, Callable
from time import perf_counter

class timeit:
    def __init__(self, func: Callable) -> None:
        self.func = func
        self.timed = []

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        start = perf_counter()
        res = self.func(*args, **kwds)
        end = perf_counter()
        self.timed.append(end - start)
        return res

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # TODO: report `exc_*` if an exception gets raised
        print(f"Took {sum(self.timed):.2f} seconds")
        return

# usage
def rec(n):
    ...

if __name__ == "__main__":
    with timeit(rec) as f:
        result = f(4)  # rec result
    # Out: Took 3.39 seconds


How could one write a debounce decorator in python which debounces not only on function called but also on the function arguments/combination of function arguments used?
Debouncing means suppressing calls to a function within a given timeframe: say you call a function 100 times within 1 second, but you only want to allow it to run once every 10 seconds. A debounce-decorated function then runs once, 10 seconds after the last call, provided no new calls were made in the meantime. Here I'm asking how one could debounce a function call with specific function arguments.
An example could be to debounce an expensive update of a person object like:
@debounce(seconds=10)
def update_person(person_id):
    # time-consuming, expensive op
    print('>>Updated person {}'.format(person_id))
Then debouncing on the function - including function arguments:
update_person(person_id=144)
update_person(person_id=144)
update_person(person_id=144)
>>Updated person 144
update_person(person_id=144)
update_person(person_id=355)
>>Updated person 144
>>Updated person 355
So calling the function update_person with the same person_id would be suppressed (debounced) until the 10-second debounce interval has passed without a new call to the function with that same person_id.
There are a few debounce decorators around, but none of them includes the function arguments; for example: https://gist.github.com/walkermatt/2871026
I've done a similar throttle decorator by function and arguments:
import time

def throttle(s, keep=60):
    def decorate(f):
        caller = {}

        def wrapped(*args, **kwargs):
            nonlocal caller
            called_args = '{}'.format(*args)
            t_ = time.time()
            if caller.get(called_args, None) is None or t_ - caller.get(called_args, 0) >= s:
                result = f(*args, **kwargs)
                # keep only calls younger than `keep` seconds
                caller = {key: val for key, val in caller.items() if t_ - val < keep}
                caller[called_args] = t_
                return result
            # keep only calls younger than `keep` seconds
            caller = {key: val for key, val in caller.items() if t_ - val < keep}
            caller[called_args] = t_

        return wrapped
    return decorate
return decorate
The main takeaway is that it keeps the function arguments in caller[called_args].
See also the difference between throttle and debounce: http://demo.nimius.net/debounce_throttle/
Update:
After some tinkering with the above throttle decorator and the threading.Timer example in the gist, I actually think this should work:
from threading import Timer
from inspect import signature

def debounce(wait):
    def decorator(fn):
        sig = signature(fn)
        caller = {}

        def debounced(*args, **kwargs):
            try:
                bound_args = sig.bind(*args, **kwargs)
                bound_args.apply_defaults()
                called_args = fn.__name__ + str(dict(bound_args.arguments))
            except TypeError:
                called_args = ''

            def call_it(key):
                try:
                    # always remove on call
                    caller.pop(key)
                except KeyError:
                    pass
                fn(*args, **kwargs)

            try:
                # always try to cancel a pending timer for these arguments
                caller[called_args].cancel()
            except KeyError:
                pass
            caller[called_args] = Timer(wait, call_it, [called_args])
            caller[called_args].start()

        return debounced
    return decorator
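A quick usage sketch of this decorator with the update_person example from the question (the sleep at the end just keeps the output order deterministic; the timers would fire anyway):
import time

@debounce(10)
def update_person(person_id):
    print('>>Updated person {}'.format(person_id))

update_person(person_id=144)
update_person(person_id=144)
update_person(person_id=355)

time.sleep(11)  # each distinct argument set fires exactly once
# >>Updated person 144
# >>Updated person 355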
I've had the same need to build a debounce annotation for a personal project. After stumbling upon the same gist / discussion you have, I ended up with the following solution:
import threading

def debounce(wait_time):
    """
    Decorator that will debounce a function so that it is called after wait_time seconds.
    If it is called multiple times, it will wait for the last call to be debounced and run only that one.
    """
    def decorator(function):
        def debounced(*args, **kwargs):
            def call_function():
                debounced._timer = None
                return function(*args, **kwargs)

            # if we already have a call to the function currently waiting to be executed, reset the timer
            if debounced._timer is not None:
                debounced._timer.cancel()

            # after wait_time, call the function provided to the decorator with its arguments
            debounced._timer = threading.Timer(wait_time, call_function)
            debounced._timer.start()

        debounced._timer = None
        return debounced
    return decorator
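One design difference worth noting: unlike the argument-aware version further up, this decorator keeps a single timer per function, so calls with different arguments share one debounce window and only the last call's arguments survive. A minimal sketch (reusing the debounce decorator above; names are illustrative):
@debounce(0.5)
def save(doc_id):
    print('saved', doc_id)

save(1)
save(2)  # cancels the pending save(1); only 'saved 2' prints, 0.5s after the last call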
I've created an open-source project to provide functions such as debounce, throttle, filter ... as decorators, contributions are more than welcome to improve on the solution I have for these decorators / add other useful decorators: decorator-operations repository

How to eliminate recursion in Python function containing control flow

I have a function of the form:
def my_func(my_list):
    for i, thing in enumerate(my_list):
        my_val = another_func(thing)
        if i == 0:
            pass  # do some stuff
        else:
            if my_val == something:
                return my_func(my_list[:-1])
            # do some other stuff
The recursive part is getting called enough that I am getting a RecursionError, so I am trying to replace it with a while loop as explained here, but I can't work out how to reconcile this with the control flow statements in the function. Any help would be gratefully received!
There may be a better exact answer, but the most general (or maybe quick-and-dirty) way to switch from recursion to iteration is to manage the stack yourself: just do manually what the programming language does implicitly, with your own unlimited stack.
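As a generic illustration of that idea (a hypothetical tree sum, not the question's function), recursion over a structure can be replaced by pushing the pending work onto your own list:
class Node:
    def __init__(self, value, children=()):
        self.value = value
        self.children = list(children)

def sum_tree(root):
    # recursive original: root.value + sum(sum_tree(c) for c in root.children)
    total = 0
    stack = [root]  # our own stack replaces the call stack
    while stack:
        node = stack.pop()
        total += node.value
        stack.extend(node.children)
    return total

print(sum_tree(Node(1, [Node(2), Node(3, [Node(4)])])))  # 10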
In this particular case there is tail recursion: the result of the recursive my_func call is not used by the caller in any way, it is immediately returned, so the deepest call's result simply bubbles up to the top. This is what makes @outoftime's solution possible. We are only interested in the into-recursion pass, since the return-from-recursion pass is trivial, so the into-recursion pass can be replaced with iteration:
def my_func(my_list):
    while True:
        for i, thing in enumerate(my_list):
            my_val = another_func(thing)
            if i == 0:
                pass  # do some stuff
            else:
                if my_val == something:
                    my_list = my_list[:-1]
                    break  # restart the loop on the shortened list
                # do some other stuff
        else:
            return  # the for loop completed without a "recursive" restart
This is an iterative method.
Decorator
class TailCall(object):
    def __init__(self, __function__):
        self.__function__ = __function__
        self.args = None
        self.kwargs = None
        self.has_params = False

    def __call__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
        self.has_params = True
        return self

    def __handle__(self):
        if not self.has_params:
            raise TypeError
        if type(self.__function__) is TailCaller:
            return self.__function__.call(*self.args, **self.kwargs)
        return self.__function__(*self.args, **self.kwargs)

class TailCaller(object):
    def __init__(self, call):
        self.call = call

    def __call__(self, *args, **kwargs):
        ret = self.call(*args, **kwargs)
        while type(ret) is TailCall:
            ret = ret.__handle__()
        return ret

@TailCaller
def factorial(n, prev=1):
    if n < 2:
        return prev
    return TailCall(factorial)(n - 1, n * prev)
To use this decorator, simply wrap your function with the @TailCaller decorator and return a TailCall instance initialized with the required params.
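For example, with the classes above, a depth far beyond the default recursion limit completes without a RecursionError, because each "recursive" step just returns a TailCall object that the TailCaller loop unwinds:
import sys

print(sys.getrecursionlimit())    # typically 1000
print(len(str(factorial(5000))))  # ~16300 digits, computed iteratively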
I'd like to say thank you for the inspiration to @o2genum and to Kyle Miller, who wrote an excellent article about this problem.
However desirable it is to remove this limitation, you should probably be aware of why tail-call optimization is not officially supported in Python.

How can I capture return value with Python timeit module?

I'm running several machine learning algorithms with sklearn in a for loop and want to see how long each of them takes. The problem is I also need to return a value, and I don't want to have to run anything more than once because each algorithm takes so long. Is there a way to capture the return value 'clf' using Python's timeit module, or a similar one, with a function like this...
def RandomForest(train_input, train_output):
    clf = ensemble.RandomForestClassifier(n_estimators=10)
    clf.fit(train_input, train_output)
    return clf
when I call the function like this
t = Timer(lambda: RandomForest(trainX, trainy))
print t.timeit(number=1)
P.S. I also don't want to set a global 'clf' because I might want to do multithreading or multiprocessing later.
For Python 3.5 you can override the value of timeit.template
import timeit

timeit.template = """
def inner(_it, _timer{init}):
    {setup}
    _t0 = _timer()
    for _i in _it:
        retval = {stmt}
    _t1 = _timer()
    return _t1 - _t0, retval
"""
unutbu's answer works for Python 3.4 but not 3.5, as the _template_func function appears to have been removed in 3.5.
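Assuming the template override above has run, Timer.timeit then returns a (time, value) pair even for callable statements; a sketch with a stand-in for the question's classifier (fit_model and the sleep are made up):
import time
import timeit

def fit_model():  # stand-in for RandomForest(trainX, trainy)
    time.sleep(0.1)
    return 'clf'

elapsed, clf = timeit.Timer(fit_model).timeit(number=1)
print(elapsed, clf)  # e.g. 0.1003 clf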
The problem boils down to timeit._template_func not returning the function's return value:
def _template_func(setup, func):
    """Create a timer function. Used if the "statement" is a callable."""
    def inner(_it, _timer, _func=func):
        setup()
        _t0 = _timer()
        for _i in _it:
            _func()
        _t1 = _timer()
        return _t1 - _t0
    return inner
We can bend timeit to our will with a bit of monkey-patching:
import timeit
import time

def _template_func(setup, func):
    """Create a timer function. Used if the "statement" is a callable."""
    def inner(_it, _timer, _func=func):
        setup()
        _t0 = _timer()
        for _i in _it:
            retval = _func()
        _t1 = _timer()
        return _t1 - _t0, retval
    return inner

timeit._template_func = _template_func

def foo():
    time.sleep(1)
    return 42

t = timeit.Timer(foo)
print(t.timeit(number=1))
returns
(1.0010340213775635, 42)
The first value is the timeit result (in seconds), the second value is the function's return value.
Note that the monkey-patch above only affects the behavior of timeit when a callable is passed to timeit.Timer. If you pass a string statement, then you'd have to (similarly) monkey-patch the timeit.template string.
Funnily enough, I'm also doing machine-learning, and have a similar requirement ;-)
I solved it as follows, by writing a function that:
runs your function
prints the running time, along with the name of your function
returns the results
Let's say you want to time:
clf = RandomForest(train_input, train_output)
Then do:
clf = time_fn( RandomForest, train_input, train_output )
Stdout will show something like:
mymodule.RandomForest: 0.421609s
Code for time_fn:
import time

def time_fn(fn, *args, **kwargs):
    start = time.clock()
    results = fn(*args, **kwargs)
    end = time.clock()
    fn_name = fn.__module__ + "." + fn.__name__
    print fn_name + ": " + str(end - start) + "s"
    return results
If I understand it well, since Python 3.5 you can pass globals to each Timer instance without having to define them in your block of code. I am not sure if it would have the same issues with parallelization.
My approach would be something like:
clf = ensemble.RandomForestClassifier(n_estimators=10)
myGlobals = globals()
myGlobals.update({'clf': clf})
t = Timer(stmt='clf.fit(trainX,trainy)', globals=myGlobals)
print(t.timeit(number=1))
print(clf)
As of 2020, in IPython or Jupyter notebook it is
t = %timeit -n1 -r1 -o RandomForest(trainX, trainy)
t.best
If you don't want to monkey-patch timeit, you could try using a global list, as below. This will also work in Python 2.7, which doesn't have the globals argument in timeit():
from timeit import timeit
import time

# Function to time - plagiarised from the answer above :-)
def foo():
    time.sleep(1)
    return 42

result = []
print timeit('result.append(foo())', setup='from __main__ import result, foo', number=1)
print result[0]
will print the time and then the result.
An approach I use is to "append" the running time to the results of the timed function. So, I write a very simple decorator using the time module:
import time

def timed(func):
    def func_wrapper(*args, **kwargs):
        s = time.clock()
        result = func(*args, **kwargs)
        e = time.clock()
        return result + (e - s,)  # concatenates, so func must return a tuple
    return func_wrapper
And then I use the decorator for the function I want to time.
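Since the elapsed time is concatenated onto the result, this only works when the wrapped function already returns a tuple; a minimal sketch (the names are made up):
@timed
def fit():
    return ('clf', 0.93)  # already a tuple

model, score, elapsed = fit()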
For Python 3.X I use this approach:
import timeit

# Redefining the default Timer template to make 'timeit' return the
# test's execution timing and the function's return value
new_template = """
def inner(_it, _timer{init}):
    {setup}
    _t0 = _timer()
    for _i in _it:
        ret_val = {stmt}
    _t1 = _timer()
    return _t1 - _t0, ret_val
"""

timeit.template = new_template

timeit eats return value

I want to measure execution time of a function on the cheap, something like this:
import time

def my_timeit(func, *args, **kwargs):
    t0 = time.time()
    result = func(*args, **kwargs)
    delta = time.time() - t0
    return delta, result

def foo():
    time.sleep(1.23)
    return 'potato'

delta, result = my_timeit(foo)
But I want to use timeit, profile or another built-in to handle whatever common pitfalls arise from platform differences, and it would probably also be better to get the actual execution time rather than the wall-clock time.
I tried using timeit.Timer(foo).timeit(number=1) but the interface seems to obscure the return value.
This is my current attempt. But I would welcome any suggestions, because this feels too hacky and could probably do with improvement.
import time
from timeit import Timer

def my_timeit(func, *args, **kwargs):
    output_container = []

    def wrapper():
        output_container.append(func(*args, **kwargs))

    timer = Timer(wrapper)
    delta = timer.timeit(1)
    return delta, output_container.pop()

def foo():
    time.sleep(1.111)
    return 'potato'

delta, result = my_timeit(foo)
edit: adapted to work as a decorator below:
import functools
from timeit import Timer

def timeit_decorator(the_func):
    @functools.wraps(the_func)
    def my_timeit(*args, **kwargs):
        output_container = []

        def wrapper():
            output_container.append(the_func(*args, **kwargs))

        timer = Timer(wrapper)
        delta = timer.timeit(1)
        my_timeit.last_execution_time = delta
        return output_container.pop()

    return my_timeit
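A usage sketch of the decorator form; the elapsed time of the latest call is stashed on the wrapper itself:
import time

@timeit_decorator
def foo():
    time.sleep(1.111)
    return 'potato'

result = foo()                  # 'potato'
print(foo.last_execution_time)  # ~1.111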
How about
$ time python yourprogram.py < input.txt
This is the output for a python script I ran
[20:13:29] praveen:jan$ time python mtrick.py < input_mtrick.txt
3 3 9
1 2 3 4
real 0m0.067s
user 0m0.016s
sys 0m0.012s

Can I memoize a Python generator?

I have a function called runquery that makes calls to a database and then yields the rows one by one. I wrote a memoize decorator (or more accurately, I just stole one from this stackoverflow question), but on subsequent calls it just yields an empty sequence, presumably because a generator's values can only be yielded once.
How could I modify the memoization decorator so that it works for Python generators? I realise I will need to store the values in memory at some point, but I'd like to handle this within the decorator and not modify the original function.
The current code of the memoization function is:
def memoized(f):
    # Warning: doesn't work if f yields values
    cache = {}

    def ret(*args):
        if args in cache:
            return cache[args]
        else:
            answer = f(*args)
            cache[args] = answer
            return answer

    return ret
I realise this is somewhat of an old question, but for those who want a full solution: here's one, based on jsbueno's suggestion:
from itertools import tee
from types import GeneratorType

Tee = tee([], 1)[0].__class__

def memoized(f):
    cache = {}

    def ret(*args):
        if args not in cache:
            cache[args] = f(*args)
        if isinstance(cache[args], (GeneratorType, Tee)):
            # the original can't be used any more,
            # so we need to change the cache as well
            cache[args], r = tee(cache[args])
            return r
        return cache[args]

    return ret
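A quick sketch of how the tee-based cache behaves across calls, with a stand-in generator in place of the real database query:
@memoized
def runquery(query):  # stand-in for the real database-backed generator
    print('hitting the database')
    for row in range(3):
        yield row

print(list(runquery('q')))  # hitting the database, then [0, 1, 2]
print(list(runquery('q')))  # [0, 1, 2] -- replayed from the tee'd cache, no second hit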
from itertools import tee

sequence, memoized_sequence = tee(sequence, 2)
Done.
It is easier for generators because the standard lib has this "tee" method!
Yes. There's a decorator posted here. Take note that as the poster says, you lose some of the benefit of lazy evaluation.
def memoize(func):
    def inner(arg):
        if isinstance(arg, list):
            # Make arg immutable
            arg = tuple(arg)
        if arg in inner.cache:
            print "Using cache for %s" % repr(arg)
            for i in inner.cache[arg]:
                yield i
        else:
            print "Building new for %s" % repr(arg)
            temp = []
            for i in func(arg):
                temp.append(i)
                yield i
            inner.cache[arg] = temp
    inner.cache = {}
    return inner

@memoize
def gen(x):
    if not x:
        yield 0
        return
    for i in xrange(len(x)):
        for a in gen(x[i + 1:]):
            yield a + x[0]

print "Round 1"
for a in gen([2, 3, 4, 5]):
    print a
print

print "Round 2"
for a in gen([2, 3, 4, 5]):
    print a
Similar to the other answers, but simpler if you know that f is a generator:
import functools
import itertools

def memoized_generator(f):
    cache = {}

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        k = args, frozenset(kwargs.items())
        it = cache[k] if k in cache else f(*args, **kwargs)
        cache[k], result = itertools.tee(it)
        return result

    return wrapper
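And a usage sketch of this variant (note the cache key distinguishes positional from keyword arguments):
@memoized_generator
def squares(n):
    print('computing')
    for i in range(n):
        yield i * i

print(list(squares(4)))    # computing, then [0, 1, 4, 9]
print(list(squares(4)))    # [0, 1, 4, 9] -- replayed from the cache
print(list(squares(n=4)))  # computing again: (4,) and {'n': 4} hash to different keys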
