I have a function called runquery that makes calls to a database and then yields the rows, one by one. I wrote a memoize decorator (or more accurately, I just stole one from this stackoverflow question) but on subsequent calls it just yields an empty sequence, presumably because a generator's values can only be yielded once.
How could I modify the memoization decorator so that it works for Python generators? I realise I will need to store the yielded values in memory at some point, but I'd like to handle this within the decorator and not modify the original function.
The current code of the memoization function is:
def memoized(f):
    """Cache the return value of ``f`` per tuple of positional arguments.

    Warning: doesn't work if ``f`` yields values. The object stored in the
    cache is then the generator itself, and a generator can only be consumed
    once, so later calls get back the same, already exhausted, iterator.

    Positional arguments must be hashable (they form the dict key); keyword
    arguments are not accepted.
    """
    import functools

    cache = {}

    @functools.wraps(f)  # keep f's __name__ / __doc__ on the wrapper
    def ret(*args):
        if args not in cache:
            cache[args] = f(*args)
        return cache[args]

    return ret
I realise this is somewhat of an old question, but for those who want a full solution: here's one, based on jsbueno's suggestion:
from itertools import tee
from types import GeneratorType
# Concrete class of the iterators returned by itertools.tee(), recovered from
# a throwaway instance so it can be used in isinstance() checks below.
Tee = tee([], 1)[0].__class__


def memoized(f):
    """Memoize ``f``, including when ``f`` returns a generator.

    Plain return values are cached and returned as-is.  When the cached value
    is a generator (or a tee iterator from an earlier call) it is split with
    ``itertools.tee``: one half replaces the cache entry, the other is handed
    to the caller, so every call gets an iterator that starts at the first
    item while the underlying generator runs only once.

    Positional arguments and keyword-argument values must be hashable.
    """
    import functools

    cache = {}

    @functools.wraps(f)
    def ret(*args, **kwargs):
        key = (args, frozenset(kwargs.items()))
        if key not in cache:
            cache[key] = f(*args, **kwargs)
        cached = cache[key]
        if isinstance(cached, (GeneratorType, Tee)):
            # the original can't be used any more once tee() has taken it
            # over, so the cache entry has to be replaced as well
            cache[key], fresh = tee(cached)
            return fresh
        return cached

    return ret
from itertools import tee
sequence, memoized_sequence = tee (sequence, 2)
Done.
It is easier for generators because the standard lib has this "tee" method!
Yes. There's a decorator posted here. Take note that as the poster says, you lose some of the benefit of lazy evaluation.
def memoize(func):
    """Memoize a one-argument generator function.

    The wrapper is itself a generator.  On a cache miss it yields the items
    of ``func(arg)`` while collecting them in a list; the list is stored in
    ``inner.cache`` only after ``func(arg)`` has been fully consumed.  On a
    cache hit the stored items are replayed.  A list argument is converted
    to a tuple so that it can be used as a dict key (``func`` then receives
    the tuple).
    """
    def inner(arg):
        if isinstance(arg, list):
            # Make arg immutable
            arg = tuple(arg)
        if arg in inner.cache:
            # print(...) with a single argument behaves the same on
            # Python 2 and Python 3
            print("Using cache for %s" % repr(arg))
            for i in inner.cache[arg]:
                yield i
        else:
            print("Building new for %s" % repr(arg))
            temp = []
            for i in func(arg):
                temp.append(i)
                yield i
            inner.cache[arg] = temp

    inner.cache = {}
    return inner
@memoize
def gen(x):
    """Recursive demo generator used to exercise the ``memoize`` decorator.

    Yields 0 for an empty sequence; otherwise, for each index ``i``, yields
    ``a + x[0]`` for every ``a`` produced by ``gen(x[i + 1:])``.
    """
    if not x:
        yield 0
        return
    for i in range(len(x)):
        for a in gen(x[i + 1:]):
            yield a + x[0]


print("Round 1")
for a in gen([2, 3, 4, 5]):
    print(a)
print("")  # blank separator line (same output on Python 2 and 3)
print("Round 2")
for a in gen([2, 3, 4, 5]):
    print(a)
Similar to the other answers, but simpler if you know that f is a generator:
def memoized_generator(f):
    """Memoize a function that is known to return an iterator/generator.

    Each call splits the cached iterator (or, on the first call, the fresh
    result of ``f``) with ``itertools.tee``; one half goes back into the
    cache and the other is returned to the caller.

    Positional arguments and keyword-argument values must be hashable.
    """
    import functools
    import itertools

    cache = {}

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        k = args, frozenset(kwargs.items())
        it = cache[k] if k in cache else f(*args, **kwargs)
        cache[k], result = itertools.tee(it)
        return result

    return wrapper
Related
I made up this simple, contrived example of some code I ran into at work. I'm trying to better understand why slow_function_1 (+ the way its decorators are structured) would cache function results properly, but the decorator applied to slow_function_2 would not. In this example, I'm trying to access cache information after calling the method; however, I consistently get the following error: AttributeError: 'function' object has no attribute 'cache_info'. I've searched high and low to try to fix this, but to no avail. This AttributeError is raised for both slow_function_1.cache_info() and slow_function_2.cache_info()
How do I view the cache between function calls? If anyone has any insight on the original problem of why slow_function_1 and slow_function_2 differ in caching behavior, I would appreciate that as well.
Thank you in advance!
import functools
import time
def format_args(func):
    """Wrap ``func`` so that list positional arguments arrive as tuples.

    Keyword arguments are forwarded untouched.  The returned ``inner`` is a
    new plain function: attributes that ``func`` carries are not copied onto
    it.
    """
    def inner(*args, **kwargs):
        formatted_args = [tuple(x) if isinstance(x, list) else x for x in args]
        return func(*formatted_args, **kwargs)

    return inner
def formatted_cache(func):
    """Wrap ``func``: convert list positional arguments to tuples, then call
    ``func`` through ``functools.lru_cache``.

    NOTE(review): ``functools.lru_cache()(func)`` is evaluated inside
    ``inner``, i.e. a brand-new cache wrapper is created on every call, and
    it is never stored anywhere.  ``inner`` itself is a plain function.
    """
    def inner(*args, **kwargs):
        formatted_args = [tuple(x) if isinstance(x, list) else x for x in args]
        return functools.lru_cache()(func)(*formatted_args, **kwargs)

    return inner
@format_args
@functools.lru_cache
def slow_function_1(a: list, b: bool):
    """Sleep one second, announce the execution, and return ``sum(a)``.

    Decorated innermost with ``functools.lru_cache`` and outermost with
    ``format_args``, so the name ``slow_function_1`` is bound to whatever
    ``format_args`` returns.
    """
    time.sleep(1)
    print("executing slow function 1")
    return sum(a)
@formatted_cache
def slow_function_2(a: list, b: bool):
    """Sleep one second, announce the execution, and return the product of
    the items of ``a``."""
    time.sleep(1)
    print("executing slow function 2")
    return functools.reduce((lambda x, y: x*y), a)
example_list = [1,2,3,4,5,6,7,8,9,10,11,12]
example_bool = True
slow_function_1(example_list, example_bool)
print(slow_function_1.cache_info())
slow_function_1(example_list, example_bool)
print(slow_function_1.cache_info())
slow_function_2(example_list, example_bool)
print(slow_function_2.cache_info())
slow_function_2(example_list, example_bool)
print(slow_function_2.cache_info())
Now that I stared at it for a good time, I don't think it's really possible to do this with a decorator. You need a lru_cache object to access the cache and all that stuff, and you need a second function to format the arguments to be hashable before passing to the lru_cache object. The decorator can't return both at once, and they can't be nested in each other to make one function with the best of both worlds.
def formatted_cache(func):
# first we assume func only takes in hashable arguments
# so cachedfunc only takes in hashable arguments
cachedfunc = functools.lru_cache(func)
# inner formats lists to hashable tuples
# then passes it to cachedfunc
def inner(*args, **kwargs):
formatted_args = [tuple(x) if type(x) == list else x for x in args]
return cachedfunc(*formatted_args, **kwargs)
# oh no, we can only return one function, but neither is good enough
I think the only way to move forward is to just accept that these have to be done in separate functions because of lru_cache's limitation. It's not that awkward, actually, just a simple higher order function like map.
import functools
import time
def formatted_call(func, *args, **kwargs):
    """Call ``func`` with every list positional argument turned into a tuple.

    Keyword arguments are forwarded unchanged.  Returns whatever ``func``
    returns.
    """
    formatted_args = [tuple(x) if isinstance(x, list) else x for x in args]
    return func(*formatted_args, **kwargs)
@functools.lru_cache
def slow_function_2(a: list, b: bool):
    """Sleep one second, announce the execution, and return the product of
    the items of ``a``.

    Wrapped directly in ``functools.lru_cache``, so arguments must be
    hashable and ``slow_function_2.cache_info()`` is available.
    """
    time.sleep(1)
    print("executing slow function 2")
    return functools.reduce((lambda x, y: x*y), a)
example_list = [1,2,3,4,5,6,7,8,9,10,11,12]
example_bool = True
formatted_call(slow_function_2, example_list, example_bool)
print(slow_function_2.cache_info())
formatted_call(slow_function_2, example_list, example_bool)
print(slow_function_2.cache_info())
I have a simple decorator to track the runtime of a function call:
def timed(f):
    """Decorator: make ``f`` return ``(result, elapsed_seconds)``.

    The elapsed time is the difference between two ``time.time()`` readings
    taken immediately around the call.  Positional and keyword arguments are
    forwarded to ``f``.
    """
    def caller(*args, **kwargs):
        start = time.time()
        res = f(*args, **kwargs)
        end = time.time()
        return res, end - start

    return caller
This can be used as follows, and returns a tuple of the function result and the execution time.
#timed
def test(n):
for _ in range(n):
pass
return 0
print(test(900)) # prints (0, 2.69e-05)
Simple enough. But now I want to apply this to recursive functions. Applying the above wrapper to a recursive function results in nested tuples with the times of each recursive call, as is expected.
#timed
def rec(n):
if n:
return rec(n - 1)
else:
return 0
print(rec(3)) # Prints ((((0, 1.90e-06), 8.10e-06), 1.28e-05), 1.90e-05)
What's an elegant way to write the decorator so that it handles recursion properly? Obviously, you could wrap the call in a timed function:
#timed
def wrapper():
return rec(3)
This will give a tuple of the result and the time, but I want all of it to be handled by the decorator so that the caller does not need to worry about defining a new function for every call. Ideas?
The problem here isn't really the decorator. The problem is that rec needs rec to be a function that behaves one way, but you want rec to be a function that behaves differently. There's no clean way to reconcile that with a single rec function.
The cleanest option is to stop requiring rec to be two things at once. Instead of using decorator notation, assign timed(rec) to a different name:
def rec(n):
...
timed_rec = timed(rec)
If you don't want two names, then rec needs to be written to understand the actual value that the decorated rec will return. For example,
#timed
def rec(n):
if n:
val, runtime = rec(n-1)
return val
else:
return 0
I prefer the other answers so far (particularly user2357112's answer), but you can also make a class-based decorator that detects whether the function has been activated, and if so, bypasses the timing:
import time
class fancy_timed(object):
    """Timing decorator that only times the outermost call.

    While a timed call is in progress ``self.active`` is True, and any
    re-entrant call (e.g. recursion through the decorated name) is passed
    straight to the wrapped function and returns its plain result.  The
    outermost call returns ``(result, elapsed_seconds)``.
    """

    def __init__(self, f):
        self.f = f
        self.active = False

    def __call__(self, *args, **kwargs):
        if self.active:
            return self.f(*args, **kwargs)
        start = time.time()
        self.active = True
        res = self.f(*args, **kwargs)
        end = time.time()
        self.active = False
        return res, end - start
#fancy_timed
def rec(n):
if n:
time.sleep(0.01)
return rec(n - 1)
else:
return 0
print(rec(3))
(class written with (object) so that this is compatible with py2k and py3k).
Note that to really work properly, the outermost call should use try and finally. Here's the fancied up fancy version of __call__:
def __call__(self, *args, **kwargs):
    """Time the outermost call only; pass re-entrant calls straight through.

    Returns ``(result, elapsed_seconds)`` for the outermost call and the
    plain result for nested ones.  ``self.active`` is reset in ``finally``
    so it is cleared even when the wrapped function raises.
    """
    if self.active:
        return self.f(*args, **kwargs)
    try:
        start = time.time()
        self.active = True
        res = self.f(*args, **kwargs)
        end = time.time()
        return res, end - start
    finally:
        self.active = False
You could structure your timer in a different way by *ahem* abusing the contextmanager and function attribute a little...
from contextlib import contextmanager
import time
@contextmanager
def timed(func):
    """Context manager that times the body of a ``with`` block.

    Yields ``func`` unchanged.  The start time is stored in ``timed.start``
    on entry and the elapsed seconds in ``timed.duration`` on exit (also
    when the body raises).  Both are attributes of the ``timed`` function
    object itself, so each new ``with`` block overwrites them.
    """
    timed.start = time.time()
    try:
        yield func
    finally:
        timed.duration = time.time() - timed.start
def test(n):
for _ in range(n):
pass
return n
def rec(n):
if n:
time.sleep(0.05) # extra delay to notice the difference
return rec(n - 1)
else:
return n
# Recursive case: the function under test is bound to ``r`` by the ``with``
# statement, so that is the name to call inside the block.
with timed(rec) as r:
    print(r(10))
    print(r(20))
print(timed.duration)
# Non-recursive case.
with timed(test) as t:
    print(t(555555))
    print(t(666666))
print(timed.duration)
Results:
# recursive
0
0
1.5130000114440918
# non-recursive
555555
666666
0.053999900817871094
If this is deemed a bad hack I'll gladly accept your criticism.
Although it is not an overall solution to the problem of integrating recursion with decorators, for the problem of timing only, I have verified that the last element of the tuple of the times is the overall run time, as this is the time from the upper-most recursive call. Thus if you had
#timed
def rec():
...
to get the overall runtime given the original function definitions you could simply do
rec()[1]
Getting the result of the call, on the other hand, would then require recursing through the nested tuple:
def get(tup):
    """Return the innermost first element of a left-nested tuple.

    Follows ``tup[0]`` for as long as the value is a tuple and returns the
    first non-tuple value reached; a non-tuple argument is returned as-is.
    Written as a loop so the nesting depth is not limited by the recursion
    limit.
    """
    while isinstance(tup, tuple):
        tup = tup[0]
    return tup
This might be too complicated to simply get the result of your function.
I encountered the same issue when trying to profile a simple quicksort implementation.
The main issue is that decorators are executed on each function call, and we need something that can keep state so we can sum all the calls at the end. Decorators are not the right tool for the job.
However, one idea is to abuse the fact that functions are objects and can have attributes. This is explored below with a simple decorator. Something that must be understood is that, by using the decorator's syntactic sugar (@), the function will always be accumulating its timings.
from typing import Any, Callable
from time import perf_counter
class timeit:
def __init__(self, func: Callable) -> None:
self.func = func
self.timed = []
def __call__(self, *args: Any, **kwds: Any) -> Any:
start = perf_counter()
res = self.func(*args, **kwds)
end = perf_counter()
self.timed.append(end - start)
return res
# usage
@timeit
def rec(n):
    ...


if __name__ == "__main__":
    result = rec(4)  # rec result
    # rec.timed is a list of per-call durations; reduce it to a single
    # number before applying the ``.2f`` format.
    print(f"Took {sum(rec.timed):.2f} seconds")
    # Out: Took 3.39 seconds
    result = rec(4)  # rec result
    # timings between calls are accumulated
    # Out: Took 6.78 seconds
Which brings us to a solution inspired by @r.ook: below is a simple context manager that stores the timing of each run and prints their sum at the end (in __exit__). Notice that, because each timing requires its own with statement, this will not accumulate different runs.
from typing import Any, Callable
from time import perf_counter
class timeit:
    """Callable wrapper and context manager that records call durations.

    Calling the instance forwards to ``func`` and appends the elapsed
    ``perf_counter()`` time of that call to ``self.timed``.  Used in a
    ``with`` statement it yields itself and, on exit, prints the sum of the
    recorded durations.
    """

    def __init__(self, func: Callable) -> None:
        self.func = func
        self.timed = []  # one entry (seconds) per call made through self

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        start = perf_counter()
        res = self.func(*args, **kwds)
        end = perf_counter()
        self.timed.append(end - start)
        return res

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # TODO: report `exc_*` if an exception gets raised
        print(f"Took {sum(self.timed):.2f} seconds")
        # returning None lets any exception from the with-body propagate
        return
# usage
def rec(n):
    ...


if __name__ == "__main__":
    with timeit(rec) as f:
        result = f(4)  # rec result
    # Out: Took 3.39 seconds
I have the following code, in which I simply have a decorator for caching a function's results, and as a concrete implementation, I used the Fibonacci function.
After playing around with the code, I wanted to print the cache variable, that's initiated in the cache wrapper.
(It's not because I suspect the cache might be faulty, I simply want to know how to access it without going into debug mode and put a breakpoint inside the decorator)
I tried to explore the fib_w_cache function in debug mode, which is supposed to actually be the wrapped fib_w_cache, but with no success.
import timeit
def cache(f, cache = dict()):
    """Decorator that caches results of ``f`` keyed by the string form of
    its arguments (``str(args) + str(kwargs)``).

    NOTE: the default ``cache`` dict is created once, when this function is
    defined, so every function decorated without an explicit ``cache``
    argument shares that same dict.  It is reachable from outside as
    ``cache.__defaults__[0]``.
    """
    import functools

    def args_to_str(*args, **kwargs):
        return str(args) + str(kwargs)

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        args_str = args_to_str(*args, **kwargs)
        if args_str not in cache:
            cache[args_str] = f(*args, **kwargs)
        # else: the value comes from the cache
        return cache[args_str]

    return wrapper
#cache
def fib_w_cache(n):
if n == 0: return 0
elif n == 1: return 1
else:
return fib_w_cache(n-2) + fib_w_cache(n-1)
def fib_wo_cache(n):
if n == 0: return 0
elif n == 1: return 1
else:
return fib_wo_cache(n-1) + fib_wo_cache(n-2)
print(timeit.timeit('[fib_wo_cache(i) for i in range(0,30)]', globals=globals(), number=1))
print(timeit.timeit('[fib_w_cache(i) for i in range(0,30)]', globals=globals(), number=1))
I admit this is not an "elegant" solution in a sense, but keep in mind that python functions are also objects. So with some slight modification to your code, I managed to inject the cache as an attribute of a decorated function:
import timeit
def cache(f):
    """Decorator that caches results of ``f`` in a per-function dict.

    The key is ``str(args) + str(kwargs)``.  The dict is created anew for
    each decorated function and exposed as the ``_cache`` attribute of the
    returned wrapper.
    """
    import functools

    def args_to_str(*args, **kwargs):
        return str(args) + str(kwargs)

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        args_str = args_to_str(*args, **kwargs)
        if args_str not in wrapper._cache:
            wrapper._cache[args_str] = f(*args, **kwargs)
        return wrapper._cache[args_str]

    # assigned after wraps() so this wrapper always gets its own fresh dict
    wrapper._cache = {}
    return wrapper
#cache
def fib_w_cache(n):
if n == 0: return 0
elif n == 1: return 1
else:
return fib_w_cache(n-2) + fib_w_cache(n-1)
@cache
def fib_w_cache_1(n):
    """Second cached Fibonacci function, used to show that each decorated
    function gets its own cache.  Recurses through its own decorated name."""
    if n == 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fib_w_cache_1(n-2) + fib_w_cache_1(n-1)
def fib_wo_cache(n):
if n == 0: return 0
elif n == 1: return 1
else:
return fib_wo_cache(n-1) + fib_wo_cache(n-2)
print(timeit.timeit('[fib_wo_cache(i) for i in range(0,30)]', globals=globals(), number=1))
print(timeit.timeit('[fib_w_cache(i) for i in range(0,30)]', globals=globals(), number=1))
print(fib_w_cache._cache)
print(fib_w_cache_1._cache) # to prove that caches are different instances for different functions
cache is of course a perfectly normal local variable in scope within the cache function, and a perfectly normal nonlocal cellvar in scope within the wrapper function, so if you want to access the value from there, you just do it—as you already are.
But what if you wanted to access it from somewhere else? Then there are two options.
First, cache happens to be defined at the global level, meaning any code anywhere (that hasn't hidden it with a local variable named cache) can access the function object.
And if you're trying to access the values of a function's default parameters from outside the function, they're available in the attributes of the function object. The inspect module docs explain the inspection-oriented attributes of each builtin type:
__defaults__ is a sequence of the values for all positional-or-keyword parameters, in order.
__kwdefaults__ is a mapping from keywords to values for all keyword-only parameters.
So:
>>> def f(a, b=0, c=1, *, d=2, e=3): pass
>>> f.__defaults__
(0, 1)
>>> f.__kwdefaults__
{'e': 3, 'd': 2}
So, for a simple case where you know there's exactly one default value and know which argument it belongs to, all you need is:
>>> cache.__defaults__[0]
{}
If you need to do something more complicated or dynamic, like get the default value for c in the f function above, you need to dig into other information—the only way to know that c's default value will be the second one in __defaults__ is to look at the attributes of the function's code object, like f.__code__.co_varnames, and figure it out from there. But usually, it's better to just use the inspect module's helpers. For example:
>>> inspect.signature(f).parameters['c'].default
1
>>> inspect.signature(cache).parameters['cache'].default
{}
Alternatively, if you're trying to access the cache from inside fib_w_cache, while there's no variable in lexical scope in that function body you can look at, you do know that the function body is only called by the decorator wrapper, and it is available there.
So, you can get your stack frame
frame = inspect.currentframe()
… follow it back to your caller:
back = frame.f_back
… and grab it from that frame's locals:
back.f_locals['cache']
It's worth noting that f_locals works like the locals function: it's actually a copy of the internal locals storage, so modifying it may have no effect, and that copy flattens nonlocal cell variables to regular local variables. If you wanted to access the actual cell variable, you'd have to grub around in things like back.f_code.co_freevars to get the index and then dig it out of the function object's __closure__. But usually, you don't care about that.
Just for a sake of completeness, python has caching decorator built-in in functools.lru_cache with some inspecting mechanisms:
from functools import lru_cache
@lru_cache(maxsize=None)
def fib_w_cache(n):
    """Return the n-th Fibonacci number (``fib(0) == 0``, ``fib(1) == 1``).

    Memoized with an unbounded ``functools.lru_cache``; hit/miss statistics
    are available through ``fib_w_cache.cache_info()``.
    """
    if n == 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fib_w_cache(n-2) + fib_w_cache(n-1)
print('fib_w_cache(10) = ', fib_w_cache(10))
print(fib_w_cache.cache_info())
Prints:
fib_w_cache(10) = 55
CacheInfo(hits=8, misses=11, maxsize=None, currsize=11)
I managed to find a solution (in some sense by #Patrick Haugh's advice).
I simply accessed cache.__defaults__[0] which holds the cache's dict.
The insights about the shared cache and how to avoid it were also quite useful.
Just as a note, the cache dictionary can only be accessed through the cache function object. It cannot be accessed through the decorated functions (at least as far as I understand). It logically aligns well with the fact that the cache is shared in my implementation, where on the other hand, in the alternative implementation that was proposed, it is local per decorated function.
You can make a class into a wrapper.
def args_to_str(*args, **kwargs):
    """Build a cache key from the string forms of ``args`` and ``kwargs``."""
    return str(args) + str(kwargs)


class Cache(object):
    """Class-based caching decorator.

    Each instance wraps one function and keeps its own ``cache`` dict, keyed
    by ``args_to_str(*args, **kwargs)``.
    """

    def __init__(self, func):
        import functools

        # copy __name__/__doc__ etc. first, then set our own attributes so
        # they cannot be overwritten by same-named attributes of ``func``
        functools.update_wrapper(self, func)
        self.func = func
        self.cache = {}

    def __call__(self, *args, **kwargs):
        args_str = args_to_str(*args, **kwargs)
        if args_str not in self.cache:
            self.cache[args_str] = self.func(*args, **kwargs)
        return self.cache[args_str]
Each function has its own cache. you can access it by calling function.cache. This also allows for any methods you wish to attach to your function.
If you wanted all decorated functions to share the same cache, you could use a class variable instead of an instance variable:
class SharedCache(object):
cache = {}
def __init__(self, func):
self.func = func
#rest of the the code is the same
#SharedCache
def function_1(stuff):
things
I have a function of the form:
def my_func(my_list):
for i, thing in enumerate(my_list):
my_val = another_func(thing)
if i == 0:
# do some stuff
else:
if my_val == something:
return my_func(my_list[:-1])
# do some other stuff
The recursive part is getting called enough that I am getting a RecursionError, so I am trying to replace it with a while loop as explained here, but I can't work out how to reconcile this with the control flow statements in the function. Any help would be gratefully received!
There may be a good exact answer, but the most general (or maybe quick-and-dirty) way to switch from recursion to iteration is to manage the stack yourself. Just do manually what programming language does implicitly and have your own unlimited stack.
In this particular case there is tail recursion. You see, my_func recursive call result is not used by the caller in any way, it is immediately returned. What happens in the end is that the deepest recursive call's result bubbles up and is being returned as it is. This is what makes #outoftime's solution possible. We are only interested in into-recursion pass, as the return-from-recursion pass is trivial. So the into-recursion pass is replaced with iterations.
def my_func(my_list):
    """Iterative form of the tail-recursive ``my_func``.

    Scans ``my_list``; when the restart condition is met, the list is
    shortened by one element and the scan starts over (this replaces the
    recursive ``return my_func(my_list[:-1])``).  When a scan completes
    without a restart, the function returns.
    """
    while True:
        for i, thing in enumerate(my_list):
            my_val = another_func(thing)
            if i == 0:
                pass  # do some stuff
            else:
                if my_val == something:
                    my_list = my_list[:-1]
                    break
                # do some other stuff
        else:
            # the for loop finished without ``break``: nothing left to redo
            return
This is an iterative method.
Decorator
class TailCall(object):
    """A deferred call: a function plus the arguments to call it with later.

    ``TailCall(func)(*args, **kwargs)`` only records the arguments and
    returns the ``TailCall`` object itself; the call is performed by
    ``__handle__``.
    """

    def __init__(self, __function__):
        self.__function__ = __function__
        self.args = None
        self.kwargs = None
        self.has_params = False

    def __call__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
        self.has_params = True
        return self

    def __handle__(self):
        """Perform the recorded call and return its result.

        Raises TypeError if no arguments were ever recorded.
        """
        if not self.has_params:
            raise TypeError("TailCall was never given its call arguments")
        target = self.__function__
        if type(target) is TailCaller:
            # call the undecorated function so that only the outermost
            # TailCaller runs the trampoline loop
            target = target.call
        return target(*self.args, **self.kwargs)


class TailCaller(object):
    """Decorator that runs a function and keeps resolving ``TailCall``
    results in a loop until an ordinary value comes back."""

    def __init__(self, call):
        self.call = call

    def __call__(self, *args, **kwargs):
        ret = self.call(*args, **kwargs)
        while type(ret) is TailCall:
            ret = ret.__handle__()
        return ret


@TailCaller
def factorial(n, prev=1):
    """Return ``prev * n!`` using a trampolined tail call."""
    if n < 2:
        return prev
    return TailCall(factorial)(n-1, n * prev)
To use this decorator, simply wrap your function with the @TailCaller decorator and return a TailCall instance initialized with the required params.
I'd like to say thank you for inspiration to #o2genum and to Kyle Miller who wrote an excellent article about this problem.
Despite how good is to remove this limitation, probably, you have to be
aware of why this feature is not officially supported.
I am writing a function ChnNumber that converts an Arabic numeral string to a Chinese financial numeral string. I worked out a tree-recursion form, but when I tried to get a tail-recursion form, it was really difficult for me to handle the situations where bit equals 6, 7 or 8, or is 10 or bigger.
You can see how it works at the end of my question.
Here's the tree-recursion solution. It works:
# -*- coding:utf-8 -*-
unitArab = (2, 3, 4, 5, 9)
# unitStr = u'十百千万亿'  # this is an alternative
unitStr = u'拾佰仟万亿'
# digit count of the remaining number -> unit character of its leading digit
unitDic = dict(zip(unitArab, (list(unitStr))))
numArab = list(u'0123456789')
# numStr = u'零一二三四五六七八九'  # this is an alternative
numStr = u'零壹贰叁肆伍陆柒捌玖'
# digit character -> Chinese numeral character
numDic = dict(zip(numArab, list(numStr)))


def ChnNumber(s):
    """Convert the decimal digit string ``s`` to Chinese financial numerals
    (tree-recursive version)."""

    def wrapper(v):
        # abbreviate: collapse runs of 零 into one, then drop a trailing 零
        while u'零零' in v:
            v = v.replace(u'零零', u'零')
        return v[:-1] if v[-1] == u'零' else v

    def recur(s, bit):
        # receives the number string and its length
        if bit == 1:
            return numDic[s]
        if s[0] == u'0':
            return wrapper(u'%s%s' % (u'零', recur(s[1:], bit - 1)))
        if bit < 6 or bit == 9:
            return wrapper(u'%s%s%s' % (numDic[s[0]], unitDic[bit], recur(s[1:], bit - 1)))
        # below is the hard part to be converted to tail recursion:
        # split off the last 4 (万) or 8 (亿) digits and convert both halves
        if bit < 9:
            return u'%s%s%s' % (recur(s[:-4], bit - 4), u"万", recur(s[-4:], 4))
        if bit > 9:
            return u'%s%s%s' % (recur(s[:-8], bit - 8), u"亿", recur(s[-8:], 8))

    return recur(s, len(s))
My attempt version is only in recur function, I use a closure res and move the bit inside the recur so there is less arguments.:
res=[]
def recur(s):
bit=len(s)
print s,bit,res
if bit==0:
return ''.join(res)
if bit==1:
res.append(numDic[s])
return recur(s[1:])
if s[0]==u'0':
res.append(u'零')
return recur(s[1:])
if bit<6 or bit==9:
res.append(u'%s%s' %(numDic[s[0]],unitDic[bit]))
return recur(s[1:])
if bit<9:
#...can't work it out
if bit>9:
#...can't work it out
the test code is:
for i in range(17):
v1='9'+'0'*(i+1)
v2='9'+'0'*i+'9'
v3='1'*(i+2)
print '%s->%s\n%s->%s\n%s->%s'% (v1,ChnNumber(v1),v2,ChnNumber(v2),v3,ChnNumber(v3))
which should output:
>>>
90->玖拾
99->玖拾玖
11->壹拾壹
900->玖佰
909->玖佰零玖
111->壹佰壹拾壹
9000->玖仟
9009->玖仟零玖
1111->壹仟壹佰壹拾壹
90000->玖万
90009->玖万零玖
11111->壹万壹仟壹佰壹拾壹
900000->玖拾万
900009->玖拾万零玖
111111->壹拾壹万壹仟壹佰壹拾壹
9000000->玖佰万
9000009->玖佰万零玖
1111111->壹佰壹拾壹万壹仟壹佰壹拾壹
90000000->玖仟万
90000009->玖仟万零玖
11111111->壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
900000000->玖亿
900000009->玖亿零玖
111111111->壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
9000000000->玖拾亿
9000000009->玖拾亿零玖
1111111111->壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
90000000000->玖佰亿
90000000009->玖佰亿零玖
11111111111->壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
900000000000->玖仟亿
900000000009->玖仟亿零玖
111111111111->壹仟壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
9000000000000->玖万亿
9000000000009->玖万亿零玖
1111111111111->壹万壹仟壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
90000000000000->玖拾万亿
90000000000009->玖拾万亿零玖
11111111111111->壹拾壹万壹仟壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
900000000000000->玖佰万亿
900000000000009->玖佰万亿零玖
111111111111111->壹佰壹拾壹万壹仟壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
9000000000000000->玖仟万亿
9000000000000009->玖仟万亿零玖
1111111111111111->壹仟壹佰壹拾壹万壹仟壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
90000000000000000->玖亿亿
90000000000000009->玖亿亿零玖
11111111111111111->壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
900000000000000000->玖拾亿亿
900000000000000009->玖拾亿亿零玖
111111111111111111->壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹
Python doesn't support tail call elimination nor tail call optimizations. However, there are a number of ways in which you can mimic this approach (Trampolines being the most widely used in other languages.)
Tail call recursive functions should look like the following pseudo code:
def tail_call(*args, acc):
if condition(*args):
return acc
else:
# Operations happen here, producing new_args and new_acc
return tail_call(*new_args, new_acc)
For your example I would not form a closure over anything as your are introducing side-effects and stateful manipulation. Instead, anything that needs to be modified should be modified in isolation of everything else. That makes it easier to reason about.
Copy whatever you're attempting to change (using string.copy for the final output) and pass it in as an argument to the next recursive call. That's where the acc variable comes into play. It's "accumulating" all your changes up to that point.
A classical trampoline can be had from this snippet. There, they are wrapping the function in an object which will eventually either return a result or return another function object which should be called. I prefer this approach as I find it easier to reason about.
This isn't the only way. Take a look at this code snippet. The "magic" occurs when it reaches a point which "solves" the condition and it throws an exception to escape the infinite loop.
Finally, you can read about Trampolines here, here and here.
I keep studying this question off and on these days. and now, I work it out!
NOTE,not just tail-recursion, it's also pure Functional Programming!
The key is to think in a different way (tree-recursion version is processing numbers from left to right while this version is from right to left)
# position (counted from the right, modulo 8) -> unit character
unitDic = dict(zip(range(8), u'拾佰仟万拾佰仟亿'))
# digit character -> Chinese numeral character
numDic = dict(zip('0123456789', u'零壹贰叁肆伍陆柒捌玖'))
# (pattern, replacement) clean-up rules, applied in this order
wapDic = [(u'零拾', u'零'), (u'零佰', u'零'), (u'零仟', u'零'),
          (u'零万', u'万'), (u'零亿', u'亿'), (u'亿万', u'亿'),
          (u'零零', u'零'), ]


# pure FP
def ChnNumber(s):
    """Convert the decimal digit string ``s`` to Chinese financial numerals.

    Works from right to left: every digit except the last is emitted with
    its unit, the clean-up rules in ``wapDic`` are applied, and the last
    digit is appended unless it is zero.  An all-zero input yields 零 and an
    empty input yields an empty string.
    """

    def wrapper(s, wd=wapDic):
        # apply each (pattern, replacement) rule until it no longer matches
        def rep(s, k, v):
            if k in s:
                return rep(s.replace(k, v), k, v)
            return s
        if not wd:
            return s
        return wrapper(rep(s, *wd[0]), wd[1:])

    def recur(s, acc='', ind=0):
        # tail-recursive accumulation of "<digit><unit>" pairs, right to left
        if s == '':
            return acc
        return recur(s[:-1], numDic[s[-1]] + unitDic[ind % 8] + acc, ind + 1)

    def end(s):
        if s[-1] != '0':
            return numDic[s[-1]]
        return ''

    def result(start, end):
        # slice instead of index so an empty ``start`` does not raise
        if end == '' and start[-1:] == u'零':
            return start[:-1]
        return start + end

    if not s:
        return u''
    converted = result(wrapper(recur(s[:-1])), end(s))
    # everything was abbreviated away only when the value is zero
    return converted or numDic['0']
for i in range(18):
v1='9'+'0'*(i+1)
v2='9'+'0'*i+'9'
v3='1'*(i+2)
print ('%s->%s\n%s->%s\n%s->%s'% (v1,ChnNumber(v1),v2,ChnNumber(v2),v3,ChnNumber(v3)))
If anyone says that it won't work when facing a huge number (something like a billion-digit number), yes, I admit that, but the following version can handle it (it is no longer pure FP, but a language with proper tail calls would not need this version anyway):
class TailCaller(object):
    """Decorator that runs ``f`` and keeps resolving ``TailCall`` results in
    a loop until an ordinary value comes back."""

    def __init__(self, f):
        self.f = f

    def __call__(self, *args, **kwargs):
        ret = self.f(*args, **kwargs)
        while type(ret) is TailCall:
            ret = ret.handle()
        return ret


class TailCall(object):
    """A deferred call: the callable plus the arguments to invoke it with."""

    def __init__(self, call, *args, **kwargs):
        self.call = call
        self.args = args
        self.kwargs = kwargs

    def handle(self):
        """Perform the recorded call once and return its result."""
        if type(self.call) is TailCaller:
            # use the undecorated function so that only the outermost
            # TailCaller runs the trampoline loop
            return self.call.f(*self.args, **self.kwargs)
        else:
            # plain callable: it is stored on ``self.call``
            return self.call(*self.args, **self.kwargs)
def ChnNumber(s):
    """Convert the decimal digit string ``s`` to Chinese financial numerals.

    Same right-to-left algorithm as the purely functional version, but the
    self-recursive helpers return ``TailCall`` objects and are wrapped in
    ``TailCaller``, so their recursion runs as a loop.  An all-zero input
    yields 零 and an empty input yields an empty string.
    """

    def wrapper(s, wd=wapDic):
        @TailCaller
        def rep(s, k, v):
            # apply one (pattern, replacement) rule until it stops matching
            if k in s:
                return TailCall(rep, s.replace(k, v), k, v)
            return s
        if not wd:
            return s
        return wrapper(rep(s, *wd[0]), wd[1:])

    @TailCaller
    def recur(s, acc='', ind=0):
        # accumulate "<digit><unit>" pairs from right to left
        if s == '':
            return acc
        return TailCall(recur, s[:-1], numDic[s[-1]] + unitDic[ind % 8] + acc, ind + 1)

    def end(s):
        if s[-1] != '0':
            return numDic[s[-1]]
        return ''

    def result(start, end):
        # slice instead of index so an empty ``start`` does not raise
        if end == '' and start[-1:] == u'零':
            return start[:-1]
        return start + end

    if not s:
        return u''
    converted = result(wrapper(recur(s[:-1])), end(s))
    # everything was abbreviated away only when the value is zero
    return converted or numDic['0']