Generate function with arguments filled in when creating it? - python

My goal is to generate functions dynamically and then save them in a file. For example, in my current attempt, I call create_file:
import io

def create_file(a_value):
    a_func = make_concrete_func(a_value)
    write_to_file([a_func], '/tmp/code.py')

def make_concrete_func(a_value):
    def concrete_func(b, k):
        return b + k + a_value
    return concrete_func

def write_to_file(code_list, path):
    import inspect
    code_str_list = [inspect.getsource(c) for c in code_list]
    with open(path, 'w') as ofh:
        for c in code_str_list:
            fh = io.StringIO(c)
            ofh.writelines(fh.readlines())
            ofh.write('\n')

create_file('my_value')
The output I want is (file /tmp/code.py):
def concrete_func(b, k):
    return b + k + 'my_value'
The output I get is (file '/tmp/code.py'):
def concrete_func(b, k):
    return b + k + a_value
UPDATE: My solution uses inspect.getsource, which returns a string, so I wonder if I have limited your options: most solutions below suggest a string replacement. The solution need not use inspect.getsource; you could write it any way that produces the desired output.
UPDATE 2: The reason I am doing this is that I want to generate a file for Amazon Lambda. Amazon Lambda takes a Python file and its virtual environment and will execute it for you (relieving you from worrying about scalability and fault tolerance). You tell Lambda which file and which function to call, and Lambda executes it for you.

A function definition doesn't look up its free variables (variables that are not defined in the function itself) at definition time. I.e., concrete_func here:
def make_concrete_func(a_value):
    def concrete_func(b, k):
        return b + k + a_value
    return concrete_func
doesn't look up a_value when it is defined; instead, it contains code that loads a_value from its closure (simplified: the enclosing function) at runtime.
You can see this by disassembling the returned function:
f = make_concrete_func(42)
import dis
dis.dis(f)
  3           0 LOAD_FAST                0 (b)
              3 LOAD_FAST                1 (k)
              6 BINARY_ADD
              7 LOAD_DEREF               0 (a_value)
             10 BINARY_ADD
             11 RETURN_VALUE
You could maybe do what you want by editing the byte code; it has been done before (http://bytecodehacks.sourceforge.net/bch-docs/bch/module-bytecodehacks.macro.html ...shudder).
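For what it's worth, you can confirm the closure mechanism without disassembling anything: the free variable lives in a cell attached to the function object. A quick check in plain Python, using the make_concrete_func defined above:
f = make_concrete_func(42)
# The compiled code only records the *name* of the free variable...
print(f.__code__.co_freevars)          # ('a_value',)
# ...while the value itself sits in a closure cell on the function object.
print(f.__closure__[0].cell_contents)  # 42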

Use getsource to convert the function to a string, and replace the variable names with simple string manipulation.
from inspect import getsource

def write_func(fn, path, **kwargs):
    fn_as_string = getsource(fn)
    for var in kwargs:
        fn_as_string = fn_as_string.replace(var, kwargs[var])
    with open(path, 'a') as fp:  # append to file
        fp.write('\n' + fn_as_string)

def base_func(b, k):
    return b + k + VALUE

# add quotes to string literals
write_func(base_func, '/tmp/code.py', VALUE="'my value'")
# you should replace the function name if you write multiple functions to the file
write_func(base_func, '/tmp/code.py', base_func='another_func', VALUE='5')
Output is as expected in /tmp/code.py:
def base_func(b, k):
    return b + k + 'my value'

def another_func(b, k):
    return b + k + 5
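One caveat worth adding (my note, not part of the original answer): str.replace also rewrites partial matches, so a placeholder such as VALUE would also clobber an identifier like MY_VALUE. A word-boundary regex is a safer sketch:
import re
from inspect import getsource

def write_func(fn, path, **kwargs):
    fn_as_string = getsource(fn)
    for var, repl in kwargs.items():
        # \b ensures only whole identifiers are replaced
        fn_as_string = re.sub(r'\b%s\b' % re.escape(var), repl, fn_as_string)
    with open(path, 'a') as fp:
        fp.write('\n' + fn_as_string)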

Try this. Note that I have added another parameter to write_to_file:
def write_to_file(code_list, path, a_value):
    print("lc", code_list)
    code_str_list = [inspect.getsource(c) for c in code_list]
    with open(path, 'w') as ofh:
        for c in code_str_list:
            c = c.replace('a_value', "'" + a_value + "'")
            fh = io.StringIO(c)
            ofh.writelines(fh.readlines())
            ofh.write('\n')

If the file doesn't have to be human-readable and you trust that it won't be manipulated by attackers, combining functools.partial and pickle might be the most Pythonic approach. However, it comes with disadvantages I don't completely understand: for one thing, it doesn't seem to work with locally defined functions (or maybe locally defined variables in general?).
I might just ask my own question about this.
import functools
import pickle

def write_file_not_working():
    def concrete_func_not_working(b, k, a_value):
        return b + k + a_value
    with open('data.pickle', 'wb') as f:
        data = functools.partial(concrete_func_not_working, a_value='my_value')
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

def use_file_not_working():
    with open('data.pickle', 'rb') as f:
        resurrected_data = pickle.load(f)
        print(resurrected_data('hi', 'there'))

def top_level_concrete_func(b, k, a_value):
    return a_value + b + k

def write_file_working():
    with open('working.pickle', 'wb') as f:
        data = functools.partial(top_level_concrete_func, a_value='my_working_value')
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

def use_file_working():
    with open('working.pickle', 'rb') as f:
        resurrected_data = pickle.load(f)
        print(resurrected_data('hi', 'there'))

if __name__ == "__main__":
    write_file_working()
    use_file_working()
    write_file_not_working()
    use_file_not_working()
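The likely reason the nested version fails (my note, not from the original answer): pickle stores a plain function by reference, i.e. by module plus qualified name, and re-imports it at load time, so a function defined inside another function cannot be located. A minimal sketch that shows the failure directly:
import functools
import pickle

def outer():
    def inner(x):
        return x
    return inner

try:
    pickle.dumps(functools.partial(outer(), 1))
except (pickle.PicklingError, AttributeError) as exc:
    # CPython reports something like:
    # "Can't pickle local object 'outer.<locals>.inner'"
    print(exc)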

@Ben made me realize that I didn't need to use a string-based approach to code generation and that I could use serialization instead. In place of the limited pickle library, I used dill, which overcomes the limitation mentioned by Ben.
So I finally did something like this:
import dill

def create_file(a_value, path):
    a_func = make_concrete_func(a_value)
    with open(path, 'wb') as f:
        dill.dump(a_func, f)
    return path

def make_concrete_func(a_value):
    def concrete_func(b, k):
        return b + k + a_value
    return concrete_func

if __name__ == '__main__':
    path = '/tmp/code.dill'
    create_file('Ben', path)
    with open(path, 'rb') as f:
        a_func = dill.load(f)
    print(a_func('Thank ', 'You '))

If the functions you want to create all follow a fixed pattern, I would create a template and use it to mass-produce the functions:
>>> def test(*values):
...     template = """
... def {name}(b,k):
...     return b + k + {value}
... """
...     for i, v in enumerate(values):
...         print(template.format(name="func{}".format(i), value=repr(v)))
...
>>> test("my_value", 42, [1])
def func0(b,k):
    return b + k + 'my_value'
def func1(b,k):
    return b + k + 42
def func2(b,k):
    return b + k + [1]
>>>
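To tie this back to the original goal of producing a file (a sketch of mine, not part of the answer): the formatted strings can be written out instead of printed:
template = """
def {name}(b, k):
    return b + k + {value}
"""

def write_funcs(path, *values):
    # Render one function per value and write them all to the file.
    with open(path, 'w') as fh:
        for i, v in enumerate(values):
            fh.write(template.format(name='func{}'.format(i), value=repr(v)))

write_funcs('/tmp/code.py', 'my_value', 42)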

Related

How to chain Python function calls so the behaviour is as follows

I stumbled upon the following problem in a Python challenge: write a function that satisfies the following rule for any number of function calls.
f()()()()()(s) == 'fooooo' + s;
example:
f('it') == 'fit';
f()('x') == 'fox';
f()()('bar') == 'foobar';
f()()()('l') == 'foool';
The function should be stateless and should not use any variables outside the scope.
The function signature was:
def f(s=None):
    # Your code here
I thought that in order to chain multiple calls we would have to return a function when no string is passed in, but I can't figure out how to build the expected string with no external variables. Suggestions?
def f(s=None):
    if s is None:
        # concatenate an 'o'?
        return f
    else:
        # Last call, return the result str.
        return s
An alternative to Nikola's answer is something like this:
def f(s=None):
    if s: return f'f{s}'
    def factory(prefix):
        def inner(s=None):
            return f'f{prefix}{s}' if s else factory(prefix + 'o')
        return inner
    return factory('o')
using a closure and no helper function.
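A quick check of the chain (the expected values come from the question):
print(f('it'))        # fit
print(f()('x'))       # fox
print(f()()()('l'))   # foool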
Obviously, you need to store the number of 'o's somewhere in the memory (e.g. the code) of f. To achieve this, you can benefit from these two features of Python:
You can define functions inside other functions.
There's this thing called argument binding, which allows you to tell Python to fix some or all of the arguments of your function. This is done through functools.partial.
And here's the solution
from functools import partial

def f(s=None):
    # Define a new function g which takes the current text and takes s
    def g(current_text, s=None):
        if s is not None:
            return current_text + s
        else:
            # If called with an empty argument, just rebind current_text with an extra o
            return partial(g, current_text + "o")
    # Just call g with the initial conditions
    return g("f", s)

print(f()()()()()("s"))  # fooooos
print(f("s"))  # fs
You can try this:
def f(s=None):
    string = "f"
    def ret(p=None):
        nonlocal string
        string += "o"
        return ret if not p else string + p
    return ret if not s else string + s
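This behaves the same way; each call to f starts a fresh chain with its own string:
print(f('it'))        # fit
print(f()()('bar'))   # foobar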
This is my go at it:
def f(x=None, seq=''):
    if x:
        return 'f' + seq + x
    else:
        def g(y=None, p=seq + 'o'):
            return f(y, p)
        return g
Edit: If you really need the function signature to be f(x=None), use this:
def f(x=None):
    def f_(x=None, seq=''):
        if x:
            return 'f' + seq + x
        else:
            def g(y=None, p=seq + 'o'):
                return f_(y, p)
            return g
    return f_(x)
:^)
Just for printing the string:
def f(s=None):
    def fo(s=None):
        if s is None:
            print('o', end='')
            return fo
        else:
            print(s)
            return
    if s is not None:
        print('f', end='')
        print(s)
        return
    elif s is None:
        print('fo', end='')
        return fo
Cute problem. This is a compact way to do it:
def f(s=None, prefix="f"):
    if s: return prefix + s
    return lambda s=None: f(s, prefix=prefix+"o")
FP:
f=lambda s=None,prefix='f':prefix+s if s else lambda s=None,prefix=prefix+'o':f(s,prefix)
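A quick sanity check that applies to either compact version:
assert f('it') == 'fit'
assert f()('x') == 'fox'
assert f()()()()()('s') == 'fooooos'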

Python : how to use another method's returning value in another method?

What I am trying to do is get the return value from the abcd method and use this value as the substitute for fname, but the error keeps occurring. How can I fix this error?
ICB164000395.txt has four lines, and I want line_count to print out 4 (the number of lines in the text file).
and i want line_count print out 4(The number of lines in the text file)
class Test():
    def abcd(self):
        self.a = a
        a = 'ICB164000395.txt'
        return a

    def line_count(self, fname):
        with open(fname) as f:
            for i, l in enumerate(f):
                pass
            return i + 1
            print(i + 1)

t = Test()
t.line_count(abcd())
and the error appears like this
Traceback (most recent call last):
  File "C:\Users\mg\Desktop\Tubuc\openAPI\test9.py", line 16, in <module>
    t.line_count(abcd(fname))
NameError: name 'abcd' is not defined
Just looking at the function:
def abcd(self):
    self.a = a
    a = 'ICB164000395.txt'
    return a
I'm guessing you're getting an error at self.a = a, because a is not defined yet. It's not passed in either.
I think what you want is:
class Test():
    def abcd(self):
        a = 'ICB164000395.txt'  # you'll need to correct the path to this file
        return a

    def line_count(self, fname):
        with open(fname) as f:
            for i, l in enumerate(f):
                pass
            return i + 1

t = Test()
print(t.line_count(t.abcd()))  # prints 4 for a four-line file
abcd is an instance method, so you have to call it from an instance of your class:
t = Test()
t.line_count(t.abcd())
Your abcd method also uses the variable a before it is ever defined, so you could change it to:
def abcd(self):
    self.a = 'ICB164000395.txt'
    return self.a
It appears that what you want from your abcd method is typically handled in __init__: you can set the file name when you instantiate a Test object and then call line_count. Your line_count method can also specify how you are opening the file ('r' for read mode).
class Test():
    def __init__(self, file_name):
        self._file_name = file_name

    def line_count(self):
        with open(self._file_name, 'r') as f:
            for i, l in enumerate(f):
                pass
            return i + 1

t = Test('ICB164000395.txt')
print(t.line_count())
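As an aside (my addition, not part of any answer here): the enumerate-and-pass loop works, but a generator expression is a more idiomatic way to count lines:
def line_count(file_name):
    with open(file_name, 'r') as f:
        # add one for every line the file yields
        return sum(1 for _ in f)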

Reduce indentation of nested functions in Python code generation

My goal is to generate functions dynamically and then save them in a file. For example, in my current attempt, I call create_file:
import io

def create_file():
    nested_func = make_nested_func()
    write_to_file([nested_func, a_root_func], '/tmp/code.py')

def a_root_func(x):
    pass

def make_nested_func():
    def a_nested_func(b, k):
        return b, k
    return a_nested_func

def write_to_file(code_list, path):
    import inspect
    code_str_list = [inspect.getsource(c) for c in code_list]
    with open(path, 'w') as ofh:
        for c in code_str_list:
            fh = io.StringIO(c)
            ofh.writelines(fh.readlines())
            ofh.write('\n')

create_file()
The output I want is ('/tmp/code.py'):
def a_nested_func(b, k):
    return b, k

def a_root_func(x):
    pass
The output I get is ('/tmp/code.py'):
    def a_nested_func(b, k):
        return b, k

def a_root_func(x):
    pass
a_nested_func is indented. How can I reduce the indentation? I can do lstrip etc. but I wonder if there is a better way.
Use the textwrap.dedent() function to remove the common leading whitespace:
import inspect
from textwrap import dedent

def write_to_file(code_list, path):
    code_str_list = [inspect.getsource(c) for c in code_list]
    with open(path, 'w') as ofh:
        for c in code_str_list:
            dedented = dedent(c)
            ofh.write(dedented + '\n')
Note that there is no need for a StringIO(string).readlines() dance here.
There's a function for this in the standard library, textwrap.dedent:
import textwrap

s = """
    abc
    def
"""
s2 = """
abc
def
"""
assert textwrap.dedent(s) == s2

Chain memoizer in python

I already have a memoizer that works quite well. It uses pickle dumps to serialize the inputs and creates an MD5 hash as the key. The function results are quite large and are stored as pickle files whose file names are the MD5 hashes. When I call two memoized functions one after another, the memoizer will load the output of the first function and pass it to the second function; the second function will serialize it, create its MD5, and then load its own output. Here is a very simple example:
@memoize
def f(x):
    ...
    return y

@memoize
def g(x):
    ...
    return y

y1 = f(x1)
y2 = g(y1)
y1 is loaded from disk when f is evaluated and then serialized again when g is evaluated. Is it possible to somehow bypass this step and pass the key of y1 (i.e. its MD5 hash) to g? If g already has this key, it loads y2 from disk; if it doesn't, it "requests" the full y1 for the evaluation of g.
EDIT:
import hashlib
import inspect
import pickle

class memoize(object):
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        file_name = self._get_key(*args, **kwargs)
        try:
            # pickle files are binary, so open in "rb"/"wb" mode
            with open(file_name, "rb") as f:
                out = pickle.load(f)
        except (OSError, EOFError):
            out = self.func(*args, **kwargs)
            with open(file_name, "wb") as f:
                pickle.dump(out, f, 2)
        return out

    def _arg_hash(self, *args, **kwargs):
        _str = pickle.dumps(args, 2) + pickle.dumps(kwargs, 2)
        return hashlib.md5(_str).hexdigest()

    def _src_hash(self):
        _src = inspect.getsource(self.func)
        return hashlib.md5(_src.encode()).hexdigest()

    def _get_key(self, *args, **kwargs):
        arg = self._arg_hash(*args, **kwargs)
        src = self._src_hash()
        return src + '_' + arg + '.pkl'
I think you could do it in an automatic fashion, but I generally think it's better to be explicit about "lazy" evaluation. So I'll present a way that adds an additional argument to your memoized functions: lazy. But instead of files, pickle and MD5, I'll simplify the helpers a bit:
# I use a dictionary as storage instead of files
storage = {}

# No md5, just hash
def calculate_md5(obj):
    print('calculating md5 of', obj)
    return hash(obj)

# Create a dictionary entry instead of pickling the data to a file
def create_file(md5, data):
    print('creating file for md5', md5)
    storage[md5] = data

# Load a dictionary entry instead of unpickling a file
def load_file(md5):
    print('loading file with md5 of', md5)
    return storage[md5]
I use a custom class as an intermediate object:
class MemoizedObject(object):
    def __init__(self, md5):
        self.md5 = md5

    def get_real_data(self):
        print('load...')
        return load_file(self.md5)

    def __repr__(self):
        return '{self.__class__.__name__}(md5={self.md5})'.format(self=self)
And finally I show the changed Memoize assuming your functions take only one argument:
class Memoize(object):
    def __init__(self, func):
        self.func = func
        # The md5-to-md5 storage is needed to find the result file
        # or result md5 for lazy evaluation.
        self.md5_to_md5_storage = {}

    def __call__(self, x, lazy=False):
        # If the argument is a memoized object there is no need to
        # calculate the hash, we can just look it up.
        if isinstance(x, MemoizedObject):
            key = x.md5
        else:
            key = calculate_md5(x)
        if lazy and key in self.md5_to_md5_storage:
            # Check if the key is present in the md5-to-md5 storage, otherwise
            # we can't be lazy
            return MemoizedObject(self.md5_to_md5_storage[key])
        elif not lazy and key in self.md5_to_md5_storage:
            # Not lazy, but we already know the result
            result = load_file(self.md5_to_md5_storage[key])
        else:
            # Unknown argument
            result = self.func(x)
            result_md5 = calculate_md5(result)
            create_file(result_md5, result)
            self.md5_to_md5_storage[key] = result_md5
        return result
Now if you call your functions and specify lazy at the correct positions you can avoid loading (unpickling) your file:
@Memoize
def f(x):
    return x + 1

@Memoize
def g(x):
    return x + 2
Normal (first) run:
>>> x1 = 10
>>> y1 = f(x1)
calculating md5 of 10
calculating md5 of 11
creating file for md5 11
>>> y2 = g(y1)
calculating md5 of 11
calculating md5 of 13
creating file for md5 13
Second run, without lazy:
>>> x1 = 10
>>> y1 = f(x1)
calculating md5 of 10
loading file with md5 of 11
>>> y2 = g(y1)
calculating md5 of 11
loading file with md5 of 13
With lazy=True
>>> x1 = 10
>>> y1 = f(x1, lazy=True)
calculating md5 of 10
>>> y2 = g(y1)
loading file with md5 of 13
The last option only calculates the "md5" of the first argument and loads the file of the end-result. That should be exactly what you wanted.

How to deal with large csv file in python?

I have a CSV file that contains 40k rows of data.
Each of my functions opens the CSV file, works with it, and then closes it.
Is there a way to open the file once, work with it whenever I want, and then close it? I tried putting each field in a separate list, and also in a dictionary, and working with that whenever I call it; both methods work well up to about 1k rows, but beyond that they take a long time to process. I found a way to speed this up by filtering, but I am not sure how to apply it.
Samples of my code:
files = open("myfile.csv", "r")

def spec_total():
    total = 0.0
    files.readline()  # skip first row
    for line in files:
        field = line.strip().split(",")  # make into fields
        tall = float(field[0])
        if tall >= 9.956:
            total += tall
    print("The sum is: %0.5f" % (total))

spec_total()
files.close()
Another function:
files = open("3124749c.csv", "r")

def code():
    match = 0
    files.readline()  # skip first row
    for row in files:
        field = row.strip().split(",")  # make into fields
        code = field[4]
        import re
        if re.search(r'\[[A-Za-z][0-9]+\][0-9]+[A-Za-z]{2}[0-9]+#[0-9]+', code) is None:
            match += 1
    print("The answer that do not match code is :", match)

code()
files.close()
and there are plenty more functions that open the CSV file each time and split it into fields in order to know which field I am referring to.
If I understand correctly, try:
import csv

total = 0.0
for row in csv.reader(open("myfile.csv")):
    tall = float(row[0])
    if tall >= 9.956:
        total += tall
print("The sum is: %0.5f" % total)
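To address the "open the file once" part of the question (a sketch of mine, not from the original answer): read the rows into a list once, then let every function work on that in-memory list:
import csv

with open("myfile.csv") as fh:
    reader = csv.reader(fh)
    header = next(reader)  # skip the first row
    rows = list(reader)    # 40k rows fit comfortably in memory

def spec_total(rows):
    return sum(float(r[0]) for r in rows if float(r[0]) >= 9.956)

print("The sum is: %0.5f" % spec_total(rows))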
A more complex version: create calculation classes for processing each row.
class Calc(object):
    def process(self, row):
        pass

    def value(self):
        pass

class SumColumn(Calc):
    def __init__(self, column=0, tall=9.956):
        self.column = column
        self.tall = tall
        self.total = 0

    def process(self, row):
        value = float(row[self.column])
        if value >= self.tall:
            self.total += value

    def value(self):
        return self.total
class ColumnAdder(Calc):
    def __init__(self, col1, col2):
        self.total = 0
        self.col1 = col1
        self.col2 = col2

    def process(self, row):
        self.total += (float(row[self.col1]) + float(row[self.col2]))

    def value(self):
        return self.total
class ColumnMatcher(Calc):
    def __init__(self, col=4):
        self.col = col
        self.matches = 0

    def process(self, row):
        import re
        code = row[self.col]
        if re.search(r'\[[A-Za-z][0-9]+\][0-9]+[A-Za-z]{2}[0-9]+#[0-9]+', code) is None:
            self.matches += 1

    def value(self):
        return self.matches
import csv

col0_sum = SumColumn()
col3_sum = SumColumn(3, 2.45)
col5_6_add = ColumnAdder(5, 6)
col4_matches = ColumnMatcher()

for row in csv.reader(open("myfile.csv")):
    col0_sum.process(row)
    col3_sum.process(row)
    col5_6_add.process(row)
    col4_matches.process(row)

print(col0_sum.value())
print(col3_sum.value())
print(col5_6_add.value())
print(col4_matches.value())
This code was typed into SO, which was a tedious affair, so bear with me on syntax etc.
It is for illustration purposes only and not to be taken too literally.
Everything is an object in Python: that includes functions. So there is no need to define special classes to craft functions as instances of those classes, as sotapme does, since every function we define is already an object in the sense of being an instance of a class.
Now, if someone needs to create several functions of the same kind, for example functions that each add up all the values of a particular column of a CSV file, it is indeed interesting to create those functions by a repeating process.
At this point the question arises: use a function factory or a class?
Personally, I prefer the function-factory way because it is less verbose. I also discovered in Theran's answer HERE that it's also faster.
In the following code, I use a trick with globals() to give a particular name to each function created by a function factory. Some will say it's bad, but I don't know why. If there's another way to do the same, I will be happy to learn it.
In the code, three functions are built by function factories, and I leave one defined by a plain, normal definition (op3).
Python is fantastic!
import csv
import re

# To create a CSV file
with open('Data.csv', 'w', newline='') as csvhandle:
    hw = csv.writer(csvhandle)
    hw.writerows(((2, 10, '%%', 3000, '-statusOK-'),
                  (5, 3, '##', 500, '-modo OOOOKKK-'),
                  (1, 60, '**', 700, '-- anarada-')))
del hw

# To visualize the content of the CSV file
with open('Data.csv', newline='') as f:
    print("The CSV file at start :\n " +
          '\n '.join(map(repr, csv.reader(f))))

def run_funcs_on_CSVfile(FUNCS, CSV):
    with open(CSV, newline='') as csvhandle:
        for f in FUNCS:
            # this is necessary for functions not created via
            # a function factory but via plain definition,
            # which defines only the attribute col of the function
            if 'field' not in f.__dict__:
                f.field = f.col - 1
                # columns are numbered 1,2,3,4,...
                # fields are numbered 0,1,2,3,...
        for row in csv.reader(csvhandle):
            for f in FUNCS:
                f(row[f.field])

def SumColumn(name, col, start=0):
    def g(s):
        g.kept += int(s)
    g.kept = start
    g.field = col - 1
    g.__name__ = name
    globals()[name] = g

def MultColumn(name, col, start=1):
    def g(s):
        g.kept *= int(s)
    g.kept = start
    g.field = col - 1
    g.__name__ = name
    globals()[name] = g

def ColumnMatcher(name, col, pat, start=0):
    RE = re.compile(pat)
    def g(s, regx=RE):
        if regx.search(s):
            g.kept += 1
    g.kept = start
    g.field = col - 1
    g.__name__ = name
    globals()[name] = g

SumColumn('op1', 1)
MultColumn('op2', 2)
ColumnMatcher('op4', 5, 'O+K')

def op3(s):
    s = int(s)
    if s % 2:
        op3.kept += (2 * s)
    else:
        op3.kept += s
op3.kept = 0
op3.col = 4

print('\nbefore:\n ' +
      '\n '.join('%s.kept == %d'
                 % (f.__name__, f.kept)
                 for f in (op1, op2, op3, op4)))

# The treatment is done here
run_funcs_on_CSVfile((op2, op3, op4, op1), 'Data.csv')
# note that the functions in the tuple passed
# as argument can be in any order

print('\nafter:\n ' +
      '\n '.join('%s(column %d) in %s.kept == %d'
                 % (f.__name__, f.field + 1, f.__name__, f.kept)
                 for f in (op1, op2, op3, op4)))
Result:
The CSV file at start :
 ['2', '10', '%%', '3000', '-statusOK-']
 ['5', '3', '##', '500', '-modo OOOOKKK-']
 ['1', '60', '**', '700', '-- anarada-']

before:
 op1.kept == 0
 op2.kept == 1
 op3.kept == 0
 op4.kept == 0

after:
 op1(column 1) in op1.kept == 8
 op2(column 2) in op2.kept == 1800
 op3(column 4) in op3.kept == 4200
 op4(column 5) in op4.kept == 2
