Starting from a script foo.py, find all functions that are in use in local source code (i.e. not built-in or third-party packages), recursively.
EDIT: I do not want to find recursive functions. I want to find all functions that are in use!
e.g. foo.py
import bar
def not_used():
pass
bar.do_stuff(x,y)
bar.py
import math
def more_stuff(x,y):
result = math.abs(-x+-y)
return result
def do_stuff(x,y):
more_stuff(x,y)
Should return do_stuff & more_stuff
Should ignore not_used & abs
Many thanks
EDIT: Code so far
# Question's attempt: scan the compiled bytecode of foo.py for CALL_FUNCTION
# opcodes and recover the name that was loaded for each call.
import dis

py_file = 'foo.py'
with open(py_file) as file:
    source_code = file.read()
compiled = compile(source_code, py_file, "exec")
funcs = []
byte_code = dis.Bytecode(compiled)
# Reverse the instruction stream: for a CALL_FUNCTION with `arg` positional
# arguments, the instruction that loaded the callable then sits arg + 1
# slots *after* the call in this reversed view.
instructions = list(reversed([x for x in byte_code]))
for (ix, instruction) in enumerate(instructions):
    if instruction.opname == "CALL_FUNCTION":
        # NOTE(review): CALL_FUNCTION exists only up to Python 3.10; newer
        # interpreters emit CALL/PRECALL instead -- confirm target version.
        load_func_instr = instructions[ix + instruction.arg + 1]
        funcs.append(load_func_instr.argval)
# Re-reverse so calls are numbered in source order for display.
results = [f'{ix}: {funcname}'for (ix, funcname) in enumerate(reversed(funcs), 1)]
You can use Python's ast (abstract syntax tree) module
A short example:
"""Minimal demonstration: list every function defined in a source string."""
import ast

code = """
import math
def more_stuff(x,y):
    result = math.abs(-x+-y)
    return result
def do_stuff(x,y):
    more_stuff(x,y)
"""

# Walk the parsed module and collect the function-definition nodes.
tree = ast.parse(code)
funcs = []
for node in ast.walk(tree):
    if isinstance(node, ast.FunctionDef):
        funcs.append(node)
print(', '.join(func.name for func in funcs))
prints:
more_stuff, do_stuff
now you can add the tests, you like. For example the SO question
How to find/detect if a build-in function is used in Python AST?
discusses how to detect if a function is being used.
Related
Can I add a prefix and suffix to the source code of functions?
I know about decorators and do not want to use them (the minimal example below doesn't make clear why, but I have my reasons).
# Desired usage: `patched` should return a copy of `f` whose body is wrapped
# by the prefix/suffix statements.
def f():
    print('world')

# NOTE(review): `patched` is not defined at this point in the post; this call
# only sketches the intended API.
g = patched(f,prefix='print("Hello, ");',suffix='print("!");')
g() # Hello, world!
Here is what I have so far:
import inspect
import ast
import copy
def patched(f,prefix,suffix):
    """Broken first attempt: splice prefix/suffix statements into f's AST.

    As written this does NOT work (see the discussion below): `compile`
    in 'exec' mode produces a *module* code object whose body merely
    defines the function, so assigning it to g.__code__ replaces f's body
    with "define a function" -- calling g() then shows nothing.
    """
    source = inspect.getsource(f)
    tree = ast.parse(source)
    new_body = [
        ast.parse(prefix).body[0],
        *tree.body[0].body,
        ast.parse(suffix).body[0]
    ]
    tree.body[0].body = new_body
    # copy.deepcopy treats functions as atomic and returns the SAME object,
    # so the __code__ assignment below also clobbers f itself.
    g = copy.deepcopy(f)
    g.__code__ = compile(tree,g.__code__.co_filename,'exec')
    return g
Unfortunately, nothing happens if I use this and then call g() as above; neither world nor Hello, world! are printed.
Here is a rough version of what can be done:
import inspect
import ast
import copy
def patched(f,prefix,suffix):
    """Return a new function equal to f but with the prefix/suffix
    statements spliced around its body.

    The source of f is parsed, the extra statements are inserted into the
    AST, and the rewritten module is executed in a fresh namespace from
    which the new function object is fetched by name.
    """
    src = inspect.getsource(f)
    module_tree = ast.parse(src)
    func_def = module_tree.body[0]
    func_def.body = [
        ast.parse(prefix).body[0],
        *func_def.body,
        ast.parse(suffix).body[0],
    ]
    code_obj = compile(module_tree, filename=f.__code__.co_filename, mode='exec')
    scope = {}
    # Executing the module re-defines the function inside `scope`;
    # the original f is left untouched.
    exec(code_obj, scope)
    return scope[f.__name__]

def temp():
    # Placeholder from the original snippet; intentionally unused.
    pass

def f():
    print('world',end='')

g = patched(f,prefix='print("Hello, ",end="")',suffix='print("!",end="")')
g() # Hello, world!
The call of compile compiles an entire module (represented by tree). This module is then executed in an empty namespace from which the desired function is finally extracted. (Warning: the namespace will need to be filled with some globals from where f comes from if f uses those.)
After some more work, here is a real example of what can be done with this. It uses some extended version of the principle above:
import numpy as np
from playground import graphexecute
# Fix: the decorator line had lost its '@' (rendered as a comment '#'),
# which contradicts the output shown below -- "Executing part ..." lines
# can only come from graphexecute being applied.
@graphexecute(verbose=True)
def my_algorithm(x,y,z):
    # Each inner def is one "node" of the graph: its parameters are the
    # values it consumes, the `-> name` annotation is the value it
    # produces. The bodies never run as plain Python (graphexecute
    # rewrites them), so the forward-referencing annotations are never
    # evaluated here.
    def SumFirstArguments(x,y)->sumxy:
        sumxy = x+y
    def SinOfThird(z)->sinz:
        sinz = np.sin(z)
    def FinalProduct(sumxy,sinz)->prod:
        prod = sumxy*sinz
    def Return(prod):
        return prod
print(my_algorithm(x=1,y=2,z=3))
#OUTPUT:
#>>Executing part SumFirstArguments
#>>Executing part SinOfThird
#>>Executing part FinalProduct
#>>Executing part Return
#>>0.4233600241796016
The key point is that I get the exact same output if I reshuffle the parts of my_algorithm, for example like this:
# Fix: restore the lost '@' on the decorator line (same issue as the first
# listing -- without it, none of the shown verbose output could appear).
# Same graph as my_algorithm with the node definitions shuffled; the
# decorator's dependency ordering makes the execution order identical.
@graphexecute(verbose=True)
def my_algorithm2(x,y,z):
    def FinalProduct(sumxy,sinz)->prod:
        prod = sumxy*sinz
    def SumFirstArguments(x,y)->sumxy:
        sumxy = x+y
    def SinOfThird(z)->sinz:
        sinz = np.sin(z)
    def Return(prod):
        return prod
print(my_algorithm2(x=1,y=2,z=3))
#OUTPUT:
#>>Executing part SumFirstArguments
#>>Executing part SinOfThird
#>>Executing part FinalProduct
#>>Executing part Return
#>>0.4233600241796016
This works by (1) grabbing the source of my_algorithm and turning it into an ast (2) patching each function defined within my_algorithm (e.g. SumFirstArguments) to return locals (3) deciding based on the inputs and the outputs (as defined by the type hints) in which order the parts of my_algorithm should be executed. Furthermore, a possibility that I do not have implemented yet is to execute independent parts in parallel (such as SumFirstArguments and SinOfThird). Let me know if you want the sourcecode of graphexecute, I haven't included it here because it contains a lot of stuff that is not relevant to this question.
For your problem, you don't need to recompile your functions. Just define a list of functions, you inspect for arguments and return variable name:
def FinalProduct(sumxy, sinz) -> "prod":
    """Combine the two intermediate results into the final product."""
    product = sumxy * sinz
    return product

def SumFirstArguments(x, y) -> "sumxy":
    """Produce the intermediate named 'sumxy' (see the return annotation)."""
    return x + y

def SinOfThird(z) -> "sinz":
    """Produce the intermediate named 'sinz'."""
    sine = np.sin(z)
    return sine
def execute(funcs, **args):
    """Run a set of single-result functions in dependency order.

    Each function names the value it produces in its ``return`` annotation;
    its positional parameter names are the values it consumes.  A function
    whose inputs are not yet available is pushed to the back of the queue
    and retried later.

    Fixes over the original: the caller's list is no longer mutated, and an
    unsatisfiable dependency now raises ValueError instead of looping
    forever.

    :param funcs: functions carrying a ``-> "name"`` return annotation
    :param args: the initially available named values
    :return: result of the last function executed (None if funcs is empty)
    :raises ValueError: when the remaining functions' inputs can never be met
    """
    pending = list(funcs)   # work on a copy; don't mutate the argument
    result = None
    deferrals = 0           # consecutive functions postponed without progress
    while pending:
        func = pending.pop(0)
        code = func.__code__
        try:
            kw = {a: args[a] for a in code.co_varnames[:code.co_argcount]}
        except KeyError:
            # Not all arguments available yet -- retry after the others.
            pending.append(func)
            deferrals += 1
            if deferrals >= len(pending):
                # Every remaining function was deferred once in a row:
                # no further progress is possible.
                raise ValueError(
                    "unsatisfiable dependencies: "
                    + ", ".join(f.__name__ for f in pending))
        else:
            deferrals = 0
            print(func,kw)
            result = func(**kw)
            args[func.__annotations__['return']] = result
    return result
print(execute([FinalProduct, SumFirstArguments, SinOfThird], x=1,y=2,z=3))
E.g. I've got the following python function:
def func(x):
    """Function docstring."""  # docstring + full-line comments are the payload the question wants surfaced
    result = x + 1  # trailing comments are deliberately NOT part of the trace
    if result > 0:
        # comment 2
        return result
    else:
        # comment 3
        return -1 * result
And I want to have some function that would print all function docstrings and comments that are met along the execution path, e.g.
> trace(func(2))
Function docstring.
Comment 2
3
In fact what I try to achieve is to provide some comments how the result has been calculated.
What could be used? AST as far as I understand does not keep comment in the tree.
I thought this was an interesting challenge, so I decided to give it a try. Here is what I came up with:
import ast
import inspect
import re
import sys
import __future__
if sys.version_info >= (3,5):
    # Python 3.5+ dropped the starargs/kwargs constructor fields.
    ast_Call = ast.Call
else:
    def ast_Call(func, args, keywords):
        """Compatibility wrapper for ast.Call on Python 3.4 and below.
        Used to have two additional fields (starargs, kwargs)."""
        return ast.Call(func, args, keywords, None, None)

# A full-line comment: optional leading blanks, '#', optional single space,
# then the text to surface.
COMMENT_RE = re.compile(r'^(\s*)#\s?(.*)$')

def convert_comment_to_print(line):
    """Turn a full-line comment into an equivalent print statement.

    Trailing (end-of-line) comments are left alone.  Returns the line,
    rewritten when it was a comment and unchanged otherwise.
    """
    found = COMMENT_RE.match(line)
    if not found:
        return line
    indent, text = found.groups()
    return '{}print({!r})\n'.format(indent, text)

def convert_docstrings_to_prints(syntax_tree):
    """Replace every docstring in the tree with a print of the same text.

    A docstring here is any expression statement consisting only of a
    string literal inside a body/orelse/finalbody list.  The AST is
    modified in-place.
    """
    print_name = ast.Name('print', ast.Load())
    # Materialize the walk before mutating the nodes it yields.
    all_nodes = list(ast.walk(syntax_tree))
    for node in all_nodes:
        for field in ('body', 'orelse', 'finalbody'):
            for statement in getattr(node, field, ()):
                if (isinstance(statement, ast.Expr) and
                        isinstance(statement.value, ast.Str)):
                    statement.value = ast_Call(print_name, [statement.value], [])
def get_future_flags(module_or_func):
    """Collect the __future__ compiler flags in effect for an object.

    For every feature in __future__.all_feature_names, checks whether the
    module (or the module a function came from) re-exports that very
    feature object, and ORs the matching compiler flags together.
    """
    flags = 0
    for feature_name in __future__.all_feature_names:
        feature = getattr(__future__, feature_name)
        imported = getattr(module_or_func, feature_name, None) is feature
        if imported and hasattr(feature, 'compiler_flag'):
            flags |= feature.compiler_flag
    return flags
def eval_function(syntax_tree, func_globals, filename, lineno, compile_flags,
                  *args, **kwargs):
    """Helper function for `trace`. Execute the function defined by
    the given syntax tree, and return its return value."""
    func = syntax_tree.body[0]
    # Prepend our own decorator so that merely exec-ing the module both
    # defines *and* calls the rewritten function.
    func.decorator_list.insert(0, ast.Name('_trace_exec_decorator', ast.Load()))
    # Shift line numbers so tracebacks point at the original source lines.
    ast.increment_lineno(syntax_tree, lineno-1)
    # The print/Call nodes inserted earlier carry no positions yet.
    ast.fix_missing_locations(syntax_tree)
    code = compile(syntax_tree, filename, 'exec', compile_flags, True)
    result = [None]  # mutable cell so the decorator can smuggle the value out
    def _trace_exec_decorator(compiled_func):
        result[0] = compiled_func(*args, **kwargs)
    func_locals = {'_trace_exec_decorator': _trace_exec_decorator}
    exec(code, func_globals, func_locals)
    return result[0]
def trace(func, *args, **kwargs):
    """Run the given function with the given arguments and keyword arguments,
    and whenever a docstring or (whole-line) comment is encountered,
    print it to stdout."""
    # NOTE(review): inspect cannot fetch source for interactively-defined
    # functions; see the surrounding discussion.
    filename = inspect.getsourcefile(func)
    lines, lineno = inspect.getsourcelines(func)
    # 1) comments -> print statements (textual pass, before parsing)
    lines = map(convert_comment_to_print, lines)
    modified_source = ''.join(lines)
    # Compile with the same __future__ features the original function saw.
    compile_flags = get_future_flags(func)
    # 2) parse only (PyCF_ONLY_AST), then 3) docstrings -> print calls
    syntax_tree = compile(modified_source, filename, 'exec',
                          ast.PyCF_ONLY_AST | compile_flags, True)
    convert_docstrings_to_prints(syntax_tree)
    # 4) compile and run the rewritten function in its original globals
    return eval_function(syntax_tree, func.__globals__,
                         filename, lineno, compile_flags, *args, **kwargs)
It is a bit long because I tried to cover most important cases, and the code might not be the most readable, but I hope it is nice enough to follow.
How it works:
First, read the function's source code using inspect.getsourcelines. (Warning: inspect does not work for functions that were defined interactively. If you need that, maybe you can use dill instead, see this answer.)
Search for lines that look like comments, and replace them with print statements. (Right now only whole-line comments are replaced, but it shouldn't be difficult to extend that to trailing comments if desired.)
Parse the source code into an AST.
Walk the AST and replace all docstrings with print statements.
Compile the AST.
Execute the AST. This and the previous step contain some trickery to try to reconstruct the context that the function was originally defined in (e.g. globals, __future__ imports, line numbers for exception tracebacks). Also, since just executing the source would only re-define the function and not call it, we fix that with a simple decorator.
It works in Python 2 and 3 (at least with the tests below, which I ran in 2.7 and 3.6).
To use it, simply do:
result = trace(func, 2) # result = func(2)
Here is a slightly more elaborate test that I used while writing the code:
#!/usr/bin/env python
from trace_comments import trace
from dateutil.easter import easter, EASTER_ORTHODOX
def func(x):
    """Function docstring."""  # printed by trace(); edits here change the demo output
    result = x + 1
    if result > 0:
        # comment 2
        return result
    else:
        # comment 3
        return -1 * result
if __name__ == '__main__':
    # trace() prints the docstring, then only the comments on the branch
    # that is actually taken.
    result1 = trace(func, 2)
    print("result1 = {}".format(result1))
    result2 = trace(func, -10)
    print("result2 = {}".format(result2))
    # Test that trace() does not permanently replace the function
    result3 = func(42)
    print("result3 = {}".format(result3))
    print("-----")
    # Also works on third-party code (dateutil's easter()).
    print(trace(easter, 2018))
    print("-----")
    print(trace(easter, 2018, EASTER_ORTHODOX))
I am trying to build a program which allows the user to browse to a folder which contains python modules. Once the folder has been selected it will list all python files within that folder as well as all the classes and methods for each module. My question is, are there any way I can do this without opening each file and parsing for "def" or "class"? I noticed that there's a function called mro which returns the attribute of a class but that requires me to have access to that class through an import. So is there any way I can get the same result? Thank you in advance!
This is what I came up with using the AST module, it has exactly what I was looking for.
def fillClassList(file):
    """Return (methodName, className) tuples for every method found in the
    given test module.

    Fixes over the original: the misspelled ``mehotdName`` initializer
    (which left ``methodName`` unbound, so the first ClassDef raised
    NameError), the leaked file handle, the unused ``ast.NodeVisitor()``
    instance, and ``!= None`` comparisons.

    NOTE(review): ast.walk() is breadth-first, so a module-level function
    seen after a class still gets paired with that class; the pairing is
    only reliable when every function in the file is a method.

    :param file: file name, appended to the fixed test directory below
    :return: list of (method name, most recently seen class name) tuples
    """
    classList = []
    className = None
    methodName = None  # was: mehotdName -- the typo this thread corrects
    fileName = "C:\Transcriber\Framework\ctetest\RegressionTest\GeneralTest\\" + file
    with open(fileName, "r") as fileObject:  # close the handle deterministically
        text = fileObject.read()
    p = ast.parse(text)
    for node in ast.walk(p):
        if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
            if isinstance(node, ast.ClassDef):
                className = node.name
            else:
                methodName = node.name
            if className is not None and methodName is not None:
                classList.append((methodName, className))
    return classList
If you want to know the contents of the file, there's no way around looking into the file :)
Your choice comes down to whether you want to parse out the content-of-interest yourself, or if you want to let Python load the file and then ask it about what it found.
For a very simple Python file like testme.py below you can do something like this (warning: not for those with weak stomachs):
testme.py:
# Minimal fixture module for analyze.py: one class and one function.
class Foo (object):
    pass

def bar():
    pass
analyze.py:
# Python 2 example: import each listed file as a module, then report the
# non-dunder names it defines and their types.
import os.path

files = ['testme.py']
for f in files:
    print f
    modname = os.path.splitext(f)[0]
    # WARNING(review): exec/eval on constructed strings -- acceptable for a
    # local toy only; importlib.import_module(modname) plus getattr() would
    # avoid both and cope with untrusted file names.
    exec('import ' + modname)
    mod = eval(modname)
    for symbol in dir(mod):
        if symbol.startswith('__'):
            continue  # skip dunders such as __builtins__/__name__
        print '  ', symbol, type(eval(modname + '.' + symbol))
Output:
testme.py
Foo <type 'type'>
bar <type 'function'>
However, that's going to start to get pretty grotty when you expand it to deal with nested packages and modules and broken code and blah blah blah. Might be easier just to grep for class and/or def and go from there.
Have fun with it! I :heart: metaprogramming
Most of Python's implementation (parser included) is available in the stdlib, so by carefully reading the modules index you should find what you need. The first modules / packages that come to mind are importlib, inspect and ast but there surely other modules of interest.
I had to replace a lot of code in one of my modules, here is my way of getting classes and methods:
def listClass(file):
    """Map each class defined in a python file to its method names.

    Returns {class_name: [method_name, ...]} in source order.
    """
    with open(file, "r") as handle:
        tree = ast.parse(handle.read())
    # Outer walk finds the classes; a nested walk per class finds its defs.
    return {
        cls.name: [fn.name for fn in ast.walk(cls)
                   if isinstance(fn, ast.FunctionDef)]
        for cls in ast.walk(tree)
        if isinstance(cls, ast.ClassDef)
    }
Sample pprint output:
{'Alert': ['__init__',
'fg',
'fg',
'bg',
'bg',
'paintEvent',
'drawBG',
'drawAlert'],
'AlertMouse': ['__init__', 'paintEvent', 'mouseMoveEvent'],
'AlertPopup': ['__init__', 'mousePressEvent', 'keyPressEvent', 'systemInfo']}
Thanks, useful example for this first time ast user. Code above with the import, printed output, and without the 1 spelling error ;-)
# Stand-alone class/method lister (the re-post of fillClassList with the
# 'mehotdName' typo corrected). Further fixes applied here: the file handle
# is now closed via `with`, the unused ast.NodeVisitor() instance is gone,
# and `!= None` comparisons use `is not None`.
import ast

classList = []
className = None
methodName = None
fileName = "C:\\fullPathToAPythonFile.py"
with open(fileName, "r") as fileObject:
    text = fileObject.read()
p = ast.parse(text)
for node in ast.walk(p):
    if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
        if isinstance(node, ast.ClassDef):
            className = node.name
        else:
            methodName = node.name
        # Pair each method with the most recently seen class.
        if className is not None and methodName is not None:
            classList.append((methodName, className))
            print("class: " + className + ", method: " + methodName)
I have a yaml script that we use to specify functions. The yaml file parses into a dictionary (actually, nested dictionaries) that I want to use to construct the functions described in this yaml file. Here's an example yaml entry:
Resistance:
arguments:
voltage: "V"
current: "A"
parameters:
a: -1.23
b: 0.772
format: "{a}*voltage+{b}*current+f(voltage)"
subfunctions:
f:
arguments:
voltage: "V"
parameters:
a: -6.32
format: "exp({a}*voltage)"
Now, what I need to do is parse this file and then build up the namespaces so that at the end, I can bind a variable called "Resistance" to a closure or lambda that reflects the above function (with the nested "f" subfunction).
My strategy was to go "bottom up" using a recursive algorithm. Here is my code:
def evaluateSimpleFunction(entry):
    """Build a callable from one parsed YAML function entry.

    The entry's 'format' string contains {param} placeholders; the
    'parameters' values are substituted in, and the 'arguments' keys become
    the parameters of the returned lambda.

    Fix over the original: parameters are substituted with
    str.format(**params) instead of eval-ing a constructed
    "functionString.format(a=...,b=...)" call -- the old form broke on
    string-valued parameters and evaluated attacker-controllable text.

    :param entry: dict with 'format' and optional 'arguments'/'parameters'
    :return: a function taking the 'arguments' names positionally
    """
    function_body = entry['format']
    arg_names = []
    if entry.get("arguments") is not None:
        arg_names = list(entry['arguments'].keys())
    params = {}
    if entry.get("parameters") is not None:
        params = entry['parameters']
    # Bake the parameter values straight into the expression text.
    function_body = function_body.format(**params)
    # eval() is still how the lambda is materialized: the expression comes
    # from the (trusted) YAML config file, exactly as before.
    arg_list = ",".join(str(arg) for arg in arg_names)
    return eval("lambda " + arg_list + ":" + function_body)
def recursiveLoader(entry):
    """Intended to build nested functions bottom-up from the parsed YAML.

    NOTE(review): this is the code the question reports as failing -- in
    Python 2, an unqualified exec inside a function that also contains a
    nested function or lambda is a SyntaxError.  Even where it would run,
    names bound by exec() are not visible to the returned lambda, so the
    recursively built subfunctions are effectively discarded: both
    branches return the same thing.
    """
    if "subfunctions" in entry:
        subfunctions = entry['subfunctions']
        bindingString = ""
        for subFunc in subfunctions:
            bindingString +=str(subFunc)+"=[];"
        # placeholder bindings, e.g. "f=[];"
        exec(bindingString)
        for subFunc in subfunctions:
            # rebind each placeholder to the recursively built callable
            exec(str(subFunc)+"= recursiveLoader(subfunctions[subFunc])")
        return lambda : evaluateSimpleFunction(entry)
    else:
        return lambda : evaluateSimpleFunction(entry)
import yaml,os, math

# Example driver: parse the model file and build the top-level function.
os.chdir(r"C:\Users\212544808\Desktop\PySim\xferdb")

keyFields = ["Resistance","OCV"]
containerKeys = ["_internalResistance","_OCV"]

functionContainer = {}
with open("LGJP1.yml",'r') as modelFile:
    # NOTE(review): yaml.load without an explicit Loader can execute
    # arbitrary tags; yaml.safe_load is the right call for config data.
    parsedModelFile = yaml.load(modelFile)
#for funcKey,containerKey in zip(keyFields,containerKeys):
entry = parsedModelFile["capacityDegrade"]
g = recursiveLoader(entry)
Now, as it stands, I get an error because I am using unqualified exec with a nested function.
However, I don't want to resort to globals, because I will use this process for multiple functions and will therefore overwrite any globals I use.
I'm hoping for suggestions on how to construct nested functions algorithmically from an external config file like the yaml file - exec doesn't seem to be the way to go.
BTW: I'm using Python 2.7
UPDATE
Another, more robust option may be to use a global class instance to create a namespace for each function. For example:
# Sketch: use a plain class instance as a per-function namespace.
class Namespace(): pass

namespace_1 = Namespace()
#assume that the function "exponent" has arguments X, Y and body "Q(X*Y)",
#where "Q" has body "x**2+3*y"
exec("namespace_1.exponent = lambda X,Y: Q(X*Y)")
exec("namespace_1.Q = lambda x,y: x**2+3*y")
# NOTE(review): the Q inside exponent's body is resolved as a *global* at
# call time, not as an attribute of namespace_1, so calling
# namespace_1.exponent(...) still raises NameError unless Q is also made
# visible in the module globals.
The benefit of this approach is that I can then loop through the members of the class for a particular function to create a single source code string that I can pass to "eval" to get the final function.
I'm doing all of this because I have not found a reliable way to create nested closures using eval and exec.
Here's a simplified example of what I mean using your input. I have hardcoded it, but you could easily build up a similar module file using your parser:
def makeModule(**kwargs):
    """Write a generated module to disk with the parameters baked in,
    import it, and return its `func`.

    kwargs must supply a0, b and a1 -- the coefficients interpolated into
    the source template below.
    """
    print repr(kwargs)
    module_filename = 'generated_module.py'
    with open(module_filename, 'w') as module_file:
        module_file.write('''\
from math import *
def func(voltage, current):
    def f(voltage):
        return exp({a1} * voltage)
    return {a0}*voltage+{b}*current+f(voltage)
'''.format(**kwargs))
    module_name = module_filename.replace('.py', '')
    # NOTE(review): __import__ consults sys.modules first, so a second
    # makeModule() call in the same process returns the *old* module
    # unless it is explicitly reloaded.
    module = __import__(module_name)
    return module.func

def main():
    func = makeModule(a0=-1.23, b=0.772, a1=-6.32)
    print 'Result:', func(2, 3)

if __name__ == '__main__':
    main()
It works by generating a file called generated_module.py and then using the builtin function __import__ to import it as a module that is stored into the variable module. Like any other module, then you can access the names defined in it, namely func.
I wrote a method called buildRegex that, given a name (of type str), returns a regex object that finds a from ... import ... name statement in a Python module.
For example, here is the expected behaviour of buildRegex:
>>> regObj = buildRegex('foo')
>>> regObj.search('from a import fool') is None
True
>>> regObj.search('from a import foo') is not None
True
>>> regObj.search('from a.b.c import foo as food') is None
True
>>> regObj.search('from a.b.c import fool, bar as foo') is not None
True
What I have so far works for all the examples above (and more):
def buildRegex(name):
    """Compile a regex matching ``from X import ... name ...`` where `name`
    is imported under its own name (i.e. not immediately followed by an
    ``as`` alias).
    """
    # \b...\b keeps 'foo' from matching inside 'fool'; the negative
    # lookahead rejects 'foo as something'.
    wanted = r'(\b{0}\b(?!\s+as\s+))'.format(name)
    # 'from <dotted.module> import <rest-of-line up to a comment>' whose
    # imported-names part still contains the wanted name.
    statement = r'from\s+(\w+(\.\w+)*)\s+import\s+([^#\n]*)(?={0})'.format(wanted)
    return re.compile(statement)
buildRegex assumes that the searched module has no SyntaxErrors which is OK.
My problem is, when looking for the imported name foo, I also need to know if it is an alias to a different name. I.e. if a module has the statement:
from a.b.c import bar as foo
I want to know what foo is aliasing, in this case, that would be bar. Currently, due to asserted lookaheads in the regex, that is not possible. So, finally my question:
How can I refactor the regex so that this information is not lost, i.e., if the given name is an alias, then the name it is aliasing is captured in one of the regex's groups?
I'd recommend that instead of writing complicated regular expressions to parse imports, one would actually use the ast.parse to parse the source code into abstract syntax tree and find the names from there, as ast.parse is guaranteed to parse Python correctly. Something like:
import ast
class ImportFinder(ast.NodeVisitor):
    """Collect every import statement found while visiting a tree.

    Plain imports are recorded as ['import', [(name, asname), ...]];
    from-imports as ('from', level, module, [(name, asname), ...]),
    where `level` counts the leading dots of a relative import.
    """

    def __init__(self):
        self.imports = []

    def visit_Import(self, node):
        entries = [(alias.name, alias.asname) for alias in node.names]
        self.imports.append(['import', entries])

    def visit_ImportFrom(self, node):
        entries = [(alias.name, alias.asname) for alias in node.names]
        self.imports.append(('from', node.level, node.module, entries))

def parse_imports(source):
    """Parse `source` and return the imports found; see ImportFinder for
    the record formats."""
    finder = ImportFinder()
    finder.visit(ast.parse(source))
    return finder.imports
Example usage:
import pprint
pprint.pprint(parse_imports('''
from foo import bar, baz, frob
from .. import bar as spam, baz as ham, frob
import bar.baz
import bar.foo as baf
'''))
Prints out:
[('from', 0, 'foo', [('bar', None), ('baz', None), ('frob', None)]),
('from', 2, None, [('bar', 'spam'), ('baz', 'ham'), ('frob', None)]),
['import', [('bar.baz', None)]],
['import', [('bar.foo', 'baf')]]]
The integer on the from lines gives the number of . before the module name.
import inspect
import importlib
import ast
class Imports(ast.NodeVisitor):
    """Print a human-readable report of every import in the visited tree."""

    def visit_Import(self, node):
        # e.g. "import numpy as np" -- one report line per alias
        print("In Import")
        for alias in node.names:
            if alias.asname is None:
                print("module name = {}".format(alias.name))
            else:
                print("module name = {}, alias = {}".format(alias.name, alias.asname))
        print()

    def visit_ImportFrom(self, node):
        # e.g. "from re import compile as cp"; node.level counts leading dots
        print("In ImportFrom")
        for alias in node.names:
            if alias.asname is None:
                print("module = {}\nname = {}\nlevel = {}\n".
                      format(node.module, alias.name, node.level))
            else:
                print("module = {}\nname = {}\nalias = {}\nlevel = {}\n".
                      format(node.module, alias.name, alias.asname, node.level))
        print()
# Drive the visitor over a module imported by name; inspect.getsource
# needs the module object (not the name string).
mod = "temp_test"
mod = importlib.import_module(mod)  # rebinds the name from str to module
p = ast.parse(inspect.getsource(mod))
Imports().visit(p)
Input:
from bisect import bisect_left as bs
import datetime
import time
import numpy as np
def foo():
from re import findall
class Foo():
def test(self):
from re import compile as cp, finditer as ft
Output:
In ImportFrom
module = bisect
name = bisect_left
alias = bs
level = 0
In Import
module name = datetime
In Import
module name = time
In Import
module name = numpy, alias = np
In ImportFrom
module = re
name = findall
level = 0
In ImportFrom
module = re
name = compile
alias = cp
level = 0
module = re
name = finditer
alias = ft
level = 0
class Import(names)
An import statement. names is a list of alias nodes.
class ImportFrom(module, names, level)
Represents from x import y. module is a raw string of the ‘from’ name, without any leading dots, or None for statements such as from . import foo. level is an integer holding the level of the relative import (0 means absolute import).
The greentreesnakes documentation for me at least has a much better explanation of what all the nodes do and how to use the ast module than the actual ast documentation itself.
You can use also pass the module directly or open the py file and pass the content to ast.parse:
# Variant: read the file directly instead of importing it first.
with open("temp_test.py") as f:
    p = ast.parse(f.read(), filename="<ast>", mode="exec")
Imports().visit(p)
And passing the module:
# Variant: pass an already-imported module through inspect.getsource.
import temp_test

p = ast.parse(inspect.getsource(temp_test))
Imports().visit(p)