Getting list of undefined functions in Python code - python

Given Python code,
def foo():
def bar():
pass
bar()
foo()
bar()
I'd like to get a list of functions which, if I execute the Python code, will result in a NameError.
In this example, the list should be ['bar'], because it is not defined in the global scope and will cause an error when executed.
Executing the code in a loop, each time defining new functions, is not performant enough.
Currently I walk the AST tree, record all function definitions and all function calls, and subtract one from the other. This gives the wrong result in this case.

it seems you are trying to write some static analyzer for python. maybe you are working on C, but i think it would be faster for me to show the idea only in python:
list_token = # you have tokenized the program now.
class Env:
def __init__(self):
self.env = set()
self.super_env = None # this will point to Env instance
def __contains__(self, key):
if key in self.env:
return True
if self.sub_env is not None:
return key in self.super_env
def add(self, key):
self.env.add(key)
topenv = Env()
currentenv = topenv
ret = [] # return list
for tok in list_token:
if is_colon(tok): # is ':', ie. enter a new scope
newenv = Env()
currentenv.super_env = newenv
currentenv = newenv
else if is_exiting(tok): # exit a scope
currentenv = currentenv.super_env
else if refing_symbol(tok):
if tok not in currentenv: ret.add(tok)
else if new_symbol(tok):
currentenv.add(tok)
else: pass
if you think this code is not enough, please point out the reason. and if you want to capture all by static analysis, i think it's not quite possible.

Related

Store and restore exec's "execution state" / global namespace

I'm trying to run code snippets with pythons exec function and I want to be able to rerun (single) snippets so that they start their execution from the "execution state" the previous snippet (it's "parent") stored.
Probably easier to explain as code:
class Snippet:
def __init__(self, source, parent):
self.source = source
self.parent = parent
self.namespace = None
def execute(self):
self.namespace = cloneNamespace(self.parent)
executeInNamespace(self.source, self.namespace)
snip1 = Snippet('a = 1', None)
snip2 = Snippet('print(a)', snip1)
snip3 = Snippet('a = 2', snip2)
snip1.execute() # a = 1
snip2.execute() # print(a) --> 1
snip3.execute() # a = 2
snip2.execute() # print(a) --> 1 (!)
The second call to snip2 should run again from the namespace / execution state after/of snip1. parent.namespace is still snip1.namespace so a is 1 and snip2.execute should print out 1.
The question is how should cloneNamespace and executeInNamespace look like.
The best solution I could come up with so far is:
import dill
import types
def cloneNamespace(parent):
if parent == None:
return {}
newNamespace = {}
for k, v in parent.namespace.items():
# function and classes as refs, because they store a __globals__ ref
if isinstance(v, types.FunctionType) or isinstance(v, type):
newNamespace[k] = v
else:
newNamespace[k] = dill.copy(v)
return newNamespace
# global "wrapper namespace" so that __globals__ of funcs and classes don't get invalid
globalNamespace = {}
def executeInNamespace(source, namespace):
globalNamespace.clear()
globalNamespace.update(namespace)
exec(source, globalNamespace)
namespace.update(globalNamespace)
That work's for basic stuff but fails for example with these snippets:
snip1 = Snippet('''class A:
def bla(self):
print(x)''', None)
snip2 = Snippet('aInst = A()', snip1)
snip3 = Snippet('x = 1', snip2)
snip4 = Snippet('aInst.bla()', snip3)
I also tried dill.[dump,load]_module but couldn't get it working in the exec environment.
I know there are gonna be issues with file descriptors and similar, but I'd like to handle them the straight forward way everything else is also handled (rerun from the previous state).
Any ideas?
Are there other options -- wihtout exec -- to achieve this?
Thx!

Python: Decorator Dispatcher: Catch-22

I'm trying to build a decorator-based dispatcher, such as you find used by Flask or Pyramid. I got something that works, but ran into a bit of a catch-22. The following code works, but only because foo() gets executed and sets the .mq_path-attribute. When starting the application and building a list of the dispatchable functions no attributes are thus set yet. I want to execute foo() driven by events.
I could "manually" prepare a list of functions ahead and updated as I add functions, but I enjoy the way Flask works, by just adding a decorator to a function that handles an URL (or in this case a MQ path).
list_of_paths = []
path_dispatcher = {}
def handle_mq(path):
def decorator(fn):
def decorated(*args,**kwargs):
decorated.mq_path = path
print "Hello from the handle_mq() decorator, your path is: {0}".format(path)
return fn(*args,**kwargs)
return decorated
return decorator
#handle_mq('/some/path')
def foo():
print "foo!"
foo() # <- this code only works if I first call the decorated function
for k, v in globals().items():
if hasattr(v, 'mq_path'):
list_of_paths.append(v.mq_path)
path_dispatcher[v.mq_path] = v
print list_of_paths
print path_dispatcher
path_dispatcher['/some/path']()
So basically the question is, how to gather a list of the decorated functions before they are first executed?
I'm on Python 2.7.
I found the answer!
list_of_paths = []
path_dispatcher = {}
def handle_mq(path):
def decorator(fn):
list_of_paths.append(path)
path_dispatcher[path] = fn
def decorated(*args,**kwargs):
print "Hello from handl_mq decorator, your path is: {0}".format(path)
return fn(*args,**kwargs)
return decorated
return decorator
#handle_mq('/some/path')
def foo():
print "foo!"
print list_of_paths
print path_dispatcher
path_dispatcher['/some/path']()

How can I tell what function called my function? [duplicate]

Python: How to get the caller's method name in the called method?
Assume I have 2 methods:
def method1(self):
...
a = A.method2()
def method2(self):
...
If I don't want to do any change for method1, how to get the name of the caller (in this example, the name is method1) in method2?
inspect.getframeinfo and other related functions in inspect can help:
>>> import inspect
>>> def f1(): f2()
...
>>> def f2():
... curframe = inspect.currentframe()
... calframe = inspect.getouterframes(curframe, 2)
... print('caller name:', calframe[1][3])
...
>>> f1()
caller name: f1
this introspection is intended to help debugging and development; it's not advisable to rely on it for production-functionality purposes.
Shorter version:
import inspect
def f1(): f2()
def f2():
print 'caller name:', inspect.stack()[1][3]
f1()
(with thanks to #Alex, and Stefaan Lippen)
This seems to work just fine:
import sys
print sys._getframe().f_back.f_code.co_name
I would use inspect.currentframe().f_back.f_code.co_name. Its use hasn't been covered in any of the prior answers which are mainly of one of three types:
Some prior answers use inspect.stack but it's known to be too slow.
Some prior answers use sys._getframe which is an internal private function given its leading underscore, and so its use is implicitly discouraged.
One prior answer uses inspect.getouterframes(inspect.currentframe(), 2)[1][3] but it's entirely unclear what [1][3] is accessing.
import inspect
from types import FrameType
from typing import cast
def demo_the_caller_name() -> str:
"""Return the calling function's name."""
# Ref: https://stackoverflow.com/a/57712700/
return cast(FrameType, cast(FrameType, inspect.currentframe()).f_back).f_code.co_name
if __name__ == '__main__':
def _test_caller_name() -> None:
assert demo_the_caller_name() == '_test_caller_name'
_test_caller_name()
Note that cast(FrameType, frame) is used to satisfy mypy.
Acknowlegement: comment by 1313e for an answer.
I've come up with a slightly longer version that tries to build a full method name including module and class.
https://gist.github.com/2151727 (rev 9cccbf)
# Public Domain, i.e. feel free to copy/paste
# Considered a hack in Python 2
import inspect
def caller_name(skip=2):
"""Get a name of a caller in the format module.class.method
`skip` specifies how many levels of stack to skip while getting caller
name. skip=1 means "who calls me", skip=2 "who calls my caller" etc.
An empty string is returned if skipped levels exceed stack height
"""
stack = inspect.stack()
start = 0 + skip
if len(stack) < start + 1:
return ''
parentframe = stack[start][0]
name = []
module = inspect.getmodule(parentframe)
# `modname` can be None when frame is executed directly in console
# TODO(techtonik): consider using __main__
if module:
name.append(module.__name__)
# detect classname
if 'self' in parentframe.f_locals:
# I don't know any way to detect call from the object method
# XXX: there seems to be no way to detect static method call - it will
# be just a function call
name.append(parentframe.f_locals['self'].__class__.__name__)
codename = parentframe.f_code.co_name
if codename != '<module>': # top level usually
name.append( codename ) # function or a method
## Avoid circular refs and frame leaks
# https://docs.python.org/2.7/library/inspect.html#the-interpreter-stack
del parentframe, stack
return ".".join(name)
Bit of an amalgamation of the stuff above. But here's my crack at it.
def print_caller_name(stack_size=3):
def wrapper(fn):
def inner(*args, **kwargs):
import inspect
stack = inspect.stack()
modules = [(index, inspect.getmodule(stack[index][0]))
for index in reversed(range(1, stack_size))]
module_name_lengths = [len(module.__name__)
for _, module in modules]
s = '{index:>5} : {module:^%i} : {name}' % (max(module_name_lengths) + 4)
callers = ['',
s.format(index='level', module='module', name='name'),
'-' * 50]
for index, module in modules:
callers.append(s.format(index=index,
module=module.__name__,
name=stack[index][3]))
callers.append(s.format(index=0,
module=fn.__module__,
name=fn.__name__))
callers.append('')
print('\n'.join(callers))
fn(*args, **kwargs)
return inner
return wrapper
Use:
#print_caller_name(4)
def foo():
return 'foobar'
def bar():
return foo()
def baz():
return bar()
def fizz():
return baz()
fizz()
output is
level : module : name
--------------------------------------------------
3 : None : fizz
2 : None : baz
1 : None : bar
0 : __main__ : foo
You can use decorators, and do not have to use stacktrace
If you want to decorate a method inside a class
import functools
# outside ur class
def printOuterFunctionName(func):
#functools.wraps(func)
def wrapper(self):
print(f'Function Name is: {func.__name__}')
func(self)
return wrapper
class A:
#printOuterFunctionName
def foo():
pass
you may remove functools, self if it is procedural
An alternative to sys._getframe() is used by Python's Logging library to find caller information. Here's the idea:
raise an Exception
immediately catch it in an Except clause
use sys.exc_info to get Traceback frame (tb_frame).
from tb_frame get last caller's frame using f_back.
from last caller's frame get the code object that was being executed in that frame.
In our sample code it would be method1 (not method2) being executed.
From code object obtained, get the object's name -- this is caller method's name in our sample.
Here's the sample code to solve example in the question:
def method1():
method2()
def method2():
try:
raise Exception
except Exception:
frame = sys.exc_info()[2].tb_frame.f_back
print("method2 invoked by: ", frame.f_code.co_name)
# Invoking method1
method1()
Output:
method2 invoked by: method1
Frame has all sorts of details, including line number, file name, argument counts, argument type and so on. The solution works across classes and modules too.
Code:
#!/usr/bin/env python
import inspect
called=lambda: inspect.stack()[1][3]
def caller1():
print "inside: ",called()
def caller2():
print "inside: ",called()
if __name__=='__main__':
caller1()
caller2()
Output:
shahid#shahid-VirtualBox:~/Documents$ python test_func.py
inside: caller1
inside: caller2
shahid#shahid-VirtualBox:~/Documents$
I found a way if you're going across classes and want the class the method belongs to AND the method. It takes a bit of extraction work but it makes its point. This works in Python 2.7.13.
import inspect, os
class ClassOne:
def method1(self):
classtwoObj.method2()
class ClassTwo:
def method2(self):
curframe = inspect.currentframe()
calframe = inspect.getouterframes(curframe, 4)
print '\nI was called from', calframe[1][3], \
'in', calframe[1][4][0][6: -2]
# create objects to access class methods
classoneObj = ClassOne()
classtwoObj = ClassTwo()
# start the program
os.system('cls')
classoneObj.method1()
Hey mate I once made 3 methods without plugins for my app and maybe that can help you, It worked for me so maybe gonna work for you too.
def method_1(a=""):
if a == "method_2":
print("method_2")
if a == "method_3":
print("method_3")
def method_2():
method_1("method_2")
def method_3():
method_1("method_3")
method_2()

How to watch for a variable change in python without dunder setattr or pdb

There is large python project where one attribute of one class just have wrong value in some place.
It should be sqlalchemy.orm.attributes.InstrumentedAttribute, but when I run tests it is constant value, let's say string.
There is some way to run python program in debug mode, and run some check (if variable changed type) after each step throught line of code automatically?
P.S. I know how to log changes of attribute of class instance with help of inspect and property decorator. Possibly here I can use this method with metaclasses...
But sometimes I need more general and powerfull solution...
Thank you.
P.P.S. I need something like there: https://stackoverflow.com/a/7669165/816449, but may be with more explanation of what is going on in that code.
Well, here is a sort of slow approach. It can be modified for watching for local variable change (just by name). Here is how it works: we do sys.settrace and analyse the value of obj.attr each step. The tricky part is that we receive 'line' events (that some line was executed) before line is executed. So, when we notice that obj.attr has changed, we are already on the next line and we can't get the previous line frame (because frames aren't copied for each line, they are modified ). So on each line event I save traceback.format_stack to watcher.prev_st and if on the next call of trace_command value has changed, we print the saved stack trace to file. Saving traceback on each line is quite an expensive operation, so you'd have to set include keyword to a list of your projects directories (or just the root of your project) in order not to watch how other libraries are doing their stuff and waste cpu.
watcher.py
import traceback
class Watcher(object):
def __init__(self, obj=None, attr=None, log_file='log.txt', include=[], enabled=False):
"""
Debugger that watches for changes in object attributes
obj - object to be watched
attr - string, name of attribute
log_file - string, where to write output
include - list of strings, debug files only in these directories.
Set it to path of your project otherwise it will take long time
to run on big libraries import and usage.
"""
self.log_file=log_file
with open(self.log_file, 'wb'): pass
self.prev_st = None
self.include = [incl.replace('\\','/') for incl in include]
if obj:
self.value = getattr(obj, attr)
self.obj = obj
self.attr = attr
self.enabled = enabled # Important, must be last line on __init__.
def __call__(self, *args, **kwargs):
kwargs['enabled'] = True
self.__init__(*args, **kwargs)
def check_condition(self):
tmp = getattr(self.obj, self.attr)
result = tmp != self.value
self.value = tmp
return result
def trace_command(self, frame, event, arg):
if event!='line' or not self.enabled:
return self.trace_command
if self.check_condition():
if self.prev_st:
with open(self.log_file, 'ab') as f:
print >>f, "Value of",self.obj,".",self.attr,"changed!"
print >>f,"###### Line:"
print >>f,''.join(self.prev_st)
if self.include:
fname = frame.f_code.co_filename.replace('\\','/')
to_include = False
for incl in self.include:
if fname.startswith(incl):
to_include = True
break
if not to_include:
return self.trace_command
self.prev_st = traceback.format_stack(frame)
return self.trace_command
import sys
watcher = Watcher()
sys.settrace(watcher.trace_command)
testwatcher.py
from watcher import watcher
import numpy as np
import urllib2
class X(object):
def __init__(self, foo):
self.foo = foo
class Y(object):
def __init__(self, x):
self.xoo = x
def boom(self):
self.xoo.foo = "xoo foo!"
def main():
x = X(50)
watcher(x, 'foo', log_file='log.txt', include =['C:/Users/j/PycharmProjects/hello'])
x.foo = 500
x.goo = 300
y = Y(x)
y.boom()
arr = np.arange(0,100,0.1)
arr = arr**2
for i in xrange(3):
print 'a'
x.foo = i
for i in xrange(1):
i = i+1
main()
There's a very simple way to do this: use watchpoints.
Basically you only need to do
from watchpoints import watch
watch(your_object.attr)
That's it. Whenever the attribute is changed, it will print out the line that changed it and how it's changed. Super easy to use.
It also has more advanced features, for example, you can call pdb when the variable is changed, or use your own callback functions instead of print it to stdout.
A simpler way to watch for an object's attribute change (which can also be a module-level variable or anything accessible with getattr) would be to leverage hunter library, a flexible code tracing toolkit. To detect state changes we need a predicate which can look like the following:
import traceback
class MutationWatcher:
def __init__(self, target, attrs):
self.target = target
self.state = {k: getattr(target, k) for k in attrs}
def __call__(self, event):
result = False
for k, v in self.state.items():
current_value = getattr(self.target, k)
if v != current_value:
result = True
self.state[k] = current_value
print('Value of attribute {} has chaned from {!r} to {!r}'.format(
k, v, current_value))
if result:
traceback.print_stack(event.frame)
return result
Then given a sample code:
class TargetThatChangesWeirdly:
attr_name = 1
def some_nested_function_that_does_the_nasty_mutation(obj):
obj.attr_name = 2
def some_public_api(obj):
some_nested_function_that_does_the_nasty_mutation(obj)
We can instrument it with hunter like:
# or any other entry point that calls the public API of interest
if __name__ == '__main__':
obj = TargetThatChangesWeirdly()
import hunter
watcher = MutationWatcher(obj, ['attr_name'])
hunter.trace(watcher, stdlib=False, action=hunter.CodePrinter)
some_public_api(obj)
Running the module produces:
Value of attribute attr_name has chaned from 1 to 2
File "test.py", line 44, in <module>
some_public_api(obj)
File "test.py", line 10, in some_public_api
some_nested_function_that_does_the_nasty_mutation(obj)
File "test.py", line 6, in some_nested_function_that_does_the_nasty_mutation
obj.attr_name = 2
test.py:6 return obj.attr_name = 2
... return value: None
You can also use other actions that hunter supports. For instance, Debugger which breaks into pdb (debugger on an attribute change).
Try using __setattr__ to override the function that is called when an attribute assignment is attempted. Documentation for __setattr__
You can use the python debugger module (part of the standard library)
To use, just import pdb at the top of your source file:
import pdb
and then set a trace wherever you want to start inspecting the code:
pdb.set_trace()
You can then step through the code with n, and investigate the current state by running python commands.
def __setattr__(self, name, value):
if name=="xxx":
util.output_stack('xxxxx')
super(XXX, self).__setattr__(name, value)
This sample code helped me.

How can I refer to a function not by name in its definition in python?

I am maintaining a little library of useful functions for interacting with my company's APIs and I have come across (what I think is) a neat question that I can't find the answer to.
I frequently have to request large amounts of data from an API, so I do something like:
class Client(object):
def __init__(self):
self.data = []
def get_data(self, offset = 0):
done = False
while not done:
data = get_more_starting_at(offset)
self.data.extend(data)
offset += 1
if not data:
done = True
This works fine and allows me to restart the retrieval where I left off if something goes horribly wrong. However, since python functions are just regular objects, we can do stuff like:
def yo():
yo.hi = "yo!"
return None
and then we can interrogate yo about its properties later, like:
yo.hi => "yo!"
my question is: Can I rewrite my class-based example to pin the data to the function itself, without referring to the function by name. I know I can do this by:
def get_data(offset=0):
done = False
get_data.data = []
while not done:
data = get_more_starting_from(offset)
get_data.data.extend(data)
offset += 1
if not data:
done = True
return get_data.data
but I would like to do something like:
def get_data(offset=0):
done = False
self.data = [] # <===== this is the bit I can't figure out
while not done:
data = get_more_starting_from(offset)
self.data.extend(data) # <====== also this!
offset += 1
if not data:
done = True
return self.data # <======== want to refer to the "current" object
Is it possible to refer to the "current" object by anything other than its name?
Something like "this", "self", or "memememe!" is what I'm looking for.
I don't understand why you want to do this, but it's what a fixed point combinator allows you to do:
import functools
def Y(f):
#functools.wraps(f)
def Yf(*args):
return inner(*args)
inner = f(Yf)
return Yf
#Y
def get_data(f):
def inner_get_data(*args):
# This is your real get data function
# define it as normal
# but just refer to it as 'f' inside itself
print 'setting get_data.foo to', args
f.foo = args
return inner_get_data
get_data(1, 2, 3)
print get_data.foo
So you call get_data as normal, and it "magically" knows that f means itself.
You could do this, but (a) the data is not per-function-invocation, but per function (b) it's much easier to achieve this sort of thing with a class.
If you had to do it, you might do something like this:
def ybother(a,b,c,yrselflambda = lambda: ybother):
yrself = yrselflambda()
#other stuff
The lambda is necessary, because you need to delay evaluation of the term ybother until something has been bound to it.
Alternatively, and increasingly pointlessly:
from functools import partial
def ybother(a,b,c,yrself=None):
#whatever
yrself.data = [] # this will blow up if the default argument is used
#more stuff
bothered = partial(ybother, yrself=ybother)
Or:
def unbothered(a,b,c):
def inbothered(yrself):
#whatever
yrself.data = []
return inbothered, inbothered(inbothered)
This last version gives you a different function object each time, which you might like.
There are almost certainly introspective tricks to do this, but they are even less worthwhile.
Not sure what doing it like this gains you, but what about using a decorator.
import functools
def add_self(f):
#functools.wraps(f)
def wrapper(*args,**kwargs):
if not getattr(f, 'content', None):
f.content = []
return f(f, *args, **kwargs)
return wrapper
#add_self
def example(self, arg1):
self.content.append(arg1)
print self.content
example(1)
example(2)
example(3)
OUTPUT
[1]
[1, 2]
[1, 2, 3]

Categories

Resources