How to reference function defined in __init__ super class - python

I have a helper function (def convert(dictionary)) inside my class's __init__ method to assist with configuration setup. The definition is as follows:
class Configuration:
    """Load a configuration either from a YAML file or from a pre-built dict.

    Exactly one of ``config_file`` (path to a YAML file) or ``config``
    (an already-parsed dict) must be supplied; otherwise ValueError is raised.
    """

    def __init__(self, config_file=None, config=None):
        if config_file is not None:
            # NOTE(review): relies on a module-level `import yaml`.
            with open(config_file) as in_file:
                self._config = yaml.load(in_file, Loader=yaml.FullLoader)
        elif config is not None:
            self._config = config
        else:
            raise ValueError("Could not create configuration. Must pass either location of config file or valid "
                             "config.")

        # Helper local to __init__: it is re-created on every call and is
        # NOT visible outside this method -- which is exactly the problem
        # the question below runs into.
        def convert(dictionary):
            return namedtuple('Config', dictionary.keys())(**dictionary)
This allows me to make a call within __init__ as follows:
self.input = convert(self._config["input"])
self.output = convert(self._config["output"])
self.build = convert(self._config["build_catalog"])
Since I have more than one config to set up, I want to inherit from this class as follows:
class BuildConfiguration(Configuration):
    # NOTE(review): this is the question's failing attempt. `convert` was a
    # function local to Configuration.__init__, so it never becomes an
    # attribute of the parent class (or a module-level name) -- each bare
    # call below raises NameError at runtime.
    def __init__(self, config_file=None, config=None):
        super().__init__(config_file, config)
        self.input = convert(self._config["input"])
        self.output = convert(self._config["output"])
        self.build = convert(self._config["build_catalog"])
I however do not gain access to convert from the parent class. I have also tried this:
self.input = super().__init__.convert(self._config["input"])
This also seems to not work.
So the question is how do I get access to a function defined in super().__init__ from child classes?

You can't. A new function is created on each invocation of __init__ and then discarded; it doesn't exist outside the function. Note that this also applies to the class being created by namedtuple('Config', dictionary.keys())(**dictionary). It really isn't good to keep creating all these unnecessary classes, which totally defeats the purpose of namedtuple — namely, creating memory-efficient record types. Here, each instance has its own class!
Here is how you should define this:
# One shared record type, created a single time at module level.
Config = namedtuple('Config', "foo bar baz")


def convert(dictionary):  # is this really necessary?
    """Build a Config record from a mapping of field name -> value."""
    return Config(**dictionary)


class Configuration:
    """Configuration sourced from a YAML file or a ready-made dict."""

    def __init__(self, config_file=None, config=None):
        # Guard clause first: reject the no-arguments case up front.
        if config_file is None and config is None:
            raise ValueError("Could not create configuration. Must pass either location of config file or valid "
                             "config.")
        if config_file is not None:
            # A file path takes precedence over an in-memory dict.
            with open(config_file) as in_file:
                config = yaml.load(in_file, Loader=yaml.FullLoader)
        self._config = config
        self.input = convert(self._config["input"])
        self.output = convert(self._config["output"])
        self.build = convert(self._config["build_catalog"])
Although at this point, it seems cleaner to just use
Config(**self._config["input"])
etc instead and ditch the helper convert.

Related

Defining a class inside a function to interrupt decorator execution

I'm trying to configure a decorator at run time. This is somewhat related to my earlier question: How to configure a decorator in Python
The motivation for this is that I'm trying to use the Thespian troupe code "as-is".
Is it legal to have this code here, where I've defined the class (and therefore called the decorator) inside a class method? Again, the reason for this is that I could feed the max_count argument prior to the decorator being called.
The module is calculator.calculator (yes, bad choice perhaps)
class Scheduler:
    """Thin wrapper around a Thespian ActorSystem that fans work out to actors."""

    def __init__(self):
        self.actor_system = None

    def start(self):
        self.actor_system = ActorSystem('multiprocTCPBase')

    def stop(self):
        self.actor_system.shutdown()

    def launch(self, count, func_and_data, status_cb):
        # Message payload type, re-created on every launch() call.
        class CalcPayload:
            def __init__(self, func_and_data, status_cb):
                self.func_and_data = func_and_data
                self.status_cb = status_cb

        # FIX(review): the decorator line was garbled into a comment
        # ("#troupe(...)") -- the whole point of the question is applying
        # the decorator here, so restore it as a real decorator.
        @troupe(max_count=count)
        class Execute(ActorTypeDispatcher):
            def receiveMsg_CalcPayload(self, msg, sender):
                func = msg.func_and_data['func']
                data = msg.func_and_data['data']
                status_cb = msg.status_cb
                self.send(sender, func(data, status_cb))

        exec_actor = self.actor_system.createActor(Execute)
        for index in range(len(func_and_data)):
            calc_config = CalcPayload(func_and_data[index], status_cb)
            self.actor_system.tell(exec_actor, calc_config)
        for index in range(len(func_and_data)):
            # NOTE(review): `timeout` is not defined anywhere in this scope,
            # and `result` is overwritten each iteration and discarded --
            # TODO confirm intent (the bug is present in the original post).
            result = self.actor_system.listen(timeout)
        self.actor_system.tell(exec_actor, ActorExitRequest())
For various reasons, I can't apply the decorator to the class when I use it. There is a brief discussion on this in the question I referenced.
While not invalid, it is generally inadvisable to define a class as a local variable inside a function, as it would make access to the class difficult outside the function.
Instead, you can define the classes outside the function, and apply the decorator function to the class when it's actually needed by calling the decorator function with the class object:
class CalcPayload:
    """Message carrying one unit of work plus its status callback."""

    def __init__(self, func_and_data, status_cb):
        self.func_and_data, self.status_cb = func_and_data, status_cb
class Execute(ActorTypeDispatcher):
    """Actor that runs one CalcPayload work item and replies with the result."""

    def receiveMsg_CalcPayload(self, msg, sender):
        # Unpack the work item, execute it, and send the outcome back.
        job = msg.func_and_data
        outcome = job['func'](job['data'], msg.status_cb)
        self.send(sender, outcome)
class Scheduler:
    """Owns a Thespian ActorSystem and distributes work to Execute actors."""

    def __init__(self):
        self.actor_system = None

    def start(self):
        self.actor_system = ActorSystem('multiprocTCPBase')

    def stop(self):
        self.actor_system.shutdown()

    def launch(self, count, func_and_data, status_cb, timeout=None):
        """Send every work item to a troupe of Execute actors; return replies.

        FIX(review): `timeout` was an undefined name (NameError at runtime);
        it is now a backward-compatible keyword argument.  The listen()
        results were previously overwritten and discarded; they are now
        collected and returned (callers that ignored the return value are
        unaffected).
        """
        exec_actor = self.actor_system.createActor(troupe(max_count=count)(Execute))
        for index in range(len(func_and_data)):
            calc_config = CalcPayload(func_and_data[index], status_cb)
            self.actor_system.tell(exec_actor, calc_config)
        results = [self.actor_system.listen(timeout)
                   for _ in range(len(func_and_data))]
        self.actor_system.tell(exec_actor, ActorExitRequest())
        return results
The actor_system is going to want to build instances of your class. That means it needs to be able to derive the class object- you cannot define it inside of a method.
If you really need to apply the decorator separately, you maybe could do
def launch(self, count, func_and_data, status_cb):
    # FIX(review): the snippet referred to `Executor`, but the class defined
    # in the answer above is named `Execute`.
    wrapped = troupe(max_count=count)(Execute)
    exec_actor = self.actor_system.createActor(wrapped)

How to update pickled objects when adding an attribute to a Python class

I defined a Python3 class and then used pickle to serialize and save an instance to file. Later I added another instance attribute to my class, but I realized that if I load my instance and try to reference that attribute I will get an "Object has no attribute" error since the instance was constructed without it. What are the best options for adding the new attribute to my pickled object(s) and configuring it?
In code, I defined a class like
# First definition
class Foo:
    def __init__(self, params):
        # define and initialize attributes (body elided in the question --
        # this snippet is illustrative pseudo-code, not runnable as-is)
    def print_number(self):
        print(2)
I create and serialize an instance using pickle, and save it to file
# Pickle the instance to disk: only the instance's attribute state
# (__dict__) is serialized, not the method code.
import pickle
inst = Foo(params)
with open("filename", 'wb') as f:
    pickle.dump(inst, f)
Then I want my class to behave a bit differently, so I update its definition:
# Updated definition
class Foo:
    def __init__(self, params):
        # define and initialize attributes
        self.bar = "baz"  # bar is a new attribute
    def print_number(self):
        print(3)  # prints 3 instead of 2
Then I load my instance and try to call some methods
# Unpickle: methods come from the *current* class definition, but the
# attribute state is whatever was saved -- so `bar` is missing here.
import pickle
with open("filename", 'rb') as f:
    inst = pickle.load(f)
inst.print_number()
print(inst.bar)
Since pickle doesn't save method definitions, the instance method's behaviour is updated so inst.print_number() prints 3 instead of 2. However the reference inst.bar results in an "Object has no attribute" error because inst was initialized before Foo had that attribute in its definition.
Update
This was a bit of a noob question on my part, I didn't realize that Python lets you just do something like inst.bar = "baz" and set things dynamically (I'm coming from a Java background where everything has to be fixed from the start). I am still interested in hearing about ways to do this properly and/or Pythonically and/or pickle-specifically, especially when multiple class updates can be expected.
You could use class inheritance to add new methods/attributes to an existing class:
# First definition
class Foo:
    def __init__(self, params):
        self.params = params

    def print_number(self):
        print(2)


import pickle

inst = Foo('params')
with open("filename", 'wb') as f:
    pickle.dump(inst, f)
del inst


# Updated definition: subclass the original Foo under the same name, so the
# unpickled old object can be wrapped in an instance carrying the new
# attribute and behaviour.
class Foo(Foo):
    def __init__(self, params):
        super().__init__(params)
        self.bar = "baz"  # bar is a new attribute

    def print_number(self):
        print(3)


with open("filename", 'rb') as f:
    inst = Foo(pickle.load(f))

inst.print_number()
print(inst.bar)
# Outputs:
# 3
# baz
Or it probably makes more sense in practice to do something like this:
with open("filename", 'rb') as f:
    inst = pickle.load(f)


# Updated definition: derive from whatever class the unpickled object
# actually carries, so the old definition need not be kept around by name.
class Foo(inst.__class__):
    def __init__(self, params):
        super().__init__(params)
        self.bar = "baz"  # bar is a new attribute

    def print_number(self):
        print(3)


inst = Foo(inst)
inst.print_number()
print(inst.bar)
The general way I would go about this is by implementing __setstate__. I've pasted some code below that you can play with to get a feel for how it might work. You can also define a method that both __setstate__ and __init__ call with a dict (either the keyword arguments to __init__ or the state given to __setstate__) that will ensure the expected attributes are all set regardless of how the object is created. You might also consider implementing __new__ for your class since that gets called even for unpickling.
mod.py:
# Flip VERSION to 2 (after dumping a pickle with version 1) to exercise the
# __setstate__ upgrade path.
VERSION = 1

if VERSION == 1:
    # Version 1
    class A:
        def __init__(self):
            # Note: either the instance's dict has to have something set or __getstate__
            # has to be overridden to return a `value` for which `bool(value) == True`
            #
            # See https://docs.python.org/3/library/pickle.html#object.__setstate__
            self.some_attr = 2
elif VERSION == 2:
    # Version 2
    class A:
        def __new__(cls):
            # __new__ runs even during unpickling, so attributes added here
            # appear on old pickles too.
            obj = super().__new__(cls)
            obj.other_new_attr = 6
            return obj

        def __init__(self):
            self.some_attr = 2
            self.new_attr = 5

        def __setstate__(self, state):
            print('setting state', state)
            self.__dict__.update(state)
            if not hasattr(self, 'new_attr'):
                print('adding new_attr')
                # you can do whatever you want to calculate new_attr here
                self.new_attr = 5
run.py:
import sys
from mod import A
from pickle import dump, load

if __name__ == '__main__':
    command = sys.argv[1]
    if command == 'dump':
        with open('a.pickle', 'wb') as f:
            dump(A(), f)
    elif command == 'load':
        # call this after adding the attribute
        with open('a.pickle', 'rb') as f:
            a = load(f)
            print(a.new_attr)
            print(a.other_new_attr)

the meaning of the instruction

I found this code in the sqlmap project: https://github.com/sqlmapproject/sqlmap/blob/master/lib/core/datatype.py
I don't understand the meaning of calling the constructor AttribDict.__init__(self)
class InjectionDict(AttribDict):
    """Container for everything sqlmap records about one injection point."""

    def __init__(self):
        # Run the base class's initializer first (explicit-name form of
        # super().__init__()), so the AttribDict machinery is set up before
        # the subclass adds its own attributes.
        AttribDict.__init__(self)

        self.place = None
        self.parameter = None
        self.ptype = None
        self.prefix = None
        self.suffix = None
        self.clause = None

        # data is a dict with various stype, each of which is a dict with
        # all the information specific for that stype
        self.data = AttribDict()

        # conf is a dict which stores the current snapshot of important
        # options used during detection
        self.conf = AttribDict()

        self.dbms = None
        self.dbms_version = None
        self.os = None
The InjectionDict class is a subclass; the base class it inherits from is AttribDict. That's what this syntax means:
class InjectionDict(AttribDict):
Then, in InjectionDict's __init__ method, they call the base class's __init__ method first, before doing the rest of the subclass-specific __init__ work.
AttribDict.__init__(self)
See this post for a more thorough explanation of what this behavior is used for.

python using __init__ vs just defining variables in class - any difference?

I'm new to Python - and just trying to better understand the logic behind certain things.
Why would I write this way (default variables are in __init__):
class Dawg:
    def __init__(self):
        # Instance attributes: every Dawg() call builds its own fresh
        # string/node/list/dict objects.
        self.previousWord = ""
        self.root = DawgNode()
        self.uncheckedNodes = []
        self.minimizedNodes = {}
    def insert( self, word ):
        #...
    def finish( self ):
        #...
Instead of this:
class Dawg:
    # Class attributes: a single "", DawgNode(), list and dict are created
    # once at class-definition time and shared by every instance.
    previousWord = ""
    root = DawgNode()
    uncheckedNodes = []
    minimizedNodes = {}
    def insert( self, word ):
        #...
    def finish( self ):
        #...
I mean - why do I need to use __init__ -> if I can just as easily add default variables to a class directly?
When you create variables in the Class, then they are Class variables (They are common to all the objects of the class), when you initialize the variables in __init__ with self.variable_name = value then they are created per instance and called instance variables.
For example,
# Python 2 example: `variable` is a class variable, so both instances see
# the very same object and the identity checks succeed.
class TestClass(object):
    variable = 1

var_1, var_2 = TestClass(), TestClass()
print var_1.variable is var_2.variable
# True
print TestClass.variable is var_1.variable
# True
Since variable is a class variable, the is operator evaluates to True. But, in case of instance variables,
# Python 2 example: here `variable` is set per instance in __init__, so the
# two instances hold different objects and the class itself has no such
# attribute at all.
class TestClass(object):
    def __init__(self, value):
        self.variable = value

var_1, var_2 = TestClass(1), TestClass(2)
print var_1.variable is var_2.variable
# False
print TestClass.variable is var_1.variable
# AttributeError: type object 'TestClass' has no attribute 'variable'
And you cannot access an instance variable, with just the class name.
When you write this:
class Dawg:
    # Class variables: the same four objects are shared by every instance.
    previousWord = ""
    root = DawgNode()
    uncheckedNodes = []
    minimizedNodes = {}
Those are not instance variables, they're class variables (meaning: the same variables with the same values are shared between all instances of the class.) On the other hand, this:
class Dawg:
    def __init__(self):
        # Instance variables: distinct values for each Dawg instance.
        self.previousWord = ""
        self.root = DawgNode()
        self.uncheckedNodes = []
        self.minimizedNodes = {}
... Is declaring instance variables, meaning: the values are different for each instance of the class. As you see, each snippet means a completely different thing, and you have to pick the one that is appropriate for you. Hint: most of the time you're interested in instance variables, because class variables define a kind of shared global state for your objects, which is error prone.

Defining constants in python class, is self really needed?

I want to define a set of constants in a class like:
class Foo(object):
    # Tuple-unpacking assignment: creates three class attributes at once.
    (NONEXISTING,VAGUE,CONFIRMED) = (0,1,2)
    def __init__(self):
        # Raises NameError: names bound in the class body are not in scope
        # inside methods -- this is the question's failing line.
        self.status = VAGUE
However, I get
NameError: global name 'VAGUE' is not defined
Is there a way of defining these constants to be visiable inside the class without resorting to global or self.NONEXISTING = 0 etc.?
When you assign to names in the class body, you're creating attributes of the class. You can't refer to them without referring to the class either directly or indirectly. You can use Foo.VAGUE as the other answers say, or you can use self.VAGUE. You do not have to assign to attributes of self.
Usually, using self.VAGUE is what you want because it allows subclasses to redefine the attribute without having to reimplement all the methods that use them -- not that that seems like a sensible thing to do in this particular example, but who knows.
try instead of:
self.status = VAGUE
this one:
self.status = Foo.VAGUE
you MUST specify the class
This one is NOT RECOMMENDED FOR ANY CODE by any means, but an ugly hack like below can be done.
I did this just to have better understanding of Python AST API, so anyone who uses this in real-world code should be shot before it does any harm :-)
#!/usr/bin/python
# -*- coding: utf-8-unix -*-
#
# AST hack to replace symbol reference in instance methods,
# so it will be resolved as a reference to class variables.
#
import inspect, types, ast
def trim(src):
    """Dedent `src` by the leading whitespace of its *first* line."""
    lines = src.split("\n")
    # Offset of the first line's stripped content == width of its indent.
    indent = lines[0].index(lines[0].lstrip())
    return "\n".join(line[indent:] for line in lines)
#
# Method decorator (Python 2) that replaces symbol references in a method
# so they resolve against the attributes of a class instead of names in
# the global namespace.  Works by re-parsing the method's source with
# ast, rewriting Name nodes, recompiling, and invoking the rewritten copy.
#
def nsinclude(*args):
    # usecase: @nsinclude()
    # use classname in calling frame as a fallback
    stack = inspect.stack()
    opts = [stack[1][3]]

    def wrap(func):
        # The rewritten copy is always named "tempfunc"; skip re-wrapping it.
        if func.func_name == "tempfunc":
            return func

        def invoke(*args, **kw):
            # Resolve the target class by name in this frame.
            base = eval(opts[0])
            src = trim(inspect.getsource(func))
            basenode = ast.parse(src)

            class hackfunc(ast.NodeTransformer):
                def visit_Name(self, node):
                    try:
                        # if base class (set in @nsinclude) can resolve
                        # given name, modify AST node to use that instead
                        val = getattr(base, node.id)
                        newnode = ast.parse("%s.%s" % (opts[0], node.id))
                        # Unwrap Module -> Expr -> Attribute to get the
                        # replacement expression node.
                        newnode = next(ast.iter_child_nodes(newnode))
                        newnode = next(ast.iter_child_nodes(newnode))
                        ast.copy_location(newnode, node)
                        return ast.fix_missing_locations(newnode)
                    except:
                        return node

            class hackcode(ast.NodeVisitor):
                def visit_FunctionDef(self, node):
                    # Rename the parsed function and rewrite its names.
                    if func.func_name != "tempfunc":
                        node.name = "tempfunc"
                        hackfunc().visit(node)

            hackcode().visit(basenode)
            newmod = compile(basenode, '<ast>', 'exec')
            # Executing the module defines "tempfunc" locally; call it.
            eval(newmod)
            newfunc = eval("tempfunc")
            newfunc(*args, **kw)

        return invoke

    # usecase: @nsinclude
    if args and isinstance(args[0], types.FunctionType):
        return wrap(args[0])

    # usecase: @nsinclude("someclass")
    if args and args[0]:
        opts[0] = args[0]
    return wrap
class Bar:
    # Class-level constants used by the @nsinclude("Bar") demo below.
    FOO = 987
    BAR = 876
class Foo:
    FOO = 123
    BAR = 234

    # import from belonging class
    # FIX(review): these decorator lines were garbled by markdown (the "@"
    # became "#", turning them into comments); restored as real decorators.
    @nsinclude
    def dump1(self, *args):
        print("dump1: FOO = " + str(FOO))

    # import from specified class (Bar)
    @nsinclude("Bar")
    def dump2(self, *args):
        print("dump2: BAR = " + str(BAR))
Foo().dump1()
Foo().dump2()
The only way is to access it through the class name such as
Foo.VAGUE
If you access just VAGUE inside the __init__ function (or any other method), the name must be defined in that scope for the access to work; a bare VAGUE does not look up class attributes.
Using self is for the instance of the class also.
In Python3, you can also reference VAGUE as:
type(self).VAGUE
This way, you are clearly referencing it as a class attribute and not an object attribute, yet this way is robust against a name change of the class. Also if you override VAGUE in a subclass, the value from the subclass will be used, just like if you were to use self.VAGUE.
Note that this method does not appear to work in Python2, at least not in my tests, where type(self) returned instance instead of the class I instantiated. Therefore Thomas Wouters's answer is probably preferable, considering how widespread Python2 still is.

Categories

Resources