Lazy loading of class attributes - python

Class Foo has a bar, and it is not loaded until it is accessed. Further accesses to bar should incur no overhead.
class Foo(object):
def get_bar(self):
print "initializing"
self.bar = "12345"
self.get_bar = self._get_bar
return self.bar
def _get_bar(self):
print "accessing"
return self.bar
Is it possible to do something like this using properties or, better yet, attributes, instead of using a getter method?
The goal is to lazy load without overhead on all subsequent accesses...

There are some problems with the current answers. The solution with a property requires that you specify an additional class attribute and has the overhead of checking this attribute on each look up. The solution with __getattr__ has the issue that it hides this attribute until first access. This is bad for introspection and a workaround with __dir__ is inconvenient.
A better solution than the two proposed ones is utilizing descriptors directly. The werkzeug library has already a solution as werkzeug.utils.cached_property. It has a simple implementation so you can directly use it without having Werkzeug as dependency:
_missing = object()
class cached_property(object):
"""A decorator that converts a function into a lazy property. The
function wrapped is called the first time to retrieve the result
and then that calculated result is used the next time you access
the value::
class Foo(object):
#cached_property
def foo(self):
# calculate something important here
return 42
The class has to have a `__dict__` in order for this property to
work.
"""
# implementation detail: this property is implemented as non-data
# descriptor. non-data descriptors are only invoked if there is
# no entry with the same name in the instance's __dict__.
# this allows us to completely get rid of the access function call
# overhead. If one choses to invoke __get__ by hand the property
# will still work as expected because the lookup logic is replicated
# in __get__ for manual invocation.
def __init__(self, func, name=None, doc=None):
self.__name__ = name or func.__name__
self.__module__ = func.__module__
self.__doc__ = doc or func.__doc__
self.func = func
def __get__(self, obj, type=None):
if obj is None:
return self
value = obj.__dict__.get(self.__name__, _missing)
if value is _missing:
value = self.func(obj)
obj.__dict__[self.__name__] = value
return value

Sure, just have your property set an instance attribute that is returned on subsequent access:
class Foo(object):
_cached_bar = None
#property
def bar(self):
if not self._cached_bar:
self._cached_bar = self._get_expensive_bar_expression()
return self._cached_bar
The property descriptor is a data descriptor (it implements __get__, __set__ and __delete__ descriptor hooks), so it'll be invoked even if a bar attribute exists on the instance, with the end result that Python ignores that attribute, hence the need to test for a separate attribute on each access.
You can write your own descriptor that only implements __get__, at which point Python uses an attribute on the instance over the descriptor if it exists:
class CachedProperty(object):
def __init__(self, func, name=None):
self.func = func
self.name = name if name is not None else func.__name__
self.__doc__ = func.__doc__
def __get__(self, instance, class_):
if instance is None:
return self
res = self.func(instance)
setattr(instance, self.name, res)
return res
class Foo(object):
#CachedProperty
def bar(self):
return self._get_expensive_bar_expression()
If you prefer a __getattr__ approach (which has something to say for it), that'd be:
class Foo(object):
def __getattr__(self, name):
if name == 'bar':
bar = self.bar = self._get_expensive_bar_expression()
return bar
return super(Foo, self).__getattr__(name)
Subsequent access will find the bar attribute on the instance and __getattr__ won't be consulted.
Demo:
>>> class FooExpensive(object):
... def _get_expensive_bar_expression(self):
... print 'Doing something expensive'
... return 'Spam ham & eggs'
...
>>> class FooProperty(FooExpensive):
... _cached_bar = None
... #property
... def bar(self):
... if not self._cached_bar:
... self._cached_bar = self._get_expensive_bar_expression()
... return self._cached_bar
...
>>> f = FooProperty()
>>> f.bar
Doing something expensive
'Spam ham & eggs'
>>> f.bar
'Spam ham & eggs'
>>> vars(f)
{'_cached_bar': 'Spam ham & eggs'}
>>> class FooDescriptor(FooExpensive):
... bar = CachedProperty(FooExpensive._get_expensive_bar_expression, 'bar')
...
>>> f = FooDescriptor()
>>> f.bar
Doing something expensive
'Spam ham & eggs'
>>> f.bar
'Spam ham & eggs'
>>> vars(f)
{'bar': 'Spam ham & eggs'}
>>> class FooGetAttr(FooExpensive):
... def __getattr__(self, name):
... if name == 'bar':
... bar = self.bar = self._get_expensive_bar_expression()
... return bar
... return super(Foo, self).__getatt__(name)
...
>>> f = FooGetAttr()
>>> f.bar
Doing something expensive
'Spam ham & eggs'
>>> f.bar
'Spam ham & eggs'
>>> vars(f)
{'bar': 'Spam ham & eggs'}

Sure it is, try:
class Foo(object):
def __init__(self):
self._bar = None # Initial value
#property
def bar(self):
if self._bar is None:
self._bar = HeavyObject()
return self._bar
Note that this is not thread-safe. cPython has GIL, so it's a relative issue, but if you plan to use this in a true multithread Python stack (say, Jython), you might want to implement some form of lock safety.

Related

After using property decorator, python object has two very similar attributes (foo.bar and foo._bar). Is that ok?

So I'm refactoring my code to be more Pythonic - specifically I've learned that using explicit getters and setters should be replaced with #property. My case is that i have an Example class with initialized bar attribute (initialization helps me to know that user set the bar):
class Example:
def __init__(self):
self.bar = 'initializedValue'
#property
def bar(self):
return self._bar
#bar.setter
def bar(self, b):
self._bar = b
def doIfBarWasSet():
if self.bar != 'initializedValue':
pass
else:
pass
after running foo = Example() my debugger shows that foo has two attributes: _bar and bar, both set to 'initializedValue'. Also, when I run foo.bar = 'changedValue' or foo._bar = 'changedValue', both of them are changed to 'changedValue'. Why there are two attributes? Isn't that redundant? I think I understand why there is _bar attribute - I added it in #bar.setter, but why there is bar as an string attribute? Shouldn't bar be rather a method leading to bar #property?
It's fine. Keep in mind that bar is not an instance attribute, but a class attribute. Since it has type property, it implements the descriptor protocol so that its behavior is different when accessed from an instance. If e is an instance of Example, then e.bar does not give you the instance of property assigned to Example.bar; it gives you the result of Example.bar.__get__(e, Example) (which in this case, happens to be Example.bar.fget(e), where fget is the original function decorated by #property).
In short, every instance has its own _bar attribute, but access to that attribute is mediated by the class attribute Example.bar.
It's easier to see that bar is a class attribute if you write this minimal (and sufficient, since neither the getter nor setter in this case requires a def statement) definition.
class Example:
def __init__(self):
self.bar = "initalizedValue"
bar = property(lambda self: self._bar, lambda self, b: setattr(self, '_bar', b))
or more generally
def bar_getter(self):
return self._bar
def bar_setter(self, b):
self._bar = b
class Example:
def __init__(self):
self.bar = "initalizedValue"
bar = property(bar_getter, bar_setter)

why instance.attribute = value can work while __set__ method is not implemented in property?

I am getting through the property to study descriptor protocol, and I am writing my own property like this:
class my_property(object):
def __init__(self, fget=None, fset=None, fdel=None, doc=None):
self.fget = fget
self.fset = fset
self.fdel = fdel
if doc is None and fget is not None:
doc = fget.__doc__
self.__doc__ = doc
def __get__(self, obj, objtype=None):
if obj is None:
return self
if self.fget is None:
raise AttributeError("unreadable attribute")
return self.fget(obj)
def setter(self, fset):
return type(self)(self.fget, fset, self.fdel)
class test_my_property(object):
def __init__(self, value):
self._val = value
#my_property
def val(self):
return self._val
#val.setter
def val(self, value):
self._val = value
def main():
c = test_my_property(5)
print c.val
c.val = 10
print c.val
print type(c).__dict__['val'].__set__
And I get:
5
10
AttributeError: 'my_property' object has no attribute '__set__'
My question is, since "__set__" is not defined, then how "c.val = 10" can work?
if "__set__" is inherited from object by my_property, then, why it report the AttributeError?
__set__ is not inherited from object. Getting and setting val attribute works because when accessing an attribute of an object, first the instance will be checked, then the class. Since you set instance attribute val, it uses that. I think this is especially clear if you're looking at a simple example of this with no descriptors,
>>> class Foo(object):
... val = 5
...
>>> f = Foo()
>>> f.val # f doesn't have val so fallback on Foo
5
>>> f.val = 10
>>> f.val # f now has val so use it
10
>>> del f.val # oops what now
>>> f.val # class again
5
The only difference between the above example and yours is that your class val is (when you finish) a property.
With all that said, you generally don't want to be naming your property the same thing as the instance attribute that will hold it's contents. The usual formulation is something like this,
class Foo(object):
def __init__(self, value):
self._val = value
#property
def val(self):
return self._val
#Jared's answer is correct. __set__ is not inherited from object. I'll try to explain in another way, which might be clearer.
First, as you already understand, if your descriptor do have a __set__ method, it gets called when running c.val=10. That means the interpreter looks for the __set__ method, and if it founds it, it treats it as a descriptor, by calling it.
Now, since my_property does not have a __set__ method, it won't get the descriptor treatment when running c.val=10. The interpreter falls back to the "standard" treatment, which is roughly equivalent to c.__dict__['val']=10.
You can easily verify that using:
print c.__dict__ # no 'val'
c.val = 10
print c.__dict__ # 'val' was added
Now, the 'val' in c.__dict__ (at object level) overshaddows your property (which is defined at class level), and will get used when accessing c.val.
If you want to forbid assignment to your property, you'd need to do it explicitly. You'd need to define a __set__ method and raise an error in it.

Python class #property: use setter but evade getter?

In python classes, the #property is a nice decorator that avoids using explicit setter and getter functions. However, it comes at a cost of an overhead 2-5 times that of a "classical" class function. In my case, this is quite OK in the case of setting a property, where the overhead is insignificant compared to the processing that needs to be done when setting.
However, I need no processing when getting the property. It is always just "return self.property". Is there an elegant way to use the setter but not using the getter, without needing to use a different internal variable?
Just to illustrate, the class below has the property "var" which refers to the internal variable "_var". It takes longer to call "var" than "_var" but it would be nice if developers and users alike could just use "var" without having to keep track of "_var" too.
class MyClass(object):
def __init__(self):
self._var = None
# the property "var". First the getter, then the setter
#property
def var(self):
return self._var
#var.setter
def var(self, newValue):
self._var = newValue
#... and a lot of other stuff here
# Use "var" a lot! How to avoid the overhead of the getter and not to call self._var!
def useAttribute(self):
for i in xrange(100000):
self.var == 'something'
For those interested, on my pc calling "var" takes 204 ns on average while calling "_var" takes 44 ns on average.
Don't use a property in this case. A property object is a data descriptor, which means that any access to instance.var will invoke that descriptor and Python will never look for an attribute on the instance itself.
You have two options: use the .__setattr__() hook or build a descriptor that only implements .__set__.
Using the .__setattr__() hook
class MyClass(object):
var = 'foo'
def __setattr__(self, name, value):
if name == 'var':
print "Setting var!"
# do something with `value` here, like you would in a
# setter.
value = 'Set to ' + value
super(MyClass, self).__setattr__(name, value)
Now normal attribute lookups are used when reading .var but when assigning to .var the __setattr__ method is invoked instead, letting you intercept value and adjust it as needed.
Demo:
>>> mc = MyClass()
>>> mc.var
'foo'
>>> mc.var = 'bar'
Setting var!
>>> mc.var
'Set to bar'
A setter descriptor
A setter descriptor would only intercept variable assignment:
class SetterProperty(object):
def __init__(self, func, doc=None):
self.func = func
self.__doc__ = doc if doc is not None else func.__doc__
def __set__(self, obj, value):
return self.func(obj, value)
class Foo(object):
#SetterProperty
def var(self, value):
print 'Setting var!'
self.__dict__['var'] = value
Note how we need to assign to the instance .__dict__ attribute to prevent invoking the setter again.
Demo:
>>> f = Foo()
>>> f.var = 'spam'
Setting var!
>>> f.var = 'ham'
Setting var!
>>> f.var
'ham'
>>> f.var = 'biggles'
Setting var!
>>> f.var
'biggles'
property python docs: https://docs.python.org/2/howto/descriptor.html#properties
class MyClass(object):
def __init__(self):
self._var = None
# only setter
def var(self, newValue):
self._var = newValue
var = property(None, var)
c = MyClass()
c.var = 3
print ('ok')
print (c.var)
output:
ok
Traceback (most recent call last):
File "Untitled.py", line 15, in <module>
print c.var
AttributeError: unreadable attribute
The #WeizhongTu answer
class MyClass(object):
def __init__(self):
self._var = None
# only setter
def var(self, newValue):
self._var = newValue
var = property(None, var)
c = MyClass()
c.var = 3
print ('ok')
print (c.var)
Is fine, except from the fact that is making the variable ungettable...
A similar solution but preserving getter is with
var = property(lambda self: self._var, var)
instead of
var = property(None, var)
The accepted answer's setter descriptor would be probably more convenient if it set the property by itself:
A setter descriptor (alt.)
class setter:
def __init__(self, func, doc=None):
self.func = func
self.__doc__ = doc or func.__doc__
def __set__(self, obj, value):
obj.__dict__[self.func.__name__] = self.func(obj, value)
class Foo:
#setter
def var(self, value):
print('Setting var!')
# validations and/or operations on received value
if not isinstance(value, str):
raise ValueError('`var` must be a string')
value = value.capitalize()
# returns property value
return value
Demo:
>>> f = Foo()
>>> f.var = 'spam'
Setting var!
>>> f.var = 'ham'
Setting var!
>>> f.var
'Ham'
>>> f.var = 'biggles'
Setting var!
>>> f.var
'Biggles'
>>> f.var = 3
ValueError: `var` must be a string

Use class variables as instance vars?

What I would like to do there is declaring class variables, but actually use them as vars of the instance. I have a class Field and a class Thing, like this:
class Field(object):
def __set__(self, instance, value):
for key, v in vars(instance.__class__).items():
if v is self:
instance.__dict__.update({key: value})
def __get__(self, instance, owner):
for key, v in vars(instance.__class__).items():
if v is self:
try:
return instance.__dict__[key]
except:
return None
class Thing(object):
foo = Field()
So when I instantiate a thing and set attribute foo, it will be added to the instance, not the class, the class variable is never actually re-set.
new = Thing()
new.foo = 'bar'
# (foo : 'bar') is stored in new.__dict__
This works so far, but the above code for Field is rather awkward. It has too look for the Field object instance in the classes props, otherwise there seems no way of knowing the name of the property (foo) in __set__ and __get__. Is there another, more straight forward way to accomplish this?
Every instance of Field (effectively) has a name. Its name is the attribute name (or key) which references it in Thing. Instead of having to look up the key dynamically, you could instantiate Fields with the name at the time the class attribute is set in Thing:
class Field(object):
def __init__(self, name):
self.name = name
def __set__(self, instance, value):
instance.__dict__.update({self.name: value})
def __get__(self, instance, owner):
if instance is None:
return self
try:
return instance.__dict__[self.name]
except KeyError:
return None
def make_field(*args):
def wrapper(cls):
for arg in args:
setattr(cls, arg, Field(arg))
return cls
return wrapper
#make_field('foo')
class Thing(object):
pass
And it can be used like this:
new = Thing()
Before new.foo is set, new.foo returns None:
print(new.foo)
# None
After new.foo is set, 'foo' is an instance attribute of new:
new.foo = 'bar'
print(new.__dict__)
# {'foo': 'bar'}
You can access the descriptor (the Field instance itself) with Thing.foo:
print(Thing.foo)
# <__main__.Field object at 0xb76cedec>
PS. I'm assuming you have a good reason why
class Thing(object):
foo = None
does not suffice.
Reread your question and realized I had it wrong:
You don't need to override the default python behavior to do this. For example, you could do the following:
class Thing(object):
foo = 5
>>> r = Thing()
>>> r.foo = 10
>>> s = Thing()
>>> print Thing.foo
5
>>> print r.foo
10
>>> print s.foo
5
If you want the default to be 'None' for a particular variable, you could just set the class-wide value to be None. That said, you would have to declare it specifically for each variable.
The easiest way would be to call the attribute something else than the name of the descriptor variable - preferably starting with _ to signal its an implementation detail. That way, you end up with:
def __set__(self, instance, value):
instance._foo = value
def __get__(self, instance, owner):
return getattr(instance, '_foo', None)
The only drawback of this is that you can't determine the name of the key from the one used for the descriptor. If that increased coupling isn't a problem compared to the loop, you could just use a property:
class Thing:
#property
def foo(self):
return getattr(self, '_foo', None)
#foo.setter
def foo(self, value):
self._foo = value
otherwise, you could pass the name of the variable into the descriptor's __init__, so that you have:
class Thing:
foo = Field('_foo')
Of course, all this assumes that the simplest and most Pythonic way - use a real variable Thing().foo that you set to None in Thing.__init__ - isn't an option for some reason. If that way will work for you, you should prefer it.

Is it somehow possible to *live* modify Python code (like in Lisp or Erlang)

I was wondering if it is somehow possible to modify Python code live, while keeping all state of instantiated objects and methods, like I think is possible in Lisp or Erlang (*) ?
Say, I have an active Python sessions, where I instantiated the foo class from a self-written module:
class foo():
#classmethod
def do_something(self):
print "this is good"
Python command line:
>>> f =foo()
>>> f.do_something()
Now, I would like to change the print statement into something else (e.g. print "this is better"). If I edit my module file to do so, and reload it, I have to re-instantiate the f object. Is there a way to be able to just call f.do_something() again without having to call f=foo() first?
So, I have to do this:
>>> reload my_module
>>> f =foo()
>>> f.do_something() # with changed print statement
But I want to do this:
>>> reload my_module
>>> f.do_something() # with changed print statement
(*) I am basing this statement on the cool Erlang movie and this fragment from Practical Common Lisp: 'When the bug manifested in the wild--100 million miles away from Earth--the team was able to diagnose and fix the running code, allowing the experiments to complete.'
Edit: I've been thinking a bit more about this and maybe what I want is inherently flawed for applying to OO (i.e., what about the state of the class and methods). I think Erlang allows this because, as far as I recall, it is more about separate communicating objects, so live updating the code of an object makes more sense. I am not sure though, so still open for answers.
Edit2: Maybe the best way to describe what I want is recapitulate what I said in a comment in a post below: "When called, the methods just have to point to the new method definitions/locations."
Yes you can, pretty simple too. You want to change only the instance f, not the class foo, right?
>>> class foo():
#classmethod
def do_something(self):
print "this is good"
>>> f = foo()
>>> f.do_something()
this is good
>>> def new_ds():
print "this is better"
>>> f.do_something = new_ds
>>> f.do_something()
this is better
>>> f2 = foo()
>>> f2.do_something() #unchanged
this is good
EDIT
This is almost certainly less than desirable due to the change in scope, but changes like this took place for me immediately upon reload
testmod.py -- initially
class foo():
#classmethod
def do_something(self):
outside_func()
def outside_func():
print "this is good"
testmod.py -- after change
class foo():
#classmethod
def do_something(self):
outside_func()
def outside_func():
print "this is better"
Interpreter
>>> import testmod
>>> f = testmod.foo()
>>> f.do_something()
this is good
>>> reload(testmod)
<module 'testmod' from 'C:\Python26\testmod.py'>
>>> f.do_something()
this is better
You can create a class decorator or a metaclass that makes sure that the class of the old objects is changed on class reload. Here's a working (at least for me) example, though I wouldn't suggest you to use it as it is, but use it as an inspiration to create something that matches your intentions and needs. (It is also not tested on classes that don't define __init__, so be wary.)
import sys
import weakref
class _ObSet(weakref.WeakValueDictionary):
def add(self, ob):
self[id(ob)] = ob
def remove(self, ob):
del self[id(ob)]
def __iter__(self):
return self.itervalues()
def reloadable(cls):
# Change the __init__ of the old class to store the instances
# in cls.__instances (you might stick this into a class as a
# static method to avoid name collisions)
if '__init__' in vars(cls):
old_init = vars(cls)['__init__']
def __init__(self, *a, **kw):
self.__class__.__instances.add(self)
old_init(self, *a, **kw)
cls.__init__ = __init__
elif '__new__' in vars(cls):
old_new = vars(cls)['__new__']
def __new__(cls, *a, **kw):
self = old_new(cls, *a, **kw)
cls.__instances.add(self)
return self
cls.__new__ = __new__
else:
def __init__(self, *a, **kw):
self.__class__.__instances.add(self)
super(cls, self).__init__(*a, **kw)
cls.__init__ = __init__
cls.__instances = _ObSet()
module = sys.modules.get(cls.__module__)
if module is None:
return cls
old_cls = getattr(module, cls.__name__, None)
if old_cls is None:
return cls
# Change the bases of all subclasses of the old class
for ob in old_cls.__instances:
if ob.__class__ is old_cls:
ob.__class__ = cls
# Change the class of all instances of the old class
for child_cls in old_cls.__subclasses__():
child_cls.__bases__ = tuple(cls if base is old_cls else base
for base in child_cls.__bases__)
return cls
Here's an example of how it is used:
from reloading import reloadable
#reloadable
class A(object):
def __init__(self, a, b):
self.a = a
self.b = b
class B1(A):
def __init__(self, c, *a):
super(B1, self).__init__(*a)
self.c = c
#reloadable
class B2(A):
def __init__(self, c, *a):
super(B2, self).__init__(*a)
self.c = c
And then how it works:
>>> import test_reload
>>> a = test_reload.A(1, 2)
>>> b1 = test_reload.B1(1, 2, 3)
>>> b2 = test_reload.B2(1, 4, 6)
>>> isinstance(a, test_reload.A)
True
>>> isinstance(b1, test_reload.A)
True
>>> isinstance(b1, test_reload.B1)
True
>>> isinstance(b2, test_reload.A)
True
>>> isinstance(b2, test_reload.B2)
True
>>> reload(test_reload)
<module 'test_reload' from 'test_reload.pyc'>
>>> isinstance(a, test_reload.A)
True
>>> isinstance(b1, test_reload.A)
True
>>> isinstance(b1, test_reload.B1) # will fail, not #reloadable
False
>>> isinstance(b2, test_reload.A)
True
>>> isinstance(b2, test_reload.B2)
True
>>> a.a, a.b
(1, 2)
>>> b1.a, b1.b, b1.c
(2, 3, 1)
>>> b2.a, b2.b, b2.c
(4, 6, 1)
This shows that you can modify an existing class and have those changes be manifest in instances of that class. The key is to modify the existing class rather than (re)create a new class with the same name as the old class.
>>> class foo():
... #classmethod
... def do_something(self):
... print "this is good"
...
>>> f = foo()
>>> f.do_something()
this is good
>>> def do_something_else(self):
... print "this is better"
...
>>> foo.do_something = do_something_else
>>> f.do_something()
this is better

Categories

Resources