Is it possible to overload a call to vars(object) in Python? - python

I have a class that overloads object attribute access by returning the attributes of its "row" attribute, along the following lines:
from collections import namedtuple
class MyObj(object):
def __init__(self, y, z):
r = namedtuple('row', 'a b')
self.row = r(y, z)
self.arbitrary = True
def __getattr__(self, attr):
return getattr(self.row, attr)
def __dir__(self):
return list(self.row._fields)
In [2]: m = MyObj(1, 2)
In [3]: dir(m)
Out[3]: ['a', 'b']
In [4]: m.a
Out[4]: 1
In [5]: vars(m)
Out[5]: {'arbitrary': True, 'row': row(a=1, b=2)}
In [6]: output = '{a} -> {b}'
In [7]: output.format(**vars(m.row))
Out[7]: '1 -> 2'
In [8]: output.format(**vars(m))
KeyError: 'a'
As I quite often do string formatting using vars() I'd like to be able to access row's attributes directly from the call to vars(). Is this possible?
Edit following aaronsterling's answer
The key to solving this, thanks to Aaron's pointer, is to check for __dict__ in __getattribute__
from collections import namedtuple
class MyObj(object):
def __init__(self, y, z):
r = namedtuple('row', 'a b')
self.row = r(y, z)
self.arbitrary = True
def __getattr__(self, attr):
return getattr(self.row, attr)
def __getattribute__(self, attribute):
if attribute == '__dict__':
return self.row._as_dict()
else:
return object.__getattribute__(self, attribute)
def __dir__(self):
return list(self.row._fields)
In [75]: m = MyObj(3, 4)
In [76]: m.a
Out[76]: 3
In [77]: vars(m)
Out[77]: OrderedDict([('a', 3), ('b', 4)])

You can also define a #property for __dict__ instead of overriding __getattribute__.
class Example:
#property
def __dict__(self):
return "From vars()"
e = Example()
print(vars(e)) # From vars()

The docs are so kind as to specify that vars works by returning the __dict__ attribute of the object it's called on. Hence overriding __getattribute__ does the trick. Y
I subclass dict (but see later) to override the __str__ function. The subclass accepts a function str_func which gets called to return a string representation of how you want your __dict__ object to appear. It does this by constructing a regular dictionary with the entries that you want and then calling str on that.
This is very hacky. In particular, it will break any code that depends on doing anything like
myobj.__dict__[foo] = bar
This code will now update a phantom dictionary and not the real one.
A much more robust solution would depend on completely replacing all methods that set values on the SpoofedDict with methods that actually update myobj.__dict__. This would require SpoofedDict instances to hold a reference to myobj.__dict__. Then of course, the methods that read values would have to fetch them out of myobj.__dict__ as well.
At that point, you're better off using collections.Mapping to construct a custom class rather than subclassing from dict.
Here's the proof of concept code, hackish as it may be:
from collections import namedtuple
class SpoofDict(dict):
def __init__(self, *args, **kwargs):
self.str_func = kwargs['str_func']
del kwargs['str_func']
dict.__init__(self, *args, **kwargs)
def __str__(self):
return self.str_func()
class MyObj(object):
def __init__(self, y, z):
r = namedtuple('row', 'a b')
self.row = r(y, z)
self.arbitrary = True
def __getattr__(self, attr):
return getattr(self.row, attr)
def __dir__(self):
return list(self.row._fields)
def str_func(self):
attrs = list(self.row._fields)
str_dict = {}
row = object.__getattribute__(self, 'row')
for attr in attrs:
str_dict[attr] = getattr(row, attr)
return str(str_dict)
def __getattribute__(self, attribute):
if attribute == '__dict__':
spoof_dict = SpoofDict(str_func=self.str_func)
spoof_dict.update(object.__getattribute__(self, '__dict__'))
return spoof_dict
else:
return object.__getattribute__(self, attribute)
if __name__=='__main__':
m = MyObj(1, 2)
print "dir(m) = {0}".format(dir(m))
print "vars(m) = {0}".format(vars(m))
print "m.row = {0}".format(m.row)
print "m.arbitrary = {0}".format(m.arbitrary)

Related

python class behaves like dictionary-or-list-data-like

In python3 console, input those:
>>> import sys
>>> sys.version_info
sys.version_info(major=3, minor=4, micro=3, releaselevel='final', serial=0)
>>> type(sys.version_info) # this is class type
<class 'sys.version_info'>
>>> sys.version_info[0:2] # ?? But it acts like a list-data-like
(3, 4)
My questions are:
How can a class act like dictionary-or-list-data-like?
May give an example to construct a class like this?
Is there some documentation about
this?
Python contains several methods for emulating container types such as dictionaries and lists.
In particular, consider the following class:
class MyDict(object):
def __getitem__(self, key):
# Called for getting obj[key]
def __setitem__(self, key, value):
# Called for setting obj[key] = value
If you write
obj = MyDict()
Then
obj[3]
will call the first method, and
obj[3] = 'foo'
will call the second method.
If you further want to support
len(obj)
then you just need to add the method
def __len__(self):
# Return here the logical length
Here is an example of a (very inefficient) dictionary implemented by a list
class MyDict(object):
def __init__(self, seq=None):
self._vals = list(seq) if seq is not None else []
def __getitem__(self, key):
return [v[1] for v in self._vals if v[0] == key][0]
def __setitem__(self, key, val):
self._vals = [v for v in self._vals if v[0] != key]
self._vals.append((key, val))
def __len__(self):
return len(self._vals)
You can use it pretty much like a regular dict:
obj = MyDict()
obj[2] = 'b'
>>> obj[2]
'b'
It's quite easy ... All you need to do is define a __getitem__ method that handles slicing or integer/string lookup. You can do pretty much whatever you want...
class Foo(object):
def __init__(self, bar, baz):
self.bar = bar
self.baz = baz
def __getitem__(self, ix):
return (self.bar, self.baz).__getitem__(ix)
Here's a cheat sheet of what will be passed to __getitem__ as ix in the following situations:
f[1] # f.__getitem__(1)
f[1:] # f.__getitem__(slice(1, None, None))
f[1:, 2] # f.__getitem__( (slice(1, None, None), 2) )
f[1, 2] # f.__getitem__( (1, 2) )
f[(1, 2)] # f.__getitem__( (1, 2) )
The trick (which can be slightly non-trivial) is simply writing __getitem__ so that it looks at the type of the object that was passed and then does the right thing. For my answer, I cheated by creating a tuple in my __getitem__ and then I called __getitem__ on the tuple (since it already does the right thing in all of the cases that I wanted to support)
Here's some example usage:
>>> f = Foo(1, 2)
>>> f[1]
2
>>> f[0]
1
>>> f[:]
(1, 2)
note that you don't typically need to even do this yourself. You can create a named tuple to do the job for you:
from collections import namedtuple
Foo = namedtuple('Foo', 'bar, baz')
And usage is pretty much the same:
>>> f = Foo(1, 2)
>>> f[1]
2
>>> f[0]
1
>>> f[:]
(1, 2)
The main difference here is that our namedtuple is immutable. Once created, we can't change it's members.
i think in python like ECMAScript (aka javascript) class is a dictionary or associative array(associative array). since you can add a property or method to your class at runtime.(see)
class A(object):
def __init__(self):
self.x = 0
a = A()
a.y=5
print a.y # 5
if you want write a class like that you can use __getitem__ and __setitem__ methods:
class A(object):
class B(object):
def __init__(self, x, y):
self.vals = (x, y)
def __getitem__(self, key):
return self.vals[key]
def __setitem__(self, key, val):
self.vals[key] = val
def __len__(self):
return len(self.__vals)
def __init__(self, x, y):
self.b = self.B(x,y)
a = A('foo','baz')
print type(a.b) # __main__.b __main__ because we run script straightly
print a.b[:] # ('foo', 'baz')
You can achieve the same behaviour by overriding getitem() and setitem() in your class.
class Example:
def __getitem__(self, index):
return index ** 2
>>> X = Example()
>>> X[2]
>>> 4
You can override setitem() too in your class for achieving the setter kind of thing.

pythonic way to index list of objects

I have a list of objects. Each object has two fields
obj1.status = 2
obj1.timestamp = 19211
obj2.status = 3
obj2.timestamp = 14211
obj_list = [obj1, obj2]
I will keep adding / deleting objects in the list and also changing attributes of objects, for example I may change ob1.status to 5.
Now I have two dicts
dict1 - <status, object>
dict2 - <timestamp, object>
How do I design a simple solution so that whenever I modify/delete/insert elements in the list, the maps get automatically updated. I am interested in a pythonic solution that is elegant and extensible. For example in future, I should be able to easily add another attribute and dict for that as well
Also for simplicity, let us assume all attributes value are different. For example no two objects will have same status
You could override the __setattr__ on the objects to update the indexes whenever you set the values. You can use a weakref dictionary for the indexes so that when you delete objects and are no longer using them, they are automatically removed from the indexes.
import weakref
from bunch import Bunch
class MyObject(object):
indexes = Bunch() # Could just use dict()
def __init__(self, **kwargs):
super(MyObject, self).__init__()
for k, v in kwargs.items():
setattr(self, k, v)
def __setattr__(self, name, value):
try:
index = MyObject.indexes[name]
except KeyError:
index = weakref.WeakValueDictionary()
MyObject.indexes[name] = index
try:
old_val = getattr(self, name)
del index[old_val]
except (KeyError, AttributeError):
pass
object.__setattr__(self, name, value)
index[value] = self
obj1 = MyObject(status=1, timestamp=123123)
obj2 = MyObject(status=2, timestamp=2343)
print MyObject.indexes.status[1]
print obj1.indexes.timestamp[2343]
obj1.status = 5
print obj2.indexes['status'][5]
I used a Bunch here because it allows you to access the indexes using .name notation, but you could just use a dict instead and use the ['name'] syntax.
One approach here would be to create a class level dict for MyObj and define updating behavior using property decorator. Every time an object is changed or added, it is reflected in the respected dictionaries associated with the class.
Edit: as #BrendanAbel points out, using weakref.WeakValueDictionary in place of dict handles object deletion from class level dicts.
from datetime import datetime
from weakref import WeakValueDictionary
DEFAULT_TIME = datetime.now()
class MyObj(object):
"""
A sample clone of your object
"""
timestamps = WeakValueDictionary()
statuses = WeakValueDictionary()
def __init__(self, status=0, timestamp=DEFAULT_TIME):
self._status = status
self._timestamp = timestamp
self.status = status
self.timestamp = timestamp
def __update_class(self):
MyObj.timestamps.update({self.timestamp: self})
MyObj.statuses.update({self.status: self})
def __delete_from_class(self):
maybe_self = MyObj.statuses.get(self.status, None)
if maybe_self is self is not None:
del MyObj.statuses[self.status]
maybe_self = MyObj.timestamps.get(self.timestamp, None)
if maybe_self is self is not None:
del MyObj.timestamps[self.timestamp]
#property
def status(self):
return self._status
#status.setter
def status(self, val):
self.__delete_from_class()
self._status = val
self.__update_class()
#property
def timestamp(self):
return self._timestamp
#timestamp.setter
def timestamp(self, val):
self.__delete_from_class()
self._timestamp = val
self.__update_class()
def __repr__(self):
return "MyObj: status={} timestamp={}".format(self.status, self.timestamp)
obj1 = MyObj(1)
obj2 = MyObj(2)
obj3 = MyObj(3)
lst = [obj1, obj2, obj3]
# In [87]: q.lst
# Out[87]:
# [MyObj: status=1 timestamp=2016-05-27 13:43:38.158363,
# MyObj: status=2 timestamp=2016-05-27 13:43:38.158363,
# MyObj: status=3 timestamp=2016-05-27 13:43:38.158363]
# In [88]: q.MyObj.statuses[1]
# Out[88]: MyObj: status=1 timestamp=2016-05-27 13:43:38.158363
# In [89]: q.MyObj.statuses[1].status = 42
# In [90]: q.MyObj.statuses[42]
# Out[90]: MyObj: status=42 timestamp=2016-05-27 13:43:38.158363
# In [91]: q.MyObj.statuses[1]
# ---------------------------------------------------------------------------
# KeyError Traceback (most recent call last)
# <ipython-input-91-508ab072bfc4> in <module>()
# ----> 1 q.MyObj.statuses[1]
# KeyError: 1
For a collection to be aware of mutation of its elements, there must be some connection between the elements and that collection which can communicate when changes happen. For this reason, we either must bind an instance to a collection or proxy the elements of the collection so that change-communication doesn't leak into the element's code.
A note about the implementation I'm going to present, the proxying method only works if the attributes are changed by direct setting, not inside of a method. A more complex book-keeping system would be necessary then.
Additionally, it assumes that exact duplicates of all attributes won't exist, given that you require the indices be built out of set objects instead of list
from collections import defaultdict
class Proxy(object):
def __init__(self, proxy, collection):
self._proxy = proxy
self._collection = collection
def __getattribute__(self, name):
if name in ("_proxy", "_collection"):
return object.__getattribute__(self, name)
else:
proxy = self._proxy
return getattr(proxy, name)
def __setattr__(self, name, value):
if name in ("_proxy", "collection"):
object.__setattr__(self, name, value)
else:
proxied = self._proxy
collection = self._collection
old = getattr(proxied, name)
setattr(proxy, name, value)
collection.signal_change(proxied, name, old, value)
class IndexedCollection(object):
def __init__(self, items, index_names):
self.items = list(items)
self.index_names = set(index_names)
self.indices = defaultdict(lambda: defaultdict(set))
def __len__(self):
return len(self.items)
def __iter__(self):
for i in range(len(self)):
yield self[i]
def remove(self, obj):
self.items.remove(obj)
self._remove_from_indices(obj)
def __getitem__(self, i):
# Ensure consumers get a proxy, not a raw object
return Proxy(self.items[i], self)
def append(self, obj):
self.items.append(obj)
self._add_to_indices(obj)
def _add_to_indices(self, obj):
for indx in self.index_names:
key = getattr(obj, indx)
self.indices[indx][key].add(obj)
def _remove_from_indices(self, obj):
for indx in self.index_names:
key = getattr(obj, indx)
self.indices[indx][key].remove(obj)
def signal_change(self, obj, indx, old, new):
if indx not in self.index_names:
return
# Tell the container to update its indices for a
# particular attribute and object
self.indices[indx][old].remove(obj)
self.indices[indx][new].add(obj)
I am not sure if this is what you are asking for but ...
Objects:
import operator
class Foo(object):
def __init__(self):
self.one = 1
self.two = 2
f = Foo()
f.name = 'f'
g = Foo()
g.name = 'g'
h = Foo()
h.name = 'h'
name = operator.attrgetter('name')
lists: a initially contains f and b initially contains h
a = [f]
b = [h]
dictionaries: each with one item whose value is one of the lists
d1 = {1:a}
d2 = {1:b}
d1[1] is list a which contains f and f.one is 1
>>> d1
{1: [<__main__.Foo object at 0x03F4CA50>]}
>>> name(d1[1][0])
'f'
>>> name(d1[1][0]), d1[1][0].one
('f', 1)
changing f.one is seen in the dictionary
>>> f.one = '?'
>>> name(d1[1][0]), d1[1][0].one
('f', '?')
>>>
d2[1] is list b which contains h
>>> d2
{1: [<__main__.Foo object at 0x03F59070>]}
>>> name(d2[1][0]), d2[1][0].one
('h', 1)
Add an object to b and it is seen in the dictionary
>>> b.append(g)
>>> b
[<__main__.Foo object at 0x03F59070>, <__main__.Foo object at 0x03F4CAF0>]
>>> d2
{1: [<__main__.Foo object at 0x03F59070>, <__main__.Foo object at 0x03F4CAF0>]}
>>> name(d2[1][1]), d2[1][1].one
('g', 1)

Python make dictionary items accessible as object property

This is mostly syntactic sugar but I'd like to access the items of a dictionary as object properties.
Example:
class CoolThing():
def __init__(self):
self.CoolDict = {'a': 1, 'b': 2}
and I'd like to have
my_cool_thing.a # => 1
my_cool_thing.b # => 2
Edit: some code of a potential solution with a nested structure with dot notation: device.property.field
class Parameters():
def __init__(self, ids, devices):
self._ids = ids
self._devices = devices
for p in self._devices:
p = p[0]
if self.__dict__.get(p.device) is None:
self.__dict__[p.device] = SmartDict()
else:
if self.__dict__[p.device].get(p.property) is None:
self.__dict__[p.device][p.property] = SmartDict()
else:
if self.__dict__[p.device][p.property].get(p.field) is None:
self.__dict__[p.device][p.property][p.field] = ParameterData(p)
class SmartDict():
def __init__(self):
self.__dict__ = {}
def __getitem__(self, k):
return self.__dict__[k]
def __setitem__(self, k, v):
self.__dict__[k] = v
def get(self, k):
return self.__dict__.get(k)
def __len__(self):
return len(self.__dict__)
You want __getattr__ and __setattr__, though you'll have to roll your own class (I'm not aware of any builtins, though namedtuple might work if you don't need to change values much)
class AttrDict(dict):
def __getattr__(self, attr):
return self[attr]
def __setattr__(self, attr, value):
self[attr] = value
If you just want to access a sub-dictionary that way, you just change self to self.cool_dict
class CoolThing:
def __init__(self):
self.cool_dict = {'a': 1, 'b': 2}
def __getattr__(self, attr):
return self.cool_dict[attr]
def __setattr__(self, attr, value):
# Note, you'll have to do this for anything that you want to set
# in __init__.
if attr == 'cool_dict':
super().__setattr__(attr, value)
else:
self.cool_dict[attr] = value
Note that __getattr__ is used after any other lookups fail, but if you want to ensure that your function is called first, you can use __getattribute__
Also note that self.cool_dict does not exist on CoolThing until after __init__ is called. My initial version of this would throw a maximum recursion depth exceeded, because as you created the class it would go to set self.cool_dict in init, call __setattr__, which would try to get self.cool_dict so it could set [attr] = value on it. Naturally it can't find cool_dict yet, and so it will try to call __getattr__ again... which can't find cool_dict and round and round it goes.
Another option would be to use a class-level variable instead, but that's probably not at all what you want :)
CoolDict already exists, it's named __dict__:
>>> class CoolThing(object):
... def __init__(self):
... self.__dict__['a'] = 1
... self.__dict__['b'] = 2
...
>>> thing = CoolThing()
>>> thing.a
1
>>> thing.b
2
>>> thing.c = 3
>>> thing.__dict__
{'a': 1, 'b': 2, 'c': 3}

Load All Method Results in Python Class to Dictionary

If I have a class like that below (in reality, it has a lot more methods), and I want to load each method's results into a dictionary, is there a faster way to do features_to_dict, that's also modular if I add new methods?
from bs4 import BeautifulSoup
class CraigsPage():
def __init__(self, page_file):
self._page = open(page_file).read()
self.soup = BeautifulSoup(self._page)
self.title = self.soup.title.string
self.body = str(self.soup.find('section', id='postingbody'))
def get_title_char_count(self):
return len(list(self.title.replace(' ', '')))
def get_title_word_count(self):
return len(self.title.split())
def get_body_char_count(self):
return len(list(self.body.replace(' ', '')))
def features_to_dict(self):
feature_dict = {}
feature_dict['title_char_count'] = self.get_title_char_count()
feature_dict['title_word_count'] = self.get_title_word_count()
feature_dict['body_char_count'] = self.get_body_char_count()
return feature_dict
The inspect module is handy for this sort of stuff:
def features_to_dict(self):
members = inspect.getmembers(self, inspect.ismethod)
return {name: method() for name, method in members if name.startswith('get')}
Python classes have __dict__ attribute, that stores all attributes for the class as a dictionary. The following snippet iterates over __dict__ trying to find functions beginning with get, and automatically runs them, storing the results to a dict:
class A(object):
def get_a(self):
return 1
def get_b(self):
return 2
def features_to_dict(self):
self.d = {}
for f_name, f in A.__dict__.iteritems():
if 'get' in f_name:
self.d[f_name[4:]] = f(self)
a = A()
a.features_to_dict()
print a.d
This returns {'a': 1, 'b': 2}.
Use dir() method instead of dict attribute.
class A(object):
def method(self):
return 123
def call_all(self):
skip = dir(object) + ['call_all']
results = {}
for key in dir(self):
if key not in skip and callable(getattr(self, key)):
try:
results[key] = getattr(self, key)()
except Exception as e:
results[key] = e
return results
Kinda simple approach, to not use introspection at all and explicitly define which methods to call:
class A(object):
methods_to_call = [
"get_title_char_count",
"get_title_word_count",
"get_body_char_count",
]
...
def features_to_dict(self):
feature_dict = {}
for method in self.methods_to_call:
feature_dict[method[4:]] = getattr(self, method)()
return feature_dict

Python - Access a class from a list using a key

Is there any way to make a list of classes behave like a set in python?
Basically, I'm working on a piece of software that does some involved string comparison, and I have a custom class for handling the strings. Therefore, there is an instance of the class for each string.
As a result, I have a large list containing all these classes. I would like to be able to access them like list[key], where in this case, the key is a string the class is based off of (note: the string will never change once the class is instantiated, so it should be hashable).
It seems to me that I should be able to do this somewhat easily, by adding something like __cmp__ to the class, but either I'm being obtuse (likely), or I'm missing something in the docs.
Basically, I want to be able to do something like this (Python prompt example):
>>class a:
... def __init__(self, x):
... self.var = x
...
>>> from test import a
>>> cl = set([a("Hello"), a("World"), a("Pie")])
>>> print cl
set([<test.a instance at 0x00C866C0>, <test.a instance at 0x00C866E8>, <test.a instance at 0x00C86710>])
>>> cl["World"]
<test.a instance at 0x00C866E8>
Thanks!
Edit Some additional Tweaks:
class a:
... def __init__(self, x):
... self.var = x
... def __hash__(self):
... return hash(self.var)
...
>>> v = a("Hello")
>>> x = {}
>>> x[v]=v
>>> x["Hello"]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
KeyError: 'Hello'
>>> x["Hello"]
Just write a class that behaves a bit like a mapping:
class ClassDict(object):
def __init__(self):
self.classes = {}
def add(self, cls):
self.classes[cls.__name__] = cls
def remove(self, cls):
if self.classes[cls.__name__] == cls:
del self.classes[cls.__name__]
else:
raise KeyError('%r' % cls)
def __getitem__(self, key):
return self.classes[key]
def __repr__(self):
return 'ClassDict(%s)' % (', '.join(self.classes),)
class C(object):
pass
class D(object):
pass
cd = ClassDict()
cd.add(C)
cd.add(D)
print cd
print cd['C']
Why don't you just do:
>>> v = MyStr("Hello")
>>> x = {}
>>> x[v.val]=v
>>> x["Hello"]
MyStr("Hello")
Why go through all the trouble of trying to create a hand-rolled dict that uses different keys than the ones you pass in? (i.e. "Hello" instead of MyStr("Hello")).
ex.
class MyStr(object):
def __init__(self, val):
self.val = str(val)
def __hash__(self):
return hash(self.val)
def __str__(self):
return self.val
def __repr__(self):
return 'MyStr("%s")' % self.val
>>> v = MyStr("Hello")
>>> x = {}
>>> x[str(v)]=v
>>> x["Hello"]
MyStr("Hello")
Set and dict use the value returned by an object's __hash__ method to look up the object, so this will do what you want:
>>class a:
... def __init__(self, x):
... self.var = x
...
... def __hash__(self):
... return hash(self.var)
As I remember "set" and "dict" uses also __hash__
From Python 2.x doc:
A dictionary’s keys are almost arbitrary values. Values that are not hashable, that is, values containing lists, dictionaries or other mutable types (that are compared by value rather than by object identity) may not be used as keys.
Do you want something like this
class A(object):
ALL_INSTANCES = {}
def __init__(self, text):
self.text = text
self.ALL_INSTANCES[self.text] = self
a1 = A("hello")
a2 = A("world")
print A.ALL_INSTANCES["hello"]
output:
<__main__.A object at 0x00B7EA50>

Categories

Resources