Recursive DotDict - python

I have a utility class that makes Python dictionaries behave somewhat like JavaScript objects as far as getting and setting attributes.
class DotDict(dict):
    """
    a dictionary that supports dot notation
    as well as dictionary access notation
    usage: d = DotDict() or d = DotDict({'val1':'first'})
    set attributes: d.val2 = 'second' or d['val2'] = 'second'
    get attributes: d.val2 or d['val2']
    """
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
I would like to make it so it also converts nested dictionaries into DotDict() instances. I was hoping to be able to do something like this with __init__ or __new__, but I haven't come up with anything that works:
def __init__(self, dct):
    for key in dct.keys():
        if hasattr(dct[key], 'keys'):
            dct[key] = DotDict(dct[key])
How can I recursively convert the nested dictionaries into DotDict() instances?
>>> dct = {'scalar_value':1, 'nested_dict':{'value':2}}
>>> dct = DotDict(dct)
>>> print dct
{'scalar_value': 1, 'nested_dict': {'value': 2}}
>>> print type(dct)
<class '__main__.DotDict'>
>>> print type(dct['nested_dict'])
<type 'dict'>

I don't see where you are copying the values in the constructor; because of that, the DotDict is always empty. Once I added the key assignment, it worked:
class DotDict(dict):
    """
    a dictionary that supports dot notation
    as well as dictionary access notation
    usage: d = DotDict() or d = DotDict({'val1':'first'})
    set attributes: d.val2 = 'second' or d['val2'] = 'second'
    get attributes: d.val2 or d['val2']
    """
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __init__(self, dct):
        for key, value in dct.items():
            if hasattr(value, 'keys'):
                value = DotDict(value)
            self[key] = value

dct = {'scalar_value': 1, 'nested_dict': {'value': 2, 'nested_nested': {'x': 21}}}
dct = DotDict(dct)
print dct.nested_dict.nested_nested.x
It looks a bit dangerous and error-prone, not to mention a source of countless surprises for other developers, but it seems to work.
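To illustrate one of those surprises: keys that collide with dict method names behave inconsistently, because __getattr__ is only consulted when normal attribute lookup fails (a minimal sketch using the class above):
d = DotDict({'scalar_value': 1})
d.keys = 'oops'    # routed through dict.__setitem__, so it adds the key 'keys'
print d['keys']    # 'oops'
print d.keys       # <built-in method keys ...> -- the real dict method wins,
                   # because __getattr__ is never called for existing attributes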

Shamelessly plugging my own package
There is a package that does exactly what you want, and something more: it is called Prodict.
from prodict import Prodict

life_dict = {'bigBang':
                {'stars':
                    {'planets': []}
                }
            }

life = Prodict.from_dict(life_dict)
print(life.bigBang.stars.planets)
# prints []

# you can even add new properties dynamically
life.bigBang.galaxies = []
PS 1: I'm the author of the Prodict.
PS 2: This is a direct copy-paste of an answer to another question.

I've been slightly unhappy with all the different answers I have found to this problem. My goals in my implementation were:
1) Don't create more new object attributes than necessary.
2) Don't allow overwriting access to built-in attributes.
3) The class converts added items to maintain consistency.
class attrdict(dict):
    """
    Attribute Dictionary.

    Enables getting/setting/deleting dictionary keys via attributes.
    Getting/deleting a non-existent key via attribute raises `AttributeError`.
    Objects are passed to `__convert` before `dict.__setitem__` is called.

    This class rebinds `__setattr__` to call `dict.__setitem__`. Attributes
    will not be set on the object, but will be added as keys to the dictionary.
    This prevents overwriting access to built-in attributes. Since we defined
    `__getattr__` but left `__getattribute__` alone, built-in attributes will
    be returned before `__getattr__` is called. Be careful::

        >>> a = attrdict()
        >>> a['key'] = 'value'
        >>> a.key
        'value'
        >>> a['keys'] = 'oops'
        >>> a.keys
        <built-in method keys of attrdict object at 0xabcdef123456>

    Use `'key' in a`, not `hasattr(a, 'key')`, as a consequence of the above.
    """
    def __init__(self, *args, **kwargs):
        # We trust the dict to init itself better than we can.
        dict.__init__(self, *args, **kwargs)
        # Because of that, we do duplicate work, but it's worth it.
        for k, v in self.iteritems():
            self.__setitem__(k, v)

    def __getattr__(self, k):
        try:
            return dict.__getitem__(self, k)
        except KeyError:
            # Maintain consistent syntactical behaviour.
            raise AttributeError(
                "'attrdict' object has no attribute '" + str(k) + "'"
            )

    def __setitem__(self, k, v):
        dict.__setitem__(self, k, attrdict.__convert(v))

    __setattr__ = __setitem__

    def __delattr__(self, k):
        try:
            dict.__delitem__(self, k)
        except KeyError:
            raise AttributeError(
                "'attrdict' object has no attribute '" + str(k) + "'"
            )

    @staticmethod
    def __convert(o):
        """
        Recursively convert `dict` objects in `dict`, `list`, `set`, and
        `tuple` objects to `attrdict` objects.
        """
        if isinstance(o, dict):
            o = attrdict(o)
        elif isinstance(o, list):
            o = list(attrdict.__convert(v) for v in o)
        elif isinstance(o, set):
            o = set(attrdict.__convert(v) for v in o)
        elif isinstance(o, tuple):
            o = tuple(attrdict.__convert(v) for v in o)
        return o

The accepted answer has some gotchas, such as failing on hasattr(). Using keys to simulate properties means you need to do a tad more than assign __getattr__ = dict.__getitem__.
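For instance, under Python 3 the naive __getattr__ raises KeyError rather than AttributeError on a missing key, so hasattr() propagates the exception instead of returning False (a quick sketch):
>>> d = DotDict({'a': 1})
>>> hasattr(d, 'b')
Traceback (most recent call last):
  ...
KeyError: 'b'
Here's a more robust implementation with tests: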
from collections import OrderedDict
from collections.abc import Mapping

class DotDict(OrderedDict):
    '''
    Quick and dirty implementation of a dot-able dict, which allows access and
    assignment via object properties rather than dict indexing.
    '''
    def __init__(self, *args, **kwargs):
        # we could just call super(DotDict, self).__init__(*args, **kwargs)
        # but that won't get us nested dotdict objects
        od = OrderedDict(*args, **kwargs)
        for key, val in od.items():
            if isinstance(val, Mapping):
                value = DotDict(val)
            else:
                value = val
            self[key] = value

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError as ex:
            raise AttributeError(f"No attribute called: {name}") from ex

    def __getattr__(self, k):
        try:
            return self[k]
        except KeyError as ex:
            raise AttributeError(f"No attribute called: {k}") from ex

    __setattr__ = OrderedDict.__setitem__
And the tests:
import unittest

class DotDictTest(unittest.TestCase):
    def test_add(self):
        exp = DotDict()
        # test that it's not there
        self.assertFalse(hasattr(exp, 'abc'))
        with self.assertRaises(AttributeError):
            _ = exp.abc
        with self.assertRaises(KeyError):
            _ = exp['abc']
        # assign and test that it is there
        exp.abc = 123
        self.assertTrue(hasattr(exp, 'abc'))
        self.assertTrue('abc' in exp)
        self.assertEqual(exp.abc, 123)

    def test_delete_attribute(self):
        exp = DotDict()
        # not there
        self.assertFalse(hasattr(exp, 'abc'))
        with self.assertRaises(AttributeError):
            _ = exp.abc
        # set value
        exp.abc = 123
        self.assertTrue(hasattr(exp, 'abc'))
        self.assertTrue('abc' in exp)
        self.assertEqual(exp.abc, 123)
        # delete attribute
        delattr(exp, 'abc')
        # not there
        self.assertFalse(hasattr(exp, 'abc'))
        with self.assertRaises(AttributeError):
            delattr(exp, 'abc')

    def test_delete_key(self):
        exp = DotDict()
        # not there
        self.assertFalse('abc' in exp)
        with self.assertRaises(KeyError):
            _ = exp['abc']
        # set value
        exp['abc'] = 123
        self.assertTrue(hasattr(exp, 'abc'))
        self.assertTrue('abc' in exp)
        self.assertEqual(exp.abc, 123)
        # delete key
        del exp['abc']
        # not there
        with self.assertRaises(KeyError):
            del exp['abc']

    def test_change_value(self):
        exp = DotDict()
        exp.abc = 123
        self.assertEqual(exp.abc, 123)
        self.assertEqual(exp.abc, exp['abc'])
        # change attribute
        exp.abc = 456
        self.assertEqual(exp.abc, 456)
        self.assertEqual(exp.abc, exp['abc'])
        # change key
        exp['abc'] = 789
        self.assertEqual(exp.abc, 789)
        self.assertEqual(exp.abc, exp['abc'])

    def test_DotDict_dict_init(self):
        exp = DotDict({'abc': 123, 'xyz': 456})
        self.assertEqual(exp.abc, 123)
        self.assertEqual(exp.xyz, 456)

    def test_DotDict_named_arg_init(self):
        exp = DotDict(abc=123, xyz=456)
        self.assertEqual(exp.abc, 123)
        self.assertEqual(exp.xyz, 456)

    def test_DotDict_datatypes(self):
        exp = DotDict({'intval': 1, 'listval': [1, 2, 3], 'dictval': {'a': 1}})
        self.assertEqual(exp.intval, 1)
        self.assertEqual(exp.listval, [1, 2, 3])
        self.assertEqual(exp.listval[0], 1)
        self.assertEqual(exp.dictval, {'a': 1})
        self.assertEqual(exp.dictval['a'], 1)
        self.assertEqual(exp.dictval.a, 1)  # nested dotdict works
And just for fun, you can turn an object into a DotDict with this (note it relies on import inspect):
import inspect

def to_dotdict(obj):
    ''' Converts an object to a DotDict '''
    if isinstance(obj, DotDict):
        return obj
    elif isinstance(obj, Mapping):
        return DotDict(obj)
    else:
        result = DotDict()
        for name in dir(obj):
            value = getattr(obj, name)
            if not name.startswith('__') and not inspect.ismethod(value):
                result[name] = value
        return result
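A quick usage sketch (the Config class here is just a made-up example):
class Config(object):
    host = 'localhost'
    port = 8080

cfg = to_dotdict(Config())
print(cfg.host)  # 'localhost'
print(cfg.port)  # 8080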

Related

pythonic way to index list of objects

I have a list of objects. Each object has two fields
obj1.status = 2
obj1.timestamp = 19211
obj2.status = 3
obj2.timestamp = 14211
obj_list = [obj1, obj2]
I will keep adding / deleting objects in the list and also changing attributes of objects, for example I may change obj1.status to 5.
Now I have two dicts
dict1 - <status, object>
dict2 - <timestamp, object>
How do I design a simple solution so that whenever I modify, delete, or insert elements in the list, the maps are automatically updated? I am interested in a Pythonic solution that is elegant and extensible; for example, I should be able to easily add another attribute, and a dict for it, in the future.
Also, for simplicity, let us assume all attribute values are distinct; for example, no two objects will have the same status.
You could override the __setattr__ on the objects to update the indexes whenever you set the values. You can use a weakref dictionary for the indexes so that when you delete objects and are no longer using them, they are automatically removed from the indexes.
import weakref

from bunch import Bunch

class MyObject(object):
    indexes = Bunch()  # Could just use dict()

    def __init__(self, **kwargs):
        super(MyObject, self).__init__()
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __setattr__(self, name, value):
        try:
            index = MyObject.indexes[name]
        except KeyError:
            index = weakref.WeakValueDictionary()
            MyObject.indexes[name] = index
        try:
            old_val = getattr(self, name)
            del index[old_val]
        except (KeyError, AttributeError):
            pass
        object.__setattr__(self, name, value)
        index[value] = self

obj1 = MyObject(status=1, timestamp=123123)
obj2 = MyObject(status=2, timestamp=2343)

print MyObject.indexes.status[1]
print obj1.indexes.timestamp[2343]

obj1.status = 5
print obj2.indexes['status'][5]
I used a Bunch here because it allows you to access the indexes using .name notation, but you could just use a dict instead and use the ['name'] syntax.
One approach here would be to create a class-level dict for MyObj and define the updating behavior using the property decorator. Every time an object is changed or added, it is reflected in the respective dictionaries associated with the class.
Edit: as @BrendanAbel points out, using weakref.WeakValueDictionary in place of dict handles object deletion from the class-level dicts.
from datetime import datetime
from weakref import WeakValueDictionary

DEFAULT_TIME = datetime.now()

class MyObj(object):
    """
    A sample clone of your object
    """
    timestamps = WeakValueDictionary()
    statuses = WeakValueDictionary()

    def __init__(self, status=0, timestamp=DEFAULT_TIME):
        self._status = status
        self._timestamp = timestamp
        self.status = status
        self.timestamp = timestamp

    def __update_class(self):
        MyObj.timestamps.update({self.timestamp: self})
        MyObj.statuses.update({self.status: self})

    def __delete_from_class(self):
        maybe_self = MyObj.statuses.get(self.status, None)
        if maybe_self is self is not None:
            del MyObj.statuses[self.status]
        maybe_self = MyObj.timestamps.get(self.timestamp, None)
        if maybe_self is self is not None:
            del MyObj.timestamps[self.timestamp]

    @property
    def status(self):
        return self._status

    @status.setter
    def status(self, val):
        self.__delete_from_class()
        self._status = val
        self.__update_class()

    @property
    def timestamp(self):
        return self._timestamp

    @timestamp.setter
    def timestamp(self, val):
        self.__delete_from_class()
        self._timestamp = val
        self.__update_class()

    def __repr__(self):
        return "MyObj: status={} timestamp={}".format(self.status, self.timestamp)
obj1 = MyObj(1)
obj2 = MyObj(2)
obj3 = MyObj(3)
lst = [obj1, obj2, obj3]
# In [87]: q.lst
# Out[87]:
# [MyObj: status=1 timestamp=2016-05-27 13:43:38.158363,
# MyObj: status=2 timestamp=2016-05-27 13:43:38.158363,
# MyObj: status=3 timestamp=2016-05-27 13:43:38.158363]
# In [88]: q.MyObj.statuses[1]
# Out[88]: MyObj: status=1 timestamp=2016-05-27 13:43:38.158363
# In [89]: q.MyObj.statuses[1].status = 42
# In [90]: q.MyObj.statuses[42]
# Out[90]: MyObj: status=42 timestamp=2016-05-27 13:43:38.158363
# In [91]: q.MyObj.statuses[1]
# ---------------------------------------------------------------------------
# KeyError Traceback (most recent call last)
# <ipython-input-91-508ab072bfc4> in <module>()
# ----> 1 q.MyObj.statuses[1]
# KeyError: 1
For a collection to be aware of mutation of its elements, there must be some connection between the elements and that collection which can communicate when changes happen. For this reason, we either must bind an instance to a collection or proxy the elements of the collection so that change-communication doesn't leak into the element's code.
A note about the implementation I'm going to present: the proxying method only works if the attributes are changed by direct setting, not from inside one of the element's own methods. A more complex book-keeping system would be necessary in that case.
Additionally, it assumes that exact duplicates of all attributes won't exist, given that you require the indices to be built out of set objects instead of lists.
from collections import defaultdict

class Proxy(object):
    def __init__(self, proxy, collection):
        self._proxy = proxy
        self._collection = collection

    def __getattribute__(self, name):
        if name in ("_proxy", "_collection"):
            return object.__getattribute__(self, name)
        else:
            proxy = self._proxy
            return getattr(proxy, name)

    def __setattr__(self, name, value):
        if name in ("_proxy", "_collection"):
            object.__setattr__(self, name, value)
        else:
            proxied = self._proxy
            collection = self._collection
            old = getattr(proxied, name)
            setattr(proxied, name, value)
            collection.signal_change(proxied, name, old, value)

class IndexedCollection(object):
    def __init__(self, items, index_names):
        self.items = list(items)
        self.index_names = set(index_names)
        self.indices = defaultdict(lambda: defaultdict(set))
        # index any items passed in at construction time
        for obj in self.items:
            self._add_to_indices(obj)

    def __len__(self):
        return len(self.items)

    def __iter__(self):
        for i in range(len(self)):
            yield self[i]

    def remove(self, obj):
        self.items.remove(obj)
        self._remove_from_indices(obj)

    def __getitem__(self, i):
        # Ensure consumers get a proxy, not a raw object
        return Proxy(self.items[i], self)

    def append(self, obj):
        self.items.append(obj)
        self._add_to_indices(obj)

    def _add_to_indices(self, obj):
        for indx in self.index_names:
            key = getattr(obj, indx)
            self.indices[indx][key].add(obj)

    def _remove_from_indices(self, obj):
        for indx in self.index_names:
            key = getattr(obj, indx)
            self.indices[indx][key].remove(obj)

    def signal_change(self, obj, indx, old, new):
        if indx not in self.index_names:
            return
        # Tell the container to update its indices for a
        # particular attribute and object
        self.indices[indx][old].remove(obj)
        self.indices[indx][new].add(obj)
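A hypothetical usage sketch of the two classes above (the Item class is made up for illustration):
class Item(object):
    def __init__(self, status, timestamp):
        self.status = status
        self.timestamp = timestamp

coll = IndexedCollection([], ['status', 'timestamp'])
obj = Item(2, 19211)
coll.append(obj)
print(coll.indices['status'][2])   # set containing the Item

coll[0].status = 5                 # goes through the Proxy
print(coll.indices['status'][5])   # index updated automatically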
I am not sure if this is what you are asking for but ...
Objects:
import operator

class Foo(object):
    def __init__(self):
        self.one = 1
        self.two = 2

f = Foo()
f.name = 'f'
g = Foo()
g.name = 'g'
h = Foo()
h.name = 'h'

name = operator.attrgetter('name')
lists: a initially contains f and b initially contains h
a = [f]
b = [h]
dictionaries: each with one item whose value is one of the lists
d1 = {1:a}
d2 = {1:b}
d1[1] is list a which contains f and f.one is 1
>>> d1
{1: [<__main__.Foo object at 0x03F4CA50>]}
>>> name(d1[1][0])
'f'
>>> name(d1[1][0]), d1[1][0].one
('f', 1)
changing f.one is seen in the dictionary
>>> f.one = '?'
>>> name(d1[1][0]), d1[1][0].one
('f', '?')
>>>
d2[1] is list b which contains h
>>> d2
{1: [<__main__.Foo object at 0x03F59070>]}
>>> name(d2[1][0]), d2[1][0].one
('h', 1)
Add an object to b and it is seen in the dictionary
>>> b.append(g)
>>> b
[<__main__.Foo object at 0x03F59070>, <__main__.Foo object at 0x03F4CAF0>]
>>> d2
{1: [<__main__.Foo object at 0x03F59070>, <__main__.Foo object at 0x03F4CAF0>]}
>>> name(d2[1][1]), d2[1][1].one
('g', 1)

Python make dictionary items accessible as object property

This is mostly syntactic sugar but I'd like to access the items of a dictionary as object properties.
Example:
class CoolThing():
    def __init__(self):
        self.CoolDict = {'a': 1, 'b': 2}
and I'd like to have
my_cool_thing.a # => 1
my_cool_thing.b # => 2
Edit: some code of a potential solution with a nested structure with dot notation: device.property.field
class Parameters():
    def __init__(self, ids, devices):
        self._ids = ids
        self._devices = devices
        for p in self._devices:
            p = p[0]
            if self.__dict__.get(p.device) is None:
                self.__dict__[p.device] = SmartDict()
            else:
                if self.__dict__[p.device].get(p.property) is None:
                    self.__dict__[p.device][p.property] = SmartDict()
                else:
                    if self.__dict__[p.device][p.property].get(p.field) is None:
                        self.__dict__[p.device][p.property][p.field] = ParameterData(p)

class SmartDict():
    def __init__(self):
        self.__dict__ = {}

    def __getitem__(self, k):
        return self.__dict__[k]

    def __setitem__(self, k, v):
        self.__dict__[k] = v

    def get(self, k):
        return self.__dict__.get(k)

    def __len__(self):
        return len(self.__dict__)
You want __getattr__ and __setattr__, though you'll have to roll your own class (I'm not aware of any builtins, though namedtuple might work if you don't need to change values much)
class AttrDict(dict):
    def __getattr__(self, attr):
        return self[attr]

    def __setattr__(self, attr, value):
        self[attr] = value
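For completeness, the namedtuple alternative mentioned above might look like this (a sketch; it is immutable, so it only fits if the values never change):
from collections import namedtuple

CoolData = namedtuple('CoolData', ['a', 'b'])
thing = CoolData(a=1, b=2)
print(thing.a)  # 1
print(thing.b)  # 2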
If you just want to access a sub-dictionary that way, you just change self to self.cool_dict
class CoolThing:
    def __init__(self):
        self.cool_dict = {'a': 1, 'b': 2}

    def __getattr__(self, attr):
        return self.cool_dict[attr]

    def __setattr__(self, attr, value):
        # Note, you'll have to do this for anything that you want to set
        # in __init__.
        if attr == 'cool_dict':
            super().__setattr__(attr, value)
        else:
            self.cool_dict[attr] = value
Note that __getattr__ is used after any other lookups fail, but if you want to ensure that your function is called first, you can use __getattribute__.
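A minimal sketch of that lookup order (Demo is a hypothetical class):
class Demo:
    existing = 'found by normal lookup'

    def __getattr__(self, attr):
        return 'fallback for ' + repr(attr)

d = Demo()
print(d.existing)  # 'found by normal lookup' -- __getattr__ is never called
print(d.missing)   # "fallback for 'missing'" -- normal lookup failed first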
Also note that self.cool_dict does not exist on CoolThing until after __init__ is called. My initial version of this would throw a maximum recursion depth exceeded error: as the instance was being created, it would go to set self.cool_dict in __init__, which calls __setattr__, which would try to get self.cool_dict so it could set [attr] = value on it. Naturally it can't find cool_dict yet, so it calls __getattr__ again... which can't find cool_dict, and round and round it goes.
Another option would be to use a class-level variable instead, but that's probably not at all what you want :)
CoolDict already exists, it's named __dict__:
>>> class CoolThing(object):
...     def __init__(self):
...         self.__dict__['a'] = 1
...         self.__dict__['b'] = 2
...
>>> thing = CoolThing()
>>> thing.a
1
>>> thing.b
2
>>> thing.c = 3
>>> thing.__dict__
{'a': 1, 'b': 2, 'c': 3}

Setup dictionary lazily

Let's say I have this dictionary in python, defined at the module level (mysettings.py):
settings = {
    'expensive1' : expensive_to_compute(1),
    'expensive2' : expensive_to_compute(2),
    ...
}
I would like those values to be computed when the keys are accessed:
from mysettings import settings # settings is only "prepared"
print settings['expensive1'] # Now the value is really computed.
Is this possible? How?
Don't inherit from the built-in dict. Even if you overwrite the dict.__getitem__() method, dict.get() will not work as you expect.
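A quick sketch of the pitfall (BrokenLazyDict is a hypothetical name): dict.get() is implemented in C and does not route through an overridden __getitem__:
class BrokenLazyDict(dict):
    def __getitem__(self, key):
        func, arg = dict.__getitem__(self, key)
        return func(arg)

d = BrokenLazyDict({'x': (lambda v: v * 3, 1)})
print(d['x'])      # 3, as intended
print(d.get('x'))  # (<function <lambda> ...>, 1) -- get() bypassed __getitem__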
The right way is to inherit from collections.abc.Mapping:
from collections.abc import Mapping

class LazyDict(Mapping):
    def __init__(self, *args, **kw):
        self._raw_dict = dict(*args, **kw)

    def __getitem__(self, key):
        func, arg = self._raw_dict.__getitem__(key)
        return func(arg)

    def __iter__(self):
        return iter(self._raw_dict)

    def __len__(self):
        return len(self._raw_dict)
Then you can do:
settings = LazyDict({
    'expensive1': (expensive_to_compute, 1),
    'expensive2': (expensive_to_compute, 2),
})
I also list sample code and examples here: https://gist.github.com/gyli/9b50bb8537069b4e154fec41a4b5995a
If you don't separate the arguments from the callable, I don't think it's possible. However, this should work:
class MySettingsDict(dict):
    def __getitem__(self, item):
        function, arg = dict.__getitem__(self, item)
        return function(arg)

def expensive_to_compute(arg):
    return arg * 3
And now:
>>> settings = MySettingsDict({
... 'expensive1': (expensive_to_compute, 1),
... 'expensive2': (expensive_to_compute, 2),
... })
>>> settings['expensive1']
3
>>> settings['expensive2']
6
Edit:
You may also want to cache the results of expensive_to_compute, if they are to be accessed multiple times. Something like this:
class MySettingsDict(dict):
    def __getitem__(self, item):
        value = dict.__getitem__(self, item)
        if not isinstance(value, int):
            function, arg = value
            value = function(arg)
            dict.__setitem__(self, item, value)
        return value
And now:
>>> settings.values()
dict_values([(<function expensive_to_compute at 0x9b0a62c>, 2),
(<function expensive_to_compute at 0x9b0a62c>, 1)])
>>> settings['expensive1']
3
>>> settings.values()
dict_values([(<function expensive_to_compute at 0x9b0a62c>, 2), 3])
You may also want to override other dict methods depending of how you want to use the dict.
Store references to the functions as the values for the keys, i.e.:
def A():
    return "that took ages"

def B():
    return "that took for-ever"

settings = {
    "A": A,
    "B": B,
}

print(settings["A"]())
This way, you only evaluate the function associated with a key when you access it and invoke it. A suitable class which can handle having non-lazy values would be:
import types

class LazyDict(dict):
    def __getitem__(self, key):
        item = dict.__getitem__(self, key)
        if isinstance(item, types.FunctionType):
            return item()
        else:
            return item
usage:
settings = LazyDict([("A",A),("B",B)])
print(settings["A"])
>>>
that took ages
You can make expensive_to_compute a generator function:
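A minimal sketch of what such a generator version might look like (the body runs only when next() is called):
def expensive_to_compute(arg):
    yield arg * 3  # deferred until next() is called on the generator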
settings = {
    'expensive1' : expensive_to_compute(1),
    'expensive2' : expensive_to_compute(2),
}
Then try:
from mysettings import settings
print next(settings['expensive1'])
I would populate the dictionary values with callables and change them to the result upon reading.
class LazyDict(dict):
    def __getitem__(self, k):
        v = super().__getitem__(k)
        if callable(v):
            v = v()
            super().__setitem__(k, v)
        return v

    def get(self, k, default=None):
        if k in self:
            return self.__getitem__(k)
        return default
Then with
def expensive_to_compute(arg):
    print('Doing heavy stuff')
    return arg * 3
you can do:
>>> settings = LazyDict({
...     'expensive1': lambda: expensive_to_compute(1),
...     'expensive2': lambda: expensive_to_compute(2),
... })
>>> settings.__repr__()
"{'expensive1': <function <lambda> at 0x000001A0BA2B8EA0>, 'expensive2': <function <lambda> at 0x000001A0BA2B8F28>}"
>>> settings['expensive1']
Doing heavy stuff
3
>>> settings.get('expensive2')
Doing heavy stuff
6
>>> settings.__repr__()
"{'expensive1': 3, 'expensive2': 6}"
I recently needed something similar. Mixing both strategies from Guangyang Li and michaelmeyer, here is how I did it:
from collections.abc import MutableMapping

class LazyDict(MutableMapping):
    """Lazily evaluated dictionary."""
    function = None

    def __init__(self, *args, **kargs):
        self._dict = dict(*args, **kargs)

    def __getitem__(self, key):
        """Evaluate value."""
        value = self._dict[key]
        # ccData is the result type from the author's own project; replace it
        # with whatever type self.function returns in your use case
        if not isinstance(value, ccData):
            value = self.function(value)
            self._dict[key] = value
        return value

    def __setitem__(self, key, value):
        """Store value lazily."""
        self._dict[key] = value

    def __delitem__(self, key):
        """Delete value."""
        del self._dict[key]

    def __iter__(self):
        """Iterate over dictionary."""
        return iter(self._dict)

    def __len__(self):
        """Evaluate size of dictionary."""
        return len(self._dict)
Let's lazily evaluate the following function:
def expensive_to_compute(arg):
    return arg * 3
The advantage is that the function is yet to be defined within the object and the arguments are the ones actually stored (which is what I needed):
>>> settings = LazyDict({'expensive1': 1, 'expensive2': 2})
>>> settings.function = expensive_to_compute # function unknown until now!
>>> settings['expensive1']
3
>>> settings['expensive2']
6
This approach works with a single function only.
I can point out the following advantages:
implements the complete MutableMapping API
if your function is non-deterministic, you can reset a value to re-evaluate it (see the sketch below)
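A hypothetical sketch of that reset, continuing the session above (and assuming the converted-type check in __getitem__ has been adapted as noted in the code comment):
>>> settings['expensive1'] = 1   # store the raw argument again
>>> settings['expensive1']       # re-evaluated on the next access
3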
Pass in a function to generate the values on the first attribute get:
class LazyDict(dict):
    """ Fill in the values of a dict at first access """
    def __init__(self, fn, *args, **kwargs):
        self._fn = fn
        self._fn_args = args or []
        self._fn_kwargs = kwargs or {}
        return super(LazyDict, self).__init__()

    def _fn_populate(self):
        if self._fn:
            self._fn(self, *self._fn_args, **self._fn_kwargs)
            self._fn = self._fn_args = self._fn_kwargs = None

    def __getattribute__(self, name):
        if not name.startswith('_fn'):
            self._fn_populate()
        return super(LazyDict, self).__getattribute__(name)

    def __getitem__(self, item):
        self._fn_populate()
        return super(LazyDict, self).__getitem__(item)
>>> def _fn(self, val):
...     print 'lazy loading'
...     self['foo'] = val
...
>>> d = LazyDict(_fn, 'bar')
>>> d
{}
>>> d['foo']
lazy loading
'bar'
>>>
Alternatively, one can use the LazyDictionary package that creates a thread-safe lazy dictionary.
Installation:
pip install lazydict
Usage:
from lazydict import LazyDictionary
import tempfile
lazy = LazyDictionary()
lazy['temp'] = lambda: tempfile.mkdtemp()

Making custom containers work with **kwargs (how does Python expand the args?)

I have a custom container class in Python 2.7, and everything works as expected except if I try to expand an instance as **kwargs for a function:
cm = ChainableMap({'a': 1})
cm['b'] = 2
assert cm == {'a': 1, 'b': 2}  # Is fine

def check_kwargs(**kwargs):
    assert kwargs == {'a': 1, 'b': 2}

check_kwargs(**cm)  # Raises AssertionError
I've overridden __getitem__, __iter__, iterkeys, keys, items, and iteritems (as well as __eq__ and __repr__), yet none of them seem to be involved in the expansion as **kwargs. What am I doing wrong?
Edit - The working updated source that now inherits from MutableMapping and adds the missing methods:
from itertools import chain
from collections import MutableMapping

class ChainableMap(MutableMapping):
    """
    A mapping object with a delegation chain similar to JS object prototypes::

        >>> parent = {'a': 1}
        >>> child = ChainableMap(parent)
        >>> child.parent is parent
        True

    Failed lookups delegate up the chain to self.parent::

        >>> 'a' in child
        True
        >>> child['a']
        1

    But modifications will only affect the child::

        >>> child['b'] = 2
        >>> child.keys()
        ['a', 'b']
        >>> parent.keys()
        ['a']
        >>> child['a'] = 10
        >>> parent['a']
        1

    Changes in the parent are also reflected in the child::

        >>> parent['c'] = 3
        >>> sorted(child.keys())
        ['a', 'b', 'c']
        >>> expect = {'a': 10, 'b': 2, 'c': 3}
        >>> assert child == expect, "%s != %s" % (child, expect)

    Unless the child is already masking out a certain key::

        >>> del parent['a']
        >>> parent.keys()
        ['c']
        >>> assert child == expect, "%s != %s" % (child, expect)

    And now this works as well::

        >>> def print_sorted(**kwargs):
        ...     for k in sorted(kwargs.keys()):
        ...         print "%r=%r" % (k, kwargs[k])
        >>> child['c'] == 3
        True
        >>> print_sorted(**child)
        'a'=10
        'b'=2
        'c'=3
    """
    __slots__ = ('_', 'parent')

    def __init__(self, parent, **data):
        self.parent = parent
        self._ = data

    def __getitem__(self, key):
        try:
            return self._[key]
        except KeyError:
            return self.parent[key]

    def __iter__(self):
        return self.iterkeys()

    def __setitem__(self, key, val):
        self._[key] = val

    def __delitem__(self, key):
        del self._[key]

    def __len__(self):
        return len(self.keys())

    def keys(self, own=False):
        return list(self.iterkeys(own))

    def items(self, own=False):
        return list(self.iteritems(own))

    def iterkeys(self, own=False):
        if own:
            for k in self._.iterkeys():
                yield k
            return
        yielded = set([])
        for k in chain(self.parent.iterkeys(), self._.iterkeys()):
            if k in yielded:
                continue
            yield k
            yielded.add(k)

    def iteritems(self, own=False):
        for k in self.iterkeys(own):
            yield k, self[k]

    def __eq__(self, other):
        return sorted(self.iteritems()) == sorted(other.iteritems())

    def __repr__(self):
        return dict(self.iteritems()).__repr__()

    def __contains__(self, key):
        return key in self._ or key in self.parent

    def containing(self, key):
        """
        Return the ancestor that directly contains ``key``

        >>> p2 = {'a': 2}
        >>> p1 = ChainableMap(p2)
        >>> c = ChainableMap(p1)
        >>> c.containing('a') is p2
        True
        """
        if key in self._:
            return self
        elif hasattr(self.parent, 'containing'):
            return self.parent.containing(key)
        elif key in self.parent:
            return self.parent

    def get(self, key, default=None):
        """
        >>> c = ChainableMap({'a': 1})
        >>> c.get('a')
        1
        >>> c.get('b', 'default')
        'default'
        """
        if key in self:
            return self[key]
        else:
            return default

    def pushdown(self, top):
        """
        Pushes a new mapping onto the top of the delegation chain:

        >>> parent = {'a': 10}
        >>> child = ChainableMap(parent)
        >>> top = {'a': 'apple', 'b': 'beer', 'c': 'cheese'}
        >>> child.pushdown(top)
        >>> assert child == top

        This creates a new ChainableMap with the contents of ``child`` and makes it
        the new parent (the old parent becomes the grandparent):

        >>> child.parent.parent is parent
        True
        >>> del child['a']
        >>> child['a'] == 10
        True
        """
        old = ChainableMap(self.parent)
        for k, v in self.items(True):
            old[k] = v
            del self[k]
        self.parent = old
        for k, v in top.iteritems():
            self[k] = v
When creating a keyword argument dictionary, the behavior is the same as passing your object into the dict() initializer, which results in the dict {'b': 2} for your cm object:
>>> cm = ChainableMap({'a': 1})
>>> cm['b'] = 2
>>> dict(cm)
{'b': 2}
A more detailed explanation of why this is the case is below, but the summary is that your mapping is converted to a Python dictionary in C code which does some optimization if the argument is itself another dict, by bypassing the Python function calls and inspecting the underlying C object directly.
There are a few ways to approach a solution: either make sure that the underlying dict contains everything you want, or stop inheriting from dict (which will require other changes as well, at the very least a __setitem__ method).
Edit: It sounds like BrenBarn's suggestion to inherit from collections.MutableMapping instead of dict did the trick.
You could accomplish the first method pretty simply by just adding self.update(parent) to ChainableMap.__init__(), but I'm not sure if that will cause other side effects to the behavior of your class.
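A rough sketch of that first approach, assuming the original dict-based ChainableMap from before the edit (which is not shown above):
class ChainableMap(dict):
    def __init__(self, parent, **data):
        self.parent = parent
        self.update(parent)  # copy parent keys into the underlying dict so the
                             # C-level fast path behind dict(cm) and **cm sees them
        self.update(data)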
Explanation of why dict(cm) gives {'b': 2}:
Check out the following CPython code for the dict object:
http://hg.python.org/releasing/2.7.3/file/7bb96963d067/Objects/dictobject.c#l1522
When dict(cm) is called (and when keyword arguments are unpacked), the PyDict_Merge function is called with cm as the b parameter. Because ChainableMap inherits from dict, the if statement at line 1539 is entered:
if (PyDict_Check(b)) {
    other = (PyDictObject*)b;
    ...
From there on, items from other are added to the new dict that is being created by accessing the C object directly, which bypasses all of the methods that you overwrote.
This means that any items in a ChainableMap instance that are accessed through the parent attribute will not be added to the new dictionary created by dict() or keyword argument unpacking.

How to implement an ordered, default dict?

I would like to combine OrderedDict() and defaultdict() from collections in one object, which shall be an ordered, default dict.
Is this possible?
The following (using a modified version of this recipe) works for me:
from collections import OrderedDict, Callable

class DefaultOrderedDict(OrderedDict):
    # Source: http://stackoverflow.com/a/6190500/562769
    def __init__(self, default_factory=None, *a, **kw):
        if (default_factory is not None and
                not isinstance(default_factory, Callable)):
            raise TypeError('first argument must be callable')
        OrderedDict.__init__(self, *a, **kw)
        self.default_factory = default_factory

    def __getitem__(self, key):
        try:
            return OrderedDict.__getitem__(self, key)
        except KeyError:
            return self.__missing__(key)

    def __missing__(self, key):
        if self.default_factory is None:
            raise KeyError(key)
        self[key] = value = self.default_factory()
        return value

    def __reduce__(self):
        if self.default_factory is None:
            args = tuple()
        else:
            args = self.default_factory,
        return type(self), args, None, None, self.items()

    def copy(self):
        return self.__copy__()

    def __copy__(self):
        return type(self)(self.default_factory, self)

    def __deepcopy__(self, memo):
        import copy
        return type(self)(self.default_factory,
                          copy.deepcopy(self.items()))

    def __repr__(self):
        return 'OrderedDefaultDict(%s, %s)' % (self.default_factory,
                                               OrderedDict.__repr__(self))
Here is another possibility, inspired by Raymond Hettinger's super() Considered Super, tested on Python 2.7.X and 3.4.X:
from collections import OrderedDict, defaultdict

class OrderedDefaultDict(OrderedDict, defaultdict):
    def __init__(self, default_factory=None, *args, **kwargs):
        # in python3 you can omit the args to super
        super(OrderedDefaultDict, self).__init__(*args, **kwargs)
        self.default_factory = default_factory
If you check out the class's MRO (aka, help(OrderedDefaultDict)), you'll see this:
class OrderedDefaultDict(collections.OrderedDict, collections.defaultdict)
| Method resolution order:
| OrderedDefaultDict
| collections.OrderedDict
| collections.defaultdict
| __builtin__.dict
| __builtin__.object
meaning that when an instance of OrderedDefaultDict is initialized, it defers to OrderedDict's init, which in turn will call defaultdict's methods before calling __builtin__.dict, which is precisely what we want.
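A quick sanity check that the combined class behaves as expected (a sketch):
d = OrderedDefaultDict(list)
d['a'].append(1)
d['b'].append(2)
print(list(d.items()))  # [('a', [1]), ('b', [2])] -- defaulting, in insertion order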
If you want a simple solution that doesn't require a class, you can just use OrderedDict.setdefault(key, default=None) or OrderedDict.get(key, default=None). If you only get / set from a few places, say in a loop, you can easily just setdefault.
totals = collections.OrderedDict()
for i, x in some_generator():
    totals[i] = totals.get(i, 0) + x
It is even easier for lists with setdefault:
agglomerate = collections.OrderedDict()
for i, x in some_generator():
    agglomerate.setdefault(i, []).append(x)
But if you use it more than a few times, it is probably better to set up a class, like in the other answers.
Here's another solution to think about if your use case is simple like mine and you don't necessarily want to add the complexity of a DefaultOrderedDict class implementation to your code.
from collections import OrderedDict
keys = ['a', 'b', 'c']
items = [(key, None) for key in keys]
od = OrderedDict(items)
(None is my desired default value.)
Note that this solution won't work if one of your requirements is to dynamically insert new keys with the default value. A tradeoff of simplicity.
Update 3/13/17 - I learned of a convenience function for this use case. Same as above but you can omit the line items = ... and just:
od = OrderedDict.fromkeys(keys)
Output:
OrderedDict([('a', None), ('b', None), ('c', None)])
And if your keys are single characters, you can just pass one string:
OrderedDict.fromkeys('abc')
This has the same output as the two examples above.
You can also pass a default value as the second arg to OrderedDict.fromkeys(...).
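For example:
>>> OrderedDict.fromkeys('abc', 0)
OrderedDict([('a', 0), ('b', 0), ('c', 0)])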
Another simple approach would be to use the dictionary get method:
>>> from collections import OrderedDict
>>> d = OrderedDict()
>>> d['key'] = d.get('key', 0) + 1
>>> d['key'] = d.get('key', 0) + 1
>>> d
OrderedDict([('key', 2)])
>>>
A simpler version of @zeekay's answer is:
from collections import OrderedDict

class OrderedDefaultListDict(OrderedDict):  # name according to default
    def __missing__(self, key):
        self[key] = value = []  # change to whatever default you want
        return value
A simple and elegant solution building on @NickBread's. It has a slightly different API to set the factory, but good defaults are always nice to have.
class OrderedDefaultDict(OrderedDict):
    factory = list

    def __missing__(self, key):
        self[key] = value = self.factory()
        return value
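Since the factory is a class attribute, swapping the default is just a small subclass away (a sketch):
class OrderedDefaultSetDict(OrderedDefaultDict):
    factory = set

d = OrderedDefaultSetDict()
d['tags'].add('python')
print(d['tags'])  # {'python'}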
I created a slightly fixed and more simplified version of the accepted answer, current as of Python 3.7.
from collections import OrderedDict
from copy import copy, deepcopy
import pickle
from typing import Any, Callable

class DefaultOrderedDict(OrderedDict):
    def __init__(
        self,
        default_factory: Callable[[], Any],
        *args,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)
        self.default_factory = default_factory

    def __getitem__(self, key):
        try:
            return super().__getitem__(key)
        except KeyError:
            return self.__missing__(key)

    def __missing__(self, key):
        self[key] = value = self.default_factory()
        return value

    def __reduce__(self):
        return type(self), (self.default_factory, ), None, None, iter(self.items())

    def copy(self):
        return self.__copy__()

    def __copy__(self):
        return type(self)(self.default_factory, self)

    def __deepcopy__(self, memo):
        return type(self)(self.default_factory, deepcopy(tuple(self.items()), memo))

    def __repr__(self):
        return f'{self.__class__.__name__}({self.default_factory}, {OrderedDict(self).__repr__()})'
And, what may be even more important, I provided some tests:
a = DefaultOrderedDict(list)
# testing default
assert a['key'] == []
a['key'].append(1)
assert a['key'] == [1, ]
# testing repr
assert repr(a) == "DefaultOrderedDict(<class 'list'>, OrderedDict([('key', [1])]))"
# testing copy
b = a.copy()
assert b['key'] is a['key']
c = copy(a)
assert c['key'] is a['key']
d = deepcopy(a)
assert d['key'] is not a['key']
assert d['key'] == a['key']
# testing pickle
saved = pickle.dumps(a)
restored = pickle.loads(saved)
assert restored is not a
assert restored == a
# testing order
a['second_key'] = [2, ]
a['key'] = [3, ]
assert list(a.items()) == [('key', [3, ]), ('second_key', [2, ])]
Inspired by other answers on this thread, you can use something like:
from collections import OrderedDict

class OrderedDefaultDict(OrderedDict):
    def __missing__(self, key):
        value = OrderedDefaultDict()
        self[key] = value
        return value
I would like to know if there are any downsides to initializing another object of the same class in the __missing__ method.
I tested the default dict and discovered it's also sorted! Maybe it was just a coincidence, but anyway you can use the sorted function:
sorted(s.items())
I think it's simpler.
