How to implement insert for OrderedDict in python 3 - python

I want to insert an item into an OrderedDict at a certain position.
Using the gist of this SO answer i have the problem that it doesn't work on python 3.
This is the implementation used
from collections import OrderedDict
class ListDict(OrderedDict):
def __init__(self, *args, **kwargs):
super(ListDict, self).__init__(*args, **kwargs)
def __insertion(self, link_prev, key_value):
key, value = key_value
if link_prev[2] != key:
if key in self:
del self[key]
link_next = link_prev[1]
self._OrderedDict__map[key] = link_prev[1] = link_next[0] = [link_prev, link_next, key]
dict.__setitem__(self, key, value)
def insert_after(self, existing_key, key_value):
self.__insertion(self._OrderedDict__map[existing_key], key_value)
def insert_before(self, existing_key, key_value):
self.__insertion(self._OrderedDict__map[existing_key][0], key_value)
Using it like
ld = ListDict([(1,1), (2,2), (3,3)])
ld.insert_before(2, (1.5, 1.5))
gives
File "...", line 35, in insert_before
self.__insertion(self._OrderedDict__map[existing_key][0], key_value)
AttributeError: 'ListDict' object has no attribute '_OrderedDict__map'
It works with python 2.7. What is the reason that it fails in python 3?
Checking the source code of the OrderedDict implementation shows that self.__map is used instead of self._OrderedDict__map. Changing the code to the usage of self.__map gives
AttributeError: 'ListDict' object has no attribute '_ListDict__map'
How come? And how can i make this work in python 3? OrderedDict uses the internal __map attribute to store a doubly linked list. So how can i access this attribute properly?

I'm not sure you wouldn't be better served just keeping up with a separate list and dict in your code, but here is a stab at a pure Python implementation of such an object. This will be an order of magnitude slower than an actual OrderedDict in Python 3.5, which as I pointed out in my comment has been rewritten in C.
"""
A list/dict hybrid; like OrderedDict with insert_before and insert_after
"""
import collections.abc
class MutableOrderingDict(collections.abc.MutableMapping):
def __init__(self, iterable_or_mapping=None, **kw):
# This mimics dict's initialization and accepts the same arguments
# Of course, you have to pass an ordered iterable or mapping unless you
# want the order to be arbitrary. Garbage in, garbage out and all :)
self.__data = {}
self.__keys = []
if iterable_or_mapping is not None:
try:
iterable = iterable_or_mapping.items()
except AttributeError:
iterable = iterable_or_mapping
for key, value in iterable:
self.__keys.append(key)
self.__data[key] = value
for key, value in kw.items():
self.__keys.append(key)
self.__data[key] = value
def insert_before(self, key, new_key, value):
try:
self.__keys.insert(self.__keys.index(key), new_key)
except ValueError:
raise KeyError(key) from ValueError
else:
self.__data[new_key] = value
def insert_after(self, key, new_key, value):
try:
self.__keys.insert(self.__keys.index(key) + 1, new_key)
except ValueError:
raise KeyError(key) from ValueError
else:
self.__data[new_key] = value
def __getitem__(self, key):
return self.__data[key]
def __setitem__(self, key, value):
self.__keys.append(key)
self.__data[key] = value
def __delitem__(self, key):
del self.__data[key]
self.__keys.remove(key)
def __iter__(self):
return iter(self.__keys)
def __len__(self):
return len(self.__keys)
def __contains__(self, key):
return key in self.__keys
def __eq__(self, other):
try:
return (self.__data == dict(other.items()) and
self.__keys == list(other.keys()))
except AttributeError:
return False
def keys(self):
for key in self.__keys:
yield key
def items(self):
for key in self.__keys:
yield key, self.__data[key]
def values(self):
for key in self.__keys:
yield self.__data[key]
def get(self, key, default=None):
try:
return self.__data[key]
except KeyError:
return default
def pop(self, key, default=None):
value = self.get(key, default)
self.__delitem__(key)
return value
def popitem(self):
try:
return self.__data.pop(self.__keys.pop())
except IndexError:
raise KeyError('%s is empty' % self.__class__.__name__)
def clear(self):
self.__keys = []
self.__data = {}
def update(self, mapping):
for key, value in mapping.items():
self.__keys.append(key)
self.__data[key] = value
def setdefault(self, key, default):
try:
return self[key]
except KeyError:
self[key] = default
return self[key]
def __repr__(self):
return 'MutableOrderingDict(%s)' % ', '.join(('%r: %r' % (k, v)
for k, v in self.items()))
I ended up implementing the whole collections.abc.MutableMapping contract because none of the methods were very long, but you probably won't use all of them. In particular, __eq__ and popitem are a little arbitrary. I changed your signature on the insert_* methods to a 4-argument one that feels a little more natural to me. Final note: Only tested on Python 3.5. Certainly will not work on Python 2 without some (minor) changes.

Trying out the new dict object in 3.7 and thought I'd try to implement what Two-Bit Alchemist had done with his answer but just overriding the native dict class because in 3.7 dict's are ordered.
''' Script that extends python3.7 dictionary to include insert_before and insert_after methods. '''
from sys import exit as sExit
class MutableDict(dict):
''' Class that extends python3.7 dictionary to include insert_before and insert_after methods. '''
def insert_before(self, key, newKey, val):
''' Insert newKey:value into dict before key'''
try:
__keys = list(self.keys())
__vals = list(self.values())
insertAt = __keys.index(key)
__keys.insert(insertAt, newKey)
__vals.insert(insertAt, val)
self.clear()
self.update({x: __vals[i] for i, x in enumerate(__keys)})
except ValueError as e:
sExit(e)
def insert_after(self, key, newKey, val):
''' Insert newKey:value into dict after key'''
try:
__keys = list(self.keys())
__vals = list(self.values())
insertAt = __keys.index(key) + 1
if __keys[-1] != key:
__keys.insert(insertAt, newKey)
__vals.insert(insertAt, val)
self.clear()
self.update({x: __vals[i] for i, x in enumerate(__keys)})
else:
self.update({newKey: val})
except ValueError as e:
sExit(e)
A little testing:
In: v = MutableDict([('a', 1), ('b', 2), ('c', 3)])
Out: {'a': 1, 'b': 2, 'c': 3}
In: v.insert_before('a', 'g', 5)
Out: {'g': 5, 'a': 1, 'b': 2, 'c': 3}
In: v.insert_after('b', 't', 5)
Out: {'g': 5, 'a': 1, 'b': 2, 't': 5, 'c': 3}
Edit: I decided to do a little benchmark test to see what kind of performance hit this would take. I will use from timeit import timeit
Get a baseline. Create a dict with arbitrary values.
In: timeit('{x: ord(x) for x in string.ascii_lowercase[:27]}', setup='import string', number=1000000)
Out: 1.8214202160015702
See how much longer it would take to initialize the MutableDict with the same arbitrary values as before.
In: timeit('MD({x: ord(x) for x in string.ascii_lowercase[:27]})', setup='import string; from MutableDict import MutableDict as MD', number=1000000)
Out: 2.382507269998314
1.82 / 2.38 = 0.76. So if I'm thinking about this right MutableDict is 24% slower on creation.
Lets see how long it takes to do an insert. For this test I'll use the insert_after method as it is slightly bigger. Will also look for a key close to the end for insertion. 't' in this case.
In: timeit('v.insert_after("t", "zzrr", ord("z"))', setup='import string; from MutableDict import MutableDict as MD; v = MD({x: ord(x) for x in string.ascii_lowercase[:27]})' ,number=1000000)
Out: 3.9161406760104
2.38 / 3.91 = 0.60, 40% slower inserting_after than it's initialization. Not bad on a small test of 1 million loops. For a comparison in time relation we'll test this:
In: timeit('"-".join(map(str, range(100)))', number=1000000)
Out: 10.342204540997045
Not quite an apples to apples comparison but I hope these tests will aid you in your(reader not necessarily OP) decision to use or not use this class in your 3.7 projects.

Since Python 3.2, move_to_end can be used to move items around in an OrderedDict. The following code will implement the insert functionality by moving all items after the provided index to the end.
Note that this isn't very efficient and should be used sparingly (if at all).
def ordered_dict_insert(ordered_dict, index, key, value):
if key in ordered_dict:
raise KeyError("Key already exists")
if index < 0 or index > len(ordered_dict):
raise IndexError("Index out of range")
keys = list(ordered_dict.keys())[index:]
ordered_dict[key] = value
for k in keys:
ordered_dict.move_to_end(k)
There are obvious optimizations and improvements that could be made, but that's the general idea.

from collections import OrderedDict
od1 = OrderedDict([
('a', 1),
('b', 2),
('d', 4),
])
items = od1.items()
items.insert(2, ('c', 3))
od2 = OrderedDict(items)
print(od2) # OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 4)])

Related

RecursionError: maximum recursion depth exceeded while calling a Python object when using pickle.load()

Firstly I'm aware that there have been multiple questions already asked regarding this particular error but I can't find any that address the precise context in which it's occurring for me. I've also tried the solutions provided for other similar errors and it hasn't made any difference.
I'm using the python module pickle to save an object to file and the reload it using the following code:
with open('test_file.pkl', 'wb') as a:
pickle.dump(object1, a, pickle.HIGHEST_PROTOCOL)
This doesn't throw any error but then when I try and open the file using the following code:
with open('test_file.pkl', 'rb') as a:
object2 = pickle.load(a)
I get this error:
---------------------------------------------------------------------------
RecursionError Traceback (most recent call last)
<ipython-input-3-8c5a70d147f7> in <module>()
1 with open('2test_bolfi_results.pkl', 'rb') as a:
----> 2 results = pickle.load(a)
3
~/.local/lib/python3.5/site-packages/elfi/methods/results.py in __getattr__(self, item)
95 def __getattr__(self, item):
96 """Allow more convenient access to items under self.meta."""
---> 97 if item in self.meta.keys():
98 return self.meta[item]
99 else:
... last 1 frames repeated, from the frame below ...
~/.local/lib/python3.5/site-packages/elfi/methods/results.py in __getattr__(self, item)
95 def __getattr__(self, item):
96 """Allow more convenient access to items under self.meta."""
---> 97 if item in self.meta.keys():
98 return self.meta[item]
99 else:
RecursionError: maximum recursion depth exceeded while calling a Python object
I'm aware other people have seen this same error (Hitting Maximum Recursion Depth Using Pickle / cPickle) when doing pickle.dump and I've tried increasing the maximum recursion depth by doing sys.setrecursionlimit() but this doesn't work, I either get the same error as above or I increase it further and python crashes with the message: Segmentation fault (core dumped).
I suspect that the root of the problem is actually when I save the object with pickle.load() but I don't really know how to diagnose it.
Any suggestions?
(I'm running python3 on a windows 10 machine)
Here's a fairly minimal class derived from collections.UserDict which performs the same trick that your problem object does. It's a dictionary which allows you to access its items either via normal dict syntax, or as attributes. I've thrown in a few print calls so we can see when the main methods get called.
import collections
class AttrDict(collections.UserDict):
''' A dictionary that can be accessed via attributes '''
def __setattr__(self, key, value):
print('SA', key, value)
if key == 'data':
super().__setattr__('data', value)
else:
self.data[key] = value
def __getattr__(self, key):
print('GA', key)
if key in self.data:
return self.data[key]
else:
print('NOKEY')
raise AttributeError
def __delattr__(self, key):
del self.data[key]
# test
keys = 'zero', 'one', 'two', 'three'
data = {k: i for i, k in enumerate(keys)}
d = AttrDict(data)
print(d)
print(d.zero, d.one, d.two, d['three'])
output
SA data {}
{'zero': 0, 'one': 1, 'two': 2, 'three': 3}
GA zero
GA one
GA two
0 1 2 3
So far, so good. But if we try to pickle our d instance, we get RecursionError because of that __getattr__ which does the magic conversion of attribute access to key lookup. We can overcome that by providing the class with __getstate__ and __setstate__ methods.
import pickle
import collections
class AttrDict(collections.UserDict):
''' A dictionary that can be accessed via attributes '''
def __setattr__(self, key, value):
print('SA', key, value)
if key == 'data':
super().__setattr__('data', value)
else:
self.data[key] = value
def __getattr__(self, key):
print('GA', key)
if key in self.data:
return self.data[key]
else:
print('NOKEY')
raise AttributeError
def __delattr__(self, key):
del self.data[key]
def __getstate__(self):
print('GS')
return self.data
def __setstate__(self, state):
print('SS')
self.data = state
# tests
keys = 'zero', 'one', 'two', 'three'
data = {k: i for i, k in enumerate(keys)}
d = AttrDict(data)
print(d)
print(d.zero, d.one, d.two, d['three'])
print('Pickling')
s = pickle.dumps(d, pickle.HIGHEST_PROTOCOL)
print(s)
print('Unpickling')
obj = pickle.loads(s)
print(obj)
output
SA data {}
{'zero': 0, 'one': 1, 'two': 2, 'three': 3}
GA zero
GA one
GA two
0 1 2 3
Pickling
GS
b'\x80\x04\x95D\x00\x00\x00\x00\x00\x00\x00\x8c\x08__main__\x94\x8c\x08AttrDict\x94\x93\x94)\x81\x94}\x94(\x8c\x04zero\x94K\x00\x8c\x03one\x94K\x01\x8c\x03two\x94K\x02\x8c\x05three\x94K\x03ub.'
Unpickling
SS
SA data {'zero': 0, 'one': 1, 'two': 2, 'three': 3}
{'zero': 0, 'one': 1, 'two': 2, 'three': 3}
But what can we do to repair an existing class with this behaviour? Fortunately, Python allows us to easily add new methods to an existing class, even one that we obtain via importing.
import pickle
import collections
class AttrDict(collections.UserDict):
''' A dictionary that can be accessed via attributes '''
def __setattr__(self, key, value):
print('SA', key, value)
if key == 'data':
super().__setattr__('data', value)
else:
self.data[key] = value
def __getattr__(self, key):
print('GA', key)
if key in self.data:
return self.data[key]
else:
print('NOKEY')
raise AttributeError
def __delattr__(self, key):
del self.data[key]
# Patch the existing AttrDict class with __getstate__ & __setstate__ methods
def getstate(self):
print('GS')
return self.data
def setstate(self, state):
print('SS')
self.data = state
AttrDict.__getstate__ = getstate
AttrDict.__setstate__ = setstate
# tests
keys = 'zero', 'one', 'two', 'three'
data = {k: i for i, k in enumerate(keys)}
d = AttrDict(data)
print(d)
print(d.zero, d.one, d.two, d['three'])
print('Pickling')
s = pickle.dumps(d, pickle.HIGHEST_PROTOCOL)
print(s)
print('Unpickling')
obj = pickle.loads(s)
print(obj)
This code produces the same output as the previous version, so I won't repeat it here.
Hopefully, this gives you enough info to repair your faulty object. My __getstate__ & __setstate__ methods only save and restore the stuff in the .data dictionary. To properly pickle your object, we may need to be a bit more drastic. For example, we may need to save and restore the instance's .__dict__ attribute, rather than just the .data attribute, which corresponds to the .meta attribute in your problem object.

How to prevent key creation through d[key] = val

Suppose I have d = {'dogs': 3}. Using:
d['cats'] = 2
would create the key 'cats' and give it the value 2.
If I really intend to update a dict with a new key and value, I would use d.update(cats=2) because it feels more explicit.
Having automatic creation of a key feels error prone (especially in larger programs), e.g.:
# I decide to make a change to my dict.
d = {'puppies': 4, 'big_dogs': 2}
# Lots and lots of code.
# ....
def change_my_dogs_to_maximum_room_capacity():
# But I forgot to change this as well and there is no error to inform me.
# Instead a bug was created.
d['dogs'] = 1
Question:
Is there a way to disable the automatic creation of a key that doesn't exist through d[key] = value, and instead raise a KeyError?
Everything else should keep working though:
d = new_dict() # Works
d = new_dict(hi=1) # Works
d.update(c=5, x=2) # Works
d.setdefault('9', 'something') # Works
d['a_new_key'] = 1 # Raises KeyError
You could create a child of dict with a special __setitem__ method that refuses keys that didn't exist when it was initially created:
class StrictDict(dict):
def __setitem__(self, key, value):
if key not in self:
raise KeyError("{} is not a legal key of this StricDict".format(repr(key)))
dict.__setitem__(self, key, value)
x = StrictDict({'puppies': 4, 'big_dogs': 2})
x["puppies"] = 23 #this works
x["dogs"] = 42 #this raises an exception
It's not totally bulletproof (it will allow x.update({"cats": 99}) without complaint, for example), but it prevents the most likely case.
Inherit dict class and override __setitem__ to suits your needs.Try this
class mydict(dict):
def __init__(self, *args, **kwargs):
self.update(*args, **kwargs)
def __setitem__(self, key, value):
raise KeyError(key)
>>>a=mydict({'a':3})
>>>d
{'a': 3}
>>>d['a']
3
>>>d['b']=4
KeyError: 'b'
This will only allow new keys to be added with key=value using update:
class MyDict(dict):
def __init__(self, d):
dict.__init__(self)
self.instant = False
self.update(d)
def update(self, other=None, **kwargs):
if other is not None:
if isinstance(other, dict):
for k, v in other.items():
self[k] = v
else:
for k, v in other:
self[k] = v
else:
dict.update(self, kwargs)
self.instant = True
def __setitem__(self, key, value):
if self.instant and key not in self:
raise KeyError(key)
dict.__setitem__(self, key, value)
x = MyDict({1:2,2:3})
x[1] = 100 # works
x.update(cat=1) # works
x.update({2:200}) # works
x["bar"] = 3 # error
x.update({"foo":2}) # error
x.update([(5,2),(3,4)]) # error

How can I access a deeply nested dictionary using tuples?

I would like to expand on the autovivification example given in a previous answer from nosklo to allow dictionary access by tuple.
nosklo's solution looks like this:
class AutoVivification(dict):
"""Implementation of perl's autovivification feature."""
def __getitem__(self, item):
try:
return dict.__getitem__(self, item)
except KeyError:
value = self[item] = type(self)()
return value
Testing:
a = AutoVivification()
a[1][2][3] = 4
a[1][3][3] = 5
a[1][2]['test'] = 6
print a
Output:
{1: {2: {'test': 6, 3: 4}, 3: {3: 5}}}
I have a case where I want to set a node given some arbitrary tuple of subscripts. If I don't know how many layers deep the tuple will be, how can I design a way to set the appropriate node?
I'm thinking that perhaps I could use syntax like the following:
mytuple = (1,2,3)
a[mytuple] = 4
But I'm having trouble coming up with a working implementation.
Update
I have a fully working example based on #JCash's answer:
class NestedDict(dict):
"""
Nested dictionary of arbitrary depth with autovivification.
Allows data access via extended slice notation.
"""
def __getitem__(self, keys):
# Let's assume *keys* is a list or tuple.
if not isinstance(keys, basestring):
try:
node = self
for key in keys:
node = dict.__getitem__(node, key)
return node
except TypeError:
# *keys* is not a list or tuple.
pass
try:
return dict.__getitem__(self, keys)
except KeyError:
raise KeyError(keys)
def __setitem__(self, keys, value):
# Let's assume *keys* is a list or tuple.
if not isinstance(keys, basestring):
try:
node = self
for key in keys[:-1]:
try:
node = dict.__getitem__(node, key)
except KeyError:
node[key] = type(self)()
node = node[key]
return dict.__setitem__(node, keys[-1], value)
except TypeError:
# *keys* is not a list or tuple.
pass
dict.__setitem__(self, keys, value)
Which can achieve the same output as above using extended slice notation:
d = NestedDict()
d[1,2,3] = 4
d[1,3,3] = 5
d[1,2,'test'] = 6
This seems to work
def __setitem__(self, key, value):
if isinstance(key, tuple):
node = self
for i in key[:-1]:
try:
node = dict.__getitem__(node, i)
except KeyError:
node = node[i] = type(self)()
return dict.__setitem__(node, i, value)
return dict.__setitem__(self, key, value)

Recursively access dict via attributes as well as index access?

I'd like to be able to do something like this:
from dotDict import dotdictify
life = {'bigBang':
{'stars':
{'planets': []}
}
}
dotdictify(life)
# This would be the regular way:
life['bigBang']['stars']['planets'] = {'earth': {'singleCellLife': {}}}
# But how can we make this work?
life.bigBang.stars.planets.earth = {'singleCellLife': {}}
#Also creating new child objects if none exist, using the following syntax:
life.bigBang.stars.planets.earth.multiCellLife = {'reptiles':{},'mammals':{}}
My motivations are to improve the succinctness of the code, and if possible use similar syntax to Javascript for accessing JSON objects for efficient cross platform development. (I also use Py2JS and similar.)
Here's one way to create that kind of experience:
class DotDictify(dict):
MARKER = object()
def __init__(self, value=None):
if value is None:
pass
elif isinstance(value, dict):
for key in value:
self.__setitem__(key, value[key])
else:
raise TypeError('expected dict')
def __setitem__(self, key, value):
if isinstance(value, dict) and not isinstance(value, DotDictify):
value = DotDictify(value)
super(DotDictify, self).__setitem__(key, value)
def __getitem__(self, key):
found = self.get(key, DotDictify.MARKER)
if found is DotDictify.MARKER:
found = DotDictify()
super(DotDictify, self).__setitem__(key, found)
return found
__setattr__, __getattr__ = __setitem__, __getitem__
if __name__ == '__main__':
life = {'bigBang':
{'stars':
{'planets': {} # Value changed from []
}
}
}
life = DotDictify(life)
print(life.bigBang.stars.planets) # -> []
life.bigBang.stars.planets.earth = {'singleCellLife' : {}}
print(life.bigBang.stars.planets) # -> {'earth': {'singleCellLife': {}}}
Below another implementation of a nested attribute dictionary (inspired by the answer of Curt Hagenlocher, stripped down to the essential):
class AttrDict(dict):
""" Nested Attribute Dictionary
A class to convert a nested Dictionary into an object with key-values
accessible using attribute notation (AttrDict.attribute) in addition to
key notation (Dict["key"]). This class recursively sets Dicts to objects,
allowing you to recurse into nested dicts (like: AttrDict.attr.attr)
"""
def __init__(self, mapping=None):
super(AttrDict, self).__init__()
if mapping is not None:
for key, value in mapping.items():
self.__setitem__(key, value)
def __setitem__(self, key, value):
if isinstance(value, dict):
value = AttrDict(value)
super(AttrDict, self).__setitem__(key, value)
self.__dict__[key] = value # for code completion in editors
def __getattr__(self, item):
try:
return self.__getitem__(item)
except KeyError:
raise AttributeError(item)
__setattr__ = __setitem__
This works in both Python 2 and 3:
life = AttrDict({'bigBang': {'stars': {'planets': {}}}})
life['bigBang']['stars']['planets'] = {'earth': {'singleCellLife': {}}}
life.bigBang.stars.planets.earth.multiCellLife = {'reptiles': {}, 'mammals': {}}
print(life.bigBang.stars.planets.earth)
# -> {'singleCellLife': {}, 'multiCellLife': {'mammals': {}, 'reptiles': {}}}
Converting KeyError into AttributeError in __getattr__ is required in Python3 such that hasattr works also in case the attribute is not found:
hasattr(life, 'parallelUniverse')
# --> False
There is a package doing exactly what you want and also something more and it is called Prodict.
from prodict import Prodict
life_dict = {'bigBang':
{'stars':
{'planets': []}
}
}
life = Prodict.from_dict(life_dict)
print(life.bigBang.stars.planets)
# prints []
# you can even add new properties dynamically
life.bigBang.galaxies = []
PS: I'm the author of the Prodict.
Here is another solution:
from typing import Dict, Any
class PropertyTree: pass
def dict_to_prop_tree(yaml_config: Dict[str, Any]) -> PropertyTree:
tree = PropertyTree()
for key, value in yaml_config.items():
if type(value) == dict:
setattr(tree, key, dict_to_obj_tree(value))
elif type(value) == list:
setattr(tree, key, [dict_to_obj_tree(v) for v in value])
else:
setattr(tree, key, value)
return tree
Then in the python console:
d={'a': 1, 'b': 2, 'c': {'d': 4, 'e': 5, 'f': {'g': 6}, 'h': {}, 'j': 7}}
tree=dict_to_prop_tree(d)
tree.a
tree.c.f.g
prints the correct values
#!/usr/bin/env python3
# _*_ coding: utf-8 _*_
# Author: Xingbang Jiang
# E-mail: 1278561590#qq.com
# HomePage: http://www.xingbangsharing.tech
class Dotsdict(dict):
def __init__(self, args, **kwargs):
super(Dotsdict, self).__init__(args, **kwargs)
for obj in [args, kwargs]:
for k, v in obj.items():
if isinstance(v, dict):
v = Dotsdict(v)
self.__setitem__(k, v)
def __setitem__(self, key, val):
super(Dotsdict, self).__setitem__(key, val)
# self.__dict__[key] = val
def __delitem__(self, key):
super(Dotsdict, self).__delitem__(key)
# del self.__dict__[key]
def __getitem__(self, key):
return super(Dotsdict, self).__getitem__(key)
def __missing__(self, key):
dots = Dotsdict()
self.__setitem__(key, dots)
return dots
__setattr__, __delattr__, __getattr__ = __setitem__, __delitem__, __getitem__
# ===================================================================
d = {'k': 'v', 'x': {'y': 'z', 'p': 'q', }, }
print(type(d))
print(d)
dd = Dotsdict(d, i='j')
print(type(dd))
print(dd)
print('========================================')
dd.a = 'b'
dd.x.m = 'n'
print(dd.x.y)
del dd.x['p']
print(dd)
print(len(dd))

Multiple levels of 'collection.defaultdict' in Python

Thanks to some great folks on SO, I discovered the possibilities offered by collections.defaultdict, notably in readability and speed. I have put them to use with success.
Now I would like to implement three levels of dictionaries, the two top ones being defaultdict and the lowest one being int. I don't find the appropriate way to do this. Here is my attempt:
from collections import defaultdict
d = defaultdict(defaultdict)
a = [("key1", {"a1":22, "a2":33}),
("key2", {"a1":32, "a2":55}),
("key3", {"a1":43, "a2":44})]
for i in a:
d[i[0]] = i[1]
Now this works, but the following, which is the desired behavior, doesn't:
d["key4"]["a1"] + 1
I suspect that I should have declared somewhere that the second level defaultdict is of type int, but I didn't find where or how to do so.
The reason I am using defaultdict in the first place is to avoid having to initialize the dictionary for each new key.
Any more elegant suggestion?
Thanks pythoneers!
Use:
from collections import defaultdict
d = defaultdict(lambda: defaultdict(int))
This will create a new defaultdict(int) whenever a new key is accessed in d.
Another way to make a pickleable, nested defaultdict is to use a partial object instead of a lambda:
from functools import partial
...
d = defaultdict(partial(defaultdict, int))
This will work because the defaultdict class is globally accessible at the module level:
"You can't pickle a partial object unless the function [or in this
case, class] it wraps is globally accessible ... under its __name__
(within its __module__)"
-- Pickling wrapped partial functions
Look at nosklo's answer here for a more general solution.
class AutoVivification(dict):
"""Implementation of perl's autovivification feature."""
def __getitem__(self, item):
try:
return dict.__getitem__(self, item)
except KeyError:
value = self[item] = type(self)()
return value
Testing:
a = AutoVivification()
a[1][2][3] = 4
a[1][3][3] = 5
a[1][2]['test'] = 6
print a
Output:
{1: {2: {'test': 6, 3: 4}, 3: {3: 5}}}
As per #rschwieb's request for D['key'] += 1, we can expand on previous by overriding addition by defining __add__ method, to make this behave more like a collections.Counter()
First __missing__ will be called to create a new empty value, which will be passed into __add__. We test the value, counting on empty values to be False.
See emulating numeric types for more information on overriding.
from numbers import Number
class autovivify(dict):
def __missing__(self, key):
value = self[key] = type(self)()
return value
def __add__(self, x):
""" override addition for numeric types when self is empty """
if not self and isinstance(x, Number):
return x
raise ValueError
def __sub__(self, x):
if not self and isinstance(x, Number):
return -1 * x
raise ValueError
Examples:
>>> import autovivify
>>> a = autovivify.autovivify()
>>> a
{}
>>> a[2]
{}
>>> a
{2: {}}
>>> a[4] += 1
>>> a[5][3][2] -= 1
>>> a
{2: {}, 4: 1, 5: {3: {2: -1}}}
Rather than checking argument is a Number (very non-python, amirite!) we could just provide a default 0 value and then attempt the operation:
class av2(dict):
def __missing__(self, key):
value = self[key] = type(self)()
return value
def __add__(self, x):
""" override addition when self is empty """
if not self:
return 0 + x
raise ValueError
def __sub__(self, x):
""" override subtraction when self is empty """
if not self:
return 0 - x
raise ValueError
Late to the party, but for arbitrary depth I just found myself doing something like this:
from collections import defaultdict
class DeepDict(defaultdict):
def __call__(self):
return DeepDict(self.default_factory)
The trick here is basically to make the DeepDict instance itself a valid factory for constructing missing values. Now we can do things like
dd = DeepDict(DeepDict(list))
dd[1][2].extend([3,4])
sum(dd[1][2]) # 7
ddd = DeepDict(DeepDict(DeepDict(list)))
ddd[1][2][3].extend([4,5])
sum(ddd[1][2][3]) # 9
def _sub_getitem(self, k):
try:
# sub.__class__.__bases__[0]
real_val = self.__class__.mro()[-2].__getitem__(self, k)
val = '' if real_val is None else real_val
except Exception:
val = ''
real_val = None
# isinstance(Avoid,dict)也是true,会一直递归死
if type(val) in (dict, list, str, tuple):
val = type('Avoid', (type(val),), {'__getitem__': _sub_getitem, 'pop': _sub_pop})(val)
# 重新赋值当前字典键为返回值,当对其赋值时可回溯
if all([real_val is not None, isinstance(self, (dict, list)), type(k) is not slice]):
self[k] = val
return val
def _sub_pop(self, k=-1):
try:
val = self.__class__.mro()[-2].pop(self, k)
val = '' if val is None else val
except Exception:
val = ''
if type(val) in (dict, list, str, tuple):
val = type('Avoid', (type(val),), {'__getitem__': _sub_getitem, 'pop': _sub_pop})(val)
return val
class DefaultDict(dict):
def __getitem__(self, k):
return _sub_getitem(self, k)
def pop(self, k):
return _sub_pop(self, k)
In[8]: d=DefaultDict()
In[9]: d['a']['b']['c']['d']
Out[9]: ''
In[10]: d['a']="ggggggg"
In[11]: d['a']
Out[11]: 'ggggggg'
In[12]: d['a']['pp']
Out[12]: ''
No errors again.
No matter how many levels nested.
pop no error also
dd=DefaultDict({"1":333333})

Categories

Resources