Related
I want to insert an item into an OrderedDict at a certain position.
Using the gist of this SO answer i have the problem that it doesn't work on python 3.
This is the implementation used
from collections import OrderedDict
class ListDict(OrderedDict):
def __init__(self, *args, **kwargs):
super(ListDict, self).__init__(*args, **kwargs)
def __insertion(self, link_prev, key_value):
key, value = key_value
if link_prev[2] != key:
if key in self:
del self[key]
link_next = link_prev[1]
self._OrderedDict__map[key] = link_prev[1] = link_next[0] = [link_prev, link_next, key]
dict.__setitem__(self, key, value)
def insert_after(self, existing_key, key_value):
self.__insertion(self._OrderedDict__map[existing_key], key_value)
def insert_before(self, existing_key, key_value):
self.__insertion(self._OrderedDict__map[existing_key][0], key_value)
Using it like
ld = ListDict([(1,1), (2,2), (3,3)])
ld.insert_before(2, (1.5, 1.5))
gives
File "...", line 35, in insert_before
self.__insertion(self._OrderedDict__map[existing_key][0], key_value)
AttributeError: 'ListDict' object has no attribute '_OrderedDict__map'
It works with python 2.7. What is the reason that it fails in python 3?
Checking the source code of the OrderedDict implementation shows that self.__map is used instead of self._OrderedDict__map. Changing the code to the usage of self.__map gives
AttributeError: 'ListDict' object has no attribute '_ListDict__map'
How come? And how can i make this work in python 3? OrderedDict uses the internal __map attribute to store a doubly linked list. So how can i access this attribute properly?
I'm not sure you wouldn't be better served just keeping up with a separate list and dict in your code, but here is a stab at a pure Python implementation of such an object. This will be an order of magnitude slower than an actual OrderedDict in Python 3.5, which as I pointed out in my comment has been rewritten in C.
"""
A list/dict hybrid; like OrderedDict with insert_before and insert_after
"""
import collections.abc
class MutableOrderingDict(collections.abc.MutableMapping):
def __init__(self, iterable_or_mapping=None, **kw):
# This mimics dict's initialization and accepts the same arguments
# Of course, you have to pass an ordered iterable or mapping unless you
# want the order to be arbitrary. Garbage in, garbage out and all :)
self.__data = {}
self.__keys = []
if iterable_or_mapping is not None:
try:
iterable = iterable_or_mapping.items()
except AttributeError:
iterable = iterable_or_mapping
for key, value in iterable:
self.__keys.append(key)
self.__data[key] = value
for key, value in kw.items():
self.__keys.append(key)
self.__data[key] = value
def insert_before(self, key, new_key, value):
try:
self.__keys.insert(self.__keys.index(key), new_key)
except ValueError:
raise KeyError(key) from ValueError
else:
self.__data[new_key] = value
def insert_after(self, key, new_key, value):
try:
self.__keys.insert(self.__keys.index(key) + 1, new_key)
except ValueError:
raise KeyError(key) from ValueError
else:
self.__data[new_key] = value
def __getitem__(self, key):
return self.__data[key]
def __setitem__(self, key, value):
self.__keys.append(key)
self.__data[key] = value
def __delitem__(self, key):
del self.__data[key]
self.__keys.remove(key)
def __iter__(self):
return iter(self.__keys)
def __len__(self):
return len(self.__keys)
def __contains__(self, key):
return key in self.__keys
def __eq__(self, other):
try:
return (self.__data == dict(other.items()) and
self.__keys == list(other.keys()))
except AttributeError:
return False
def keys(self):
for key in self.__keys:
yield key
def items(self):
for key in self.__keys:
yield key, self.__data[key]
def values(self):
for key in self.__keys:
yield self.__data[key]
def get(self, key, default=None):
try:
return self.__data[key]
except KeyError:
return default
def pop(self, key, default=None):
value = self.get(key, default)
self.__delitem__(key)
return value
def popitem(self):
try:
return self.__data.pop(self.__keys.pop())
except IndexError:
raise KeyError('%s is empty' % self.__class__.__name__)
def clear(self):
self.__keys = []
self.__data = {}
def update(self, mapping):
for key, value in mapping.items():
self.__keys.append(key)
self.__data[key] = value
def setdefault(self, key, default):
try:
return self[key]
except KeyError:
self[key] = default
return self[key]
def __repr__(self):
return 'MutableOrderingDict(%s)' % ', '.join(('%r: %r' % (k, v)
for k, v in self.items()))
I ended up implementing the whole collections.abc.MutableMapping contract because none of the methods were very long, but you probably won't use all of them. In particular, __eq__ and popitem are a little arbitrary. I changed your signature on the insert_* methods to a 4-argument one that feels a little more natural to me. Final note: Only tested on Python 3.5. Certainly will not work on Python 2 without some (minor) changes.
Trying out the new dict object in 3.7 and thought I'd try to implement what Two-Bit Alchemist had done with his answer but just overriding the native dict class because in 3.7 dict's are ordered.
''' Script that extends python3.7 dictionary to include insert_before and insert_after methods. '''
from sys import exit as sExit
class MutableDict(dict):
''' Class that extends python3.7 dictionary to include insert_before and insert_after methods. '''
def insert_before(self, key, newKey, val):
''' Insert newKey:value into dict before key'''
try:
__keys = list(self.keys())
__vals = list(self.values())
insertAt = __keys.index(key)
__keys.insert(insertAt, newKey)
__vals.insert(insertAt, val)
self.clear()
self.update({x: __vals[i] for i, x in enumerate(__keys)})
except ValueError as e:
sExit(e)
def insert_after(self, key, newKey, val):
''' Insert newKey:value into dict after key'''
try:
__keys = list(self.keys())
__vals = list(self.values())
insertAt = __keys.index(key) + 1
if __keys[-1] != key:
__keys.insert(insertAt, newKey)
__vals.insert(insertAt, val)
self.clear()
self.update({x: __vals[i] for i, x in enumerate(__keys)})
else:
self.update({newKey: val})
except ValueError as e:
sExit(e)
A little testing:
In: v = MutableDict([('a', 1), ('b', 2), ('c', 3)])
Out: {'a': 1, 'b': 2, 'c': 3}
In: v.insert_before('a', 'g', 5)
Out: {'g': 5, 'a': 1, 'b': 2, 'c': 3}
In: v.insert_after('b', 't', 5)
Out: {'g': 5, 'a': 1, 'b': 2, 't': 5, 'c': 3}
Edit: I decided to do a little benchmark test to see what kind of performance hit this would take. I will use from timeit import timeit
Get a baseline. Create a dict with arbitrary values.
In: timeit('{x: ord(x) for x in string.ascii_lowercase[:27]}', setup='import string', number=1000000)
Out: 1.8214202160015702
See how much longer it would take to initialize the MutableDict with the same arbitrary values as before.
In: timeit('MD({x: ord(x) for x in string.ascii_lowercase[:27]})', setup='import string; from MutableDict import MutableDict as MD', number=1000000)
Out: 2.382507269998314
1.82 / 2.38 = 0.76. So if I'm thinking about this right MutableDict is 24% slower on creation.
Lets see how long it takes to do an insert. For this test I'll use the insert_after method as it is slightly bigger. Will also look for a key close to the end for insertion. 't' in this case.
In: timeit('v.insert_after("t", "zzrr", ord("z"))', setup='import string; from MutableDict import MutableDict as MD; v = MD({x: ord(x) for x in string.ascii_lowercase[:27]})' ,number=1000000)
Out: 3.9161406760104
2.38 / 3.91 = 0.60, 40% slower inserting_after than it's initialization. Not bad on a small test of 1 million loops. For a comparison in time relation we'll test this:
In: timeit('"-".join(map(str, range(100)))', number=1000000)
Out: 10.342204540997045
Not quite an apples to apples comparison but I hope these tests will aid you in your(reader not necessarily OP) decision to use or not use this class in your 3.7 projects.
Since Python 3.2, move_to_end can be used to move items around in an OrderedDict. The following code will implement the insert functionality by moving all items after the provided index to the end.
Note that this isn't very efficient and should be used sparingly (if at all).
def ordered_dict_insert(ordered_dict, index, key, value):
if key in ordered_dict:
raise KeyError("Key already exists")
if index < 0 or index > len(ordered_dict):
raise IndexError("Index out of range")
keys = list(ordered_dict.keys())[index:]
ordered_dict[key] = value
for k in keys:
ordered_dict.move_to_end(k)
There are obvious optimizations and improvements that could be made, but that's the general idea.
from collections import OrderedDict
od1 = OrderedDict([
('a', 1),
('b', 2),
('d', 4),
])
items = od1.items()
items.insert(2, ('c', 3))
od2 = OrderedDict(items)
print(od2) # OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 4)])
Suppose I have d = {'dogs': 3}. Using:
d['cats'] = 2
would create the key 'cats' and give it the value 2.
If I really intend to update a dict with a new key and value, I would use d.update(cats=2) because it feels more explicit.
Having automatic creation of a key feels error prone (especially in larger programs), e.g.:
# I decide to make a change to my dict.
d = {'puppies': 4, 'big_dogs': 2}
# Lots and lots of code.
# ....
def change_my_dogs_to_maximum_room_capacity():
# But I forgot to change this as well and there is no error to inform me.
# Instead a bug was created.
d['dogs'] = 1
Question:
Is there a way to disable the automatic creation of a key that doesn't exist through d[key] = value, and instead raise a KeyError?
Everything else should keep working though:
d = new_dict() # Works
d = new_dict(hi=1) # Works
d.update(c=5, x=2) # Works
d.setdefault('9', 'something') # Works
d['a_new_key'] = 1 # Raises KeyError
You could create a child of dict with a special __setitem__ method that refuses keys that didn't exist when it was initially created:
class StrictDict(dict):
def __setitem__(self, key, value):
if key not in self:
raise KeyError("{} is not a legal key of this StricDict".format(repr(key)))
dict.__setitem__(self, key, value)
x = StrictDict({'puppies': 4, 'big_dogs': 2})
x["puppies"] = 23 #this works
x["dogs"] = 42 #this raises an exception
It's not totally bulletproof (it will allow x.update({"cats": 99}) without complaint, for example), but it prevents the most likely case.
Inherit dict class and override __setitem__ to suits your needs.Try this
class mydict(dict):
def __init__(self, *args, **kwargs):
self.update(*args, **kwargs)
def __setitem__(self, key, value):
raise KeyError(key)
>>>a=mydict({'a':3})
>>>d
{'a': 3}
>>>d['a']
3
>>>d['b']=4
KeyError: 'b'
This will only allow new keys to be added with key=value using update:
class MyDict(dict):
def __init__(self, d):
dict.__init__(self)
self.instant = False
self.update(d)
def update(self, other=None, **kwargs):
if other is not None:
if isinstance(other, dict):
for k, v in other.items():
self[k] = v
else:
for k, v in other:
self[k] = v
else:
dict.update(self, kwargs)
self.instant = True
def __setitem__(self, key, value):
if self.instant and key not in self:
raise KeyError(key)
dict.__setitem__(self, key, value)
x = MyDict({1:2,2:3})
x[1] = 100 # works
x.update(cat=1) # works
x.update({2:200}) # works
x["bar"] = 3 # error
x.update({"foo":2}) # error
x.update([(5,2),(3,4)]) # error
Is there a way to rename a dictionary key, without reassigning its value to a new name and removing the old name key; and without iterating through dict key/value?
In case of OrderedDict do the same, while keeping that key's position.
For a regular dict, you can use:
mydict[k_new] = mydict.pop(k_old)
This will move the item to the end of the dict, unless k_new was already existing in which case it will overwrite the value in-place.
For a Python 3.7+ dict where you additionally want to preserve the ordering, the simplest is to rebuild an entirely new instance. For example, renaming key 2 to 'two':
>>> d = {0:0, 1:1, 2:2, 3:3}
>>> {"two" if k == 2 else k:v for k,v in d.items()}
{0: 0, 1: 1, 'two': 2, 3: 3}
The same is true for an OrderedDict, where you can't use dict comprehension syntax, but you can use a generator expression:
OrderedDict((k_new if k == k_old else k, v) for k, v in od.items())
Modifying the key itself, as the question asks for, is impractical because keys are hashable which usually implies they're immutable and can't be modified.
Using a check for newkey!=oldkey, this way you can do:
if newkey!=oldkey:
dictionary[newkey] = dictionary[oldkey]
del dictionary[oldkey]
In case of renaming all dictionary keys:
target_dict = {'k1':'v1', 'k2':'v2', 'k3':'v3'}
new_keys = ['k4','k5','k6']
for key,n_key in zip(target_dict.keys(), new_keys):
target_dict[n_key] = target_dict.pop(key)
You can use this OrderedDict recipe written by Raymond Hettinger and modify it to add a rename method, but this is going to be a O(N) in complexity:
def rename(self,key,new_key):
ind = self._keys.index(key) #get the index of old key, O(N) operation
self._keys[ind] = new_key #replace old key with new key in self._keys
self[new_key] = self[key] #add the new key, this is added at the end of self._keys
self._keys.pop(-1) #pop the last item in self._keys
Example:
dic = OrderedDict((("a",1),("b",2),("c",3)))
print dic
dic.rename("a","foo")
dic.rename("b","bar")
dic["d"] = 5
dic.rename("d","spam")
for k,v in dic.items():
print k,v
output:
OrderedDict({'a': 1, 'b': 2, 'c': 3})
foo 1
bar 2
c 3
spam 5
A few people before me mentioned the .pop trick to delete and create a key in a one-liner.
I personally find the more explicit implementation more readable:
d = {'a': 1, 'b': 2}
v = d['b']
del d['b']
d['c'] = v
The code above returns {'a': 1, 'c': 2}
Suppose you want to rename key k3 to k4:
temp_dict = {'k1':'v1', 'k2':'v2', 'k3':'v3'}
temp_dict['k4']= temp_dict.pop('k3')
Other answers are pretty good.But in python3.6, regular dict also has order. So it's hard to keep key's position in normal case.
def rename(old_dict,old_name,new_name):
new_dict = {}
for key,value in zip(old_dict.keys(),old_dict.values()):
new_key = key if key != old_name else new_name
new_dict[new_key] = old_dict[key]
return new_dict
In Python 3.6 (onwards?) I would go for the following one-liner
test = {'a': 1, 'old': 2, 'c': 3}
old_k = 'old'
new_k = 'new'
new_v = 4 # optional
print(dict((new_k, new_v) if k == old_k else (k, v) for k, v in test.items()))
which produces
{'a': 1, 'new': 4, 'c': 3}
May be worth noting that without the print statement the ipython console/jupyter notebook present the dictionary in an order of their choosing...
I am using #wim 's answer above, with dict.pop() when renaming keys, but I found a gotcha. Cycling through the dict to change the keys, without separating the list of old keys completely from the dict instance, resulted in cycling new, changed keys into the loop, and missing some existing keys.
To start with, I did it this way:
for current_key in my_dict:
new_key = current_key.replace(':','_')
fixed_metadata[new_key] = fixed_metadata.pop(current_key)
I found that cycling through the dict in this way, the dictionary kept finding keys even when it shouldn't, i.e., the new keys, the ones I had changed! I needed to separate the instances completely from each other to (a) avoid finding my own changed keys in the for loop, and (b) find some keys that were not being found within the loop for some reason.
I am doing this now:
current_keys = list(my_dict.keys())
for current_key in current_keys:
and so on...
Converting the my_dict.keys() to a list was necessary to get free of the reference to the changing dict. Just using my_dict.keys() kept me tied to the original instance, with the strange side effects.
In case someone wants to rename all the keys at once providing a list with the new names:
def rename_keys(dict_, new_keys):
"""
new_keys: type List(), must match length of dict_
"""
# dict_ = {oldK: value}
# d1={oldK:newK,} maps old keys to the new ones:
d1 = dict( zip( list(dict_.keys()), new_keys) )
# d1{oldK} == new_key
return {d1[oldK]: value for oldK, value in dict_.items()}
I came up with this function which does not mutate the original dictionary. This function also supports list of dictionaries too.
import functools
from typing import Union, Dict, List
def rename_dict_keys(
data: Union[Dict, List[Dict]], old_key: str, new_key: str
):
"""
This function renames dictionary keys
:param data:
:param old_key:
:param new_key:
:return: Union[Dict, List[Dict]]
"""
if isinstance(data, dict):
res = {k: v for k, v in data.items() if k != old_key}
try:
res[new_key] = data[old_key]
except KeyError:
raise KeyError(
"cannot rename key as old key '%s' is not present in data"
% old_key
)
return res
elif isinstance(data, list):
return list(
map(
functools.partial(
rename_dict_keys, old_key=old_key, new_key=new_key
),
data,
)
)
raise ValueError("expected type List[Dict] or Dict got '%s' for data" % type(data))
#helloswift123 I like your function. Here is a modification to rename multiple keys in a single call:
def rename(d, keymap):
"""
:param d: old dict
:type d: dict
:param keymap: [{:keys from-keys :values to-keys} keymap]
:returns: new dict
:rtype: dict
"""
new_dict = {}
for key, value in zip(d.keys(), d.values()):
new_key = keymap.get(key, key)
new_dict[new_key] = d[key]
return new_dict
You can use below code:
OldDict={'a':'v1', 'b':'v2', 'c':'v3'}
OldKey=['a','b','c']
NewKey=['A','B','C']
def DictKeyChanger(dict,OldKey,NewKey):
ListAllKey=list(dict.keys())
for x in range(0,len(NewKey)):
dict[NewKey[x]]=dict[OldKey[x]] if OldKey[x] in ListAllKey else None
for x in ListAllKey:
dict.pop(x)
return dict
NewDict=DictKeyChanger(OldDict,OldKey,NewKey)
print(NewDict)#===>>{'A': 'v1', 'B': 'v2', 'C': 'v3'}
Notes:
The length of list OldKey and list NewKey must be equal.
The length of the list OldKey must be equal to the listNewKey, if the key does not exist in the OldKey, put 'noexis' instead as shown as.
Example:
OldDict={'a':'v1', 'b':'v2', 'c':'v3'}
OldKey=['a','b','c','noexis','noexis']
NewKey=['A','B','C','D','E']
NewDict=DictKeyChanger(OldDict,OldKey,NewKey)
print(NewDict)#===>>{'A': 'v1', 'B': 'v2', 'C': 'v3', 'D': None, 'E': None}
For the keeping of order case (the other one is trivial, remove old and add new one): I was not satisfied with the ordered-dictionary needing reconstruction (at least partially), obviously for efficiency reasons, so I've put together a class (OrderedDictX) that extends OrderedDict and allows you to do key changes efficiently, i.e. in O(1) complexity. The implementation can also be adjusted for the now-ordered built-in dict class.
It uses 2 extra dictionaries to remap the changed keys ("external" - i.e. as they appear externally to the user) to the ones in the underlying OrderedDict ("internal") - the dictionaries will only hold keys that were changed so as long as no key changing is done they will be empty.
Performance measurements:
import timeit
import random
# Efficiency tests
from collections import MutableMapping
class OrderedDictRaymond(dict, MutableMapping):
def __init__(self, *args, **kwds):
if len(args) > 1:
raise TypeError('expected at 1 argument, got %d', len(args))
if not hasattr(self, '_keys'):
self._keys = []
self.update(*args, **kwds)
def rename(self,key,new_key):
ind = self._keys.index(key) #get the index of old key, O(N) operation
self._keys[ind] = new_key #replace old key with new key in self._keys
self[new_key] = self[key] #add the new key, this is added at the end of self._keys
self._keys.pop(-1) #pop the last item in self._keys
dict.__delitem__(self, key)
def clear(self):
del self._keys[:]
dict.clear(self)
def __setitem__(self, key, value):
if key not in self:
self._keys.append(key)
dict.__setitem__(self, key, value)
def __delitem__(self, key):
dict.__delitem__(self, key)
self._keys.remove(key)
def __iter__(self):
return iter(self._keys)
def __reversed__(self):
return reversed(self._keys)
def popitem(self):
if not self:
raise KeyError
key = self._keys.pop()
value = dict.pop(self, key)
return key, value
def __reduce__(self):
items = [[k, self[k]] for k in self]
inst_dict = vars(self).copy()
inst_dict.pop('_keys', None)
return (self.__class__, (items,), inst_dict)
setdefault = MutableMapping.setdefault
update = MutableMapping.update
pop = MutableMapping.pop
keys = MutableMapping.keys
values = MutableMapping.values
items = MutableMapping.items
def __repr__(self):
pairs = ', '.join(map('%r: %r'.__mod__, self.items()))
return '%s({%s})' % (self.__class__.__name__, pairs)
def copy(self):
return self.__class__(self)
#classmethod
def fromkeys(cls, iterable, value=None):
d = cls()
for key in iterable:
d[key] = value
return d
class obj_container:
def __init__(self, obj) -> None:
self.obj = obj
def change_key_splice(container, k_old, k_new):
od = container.obj
container.obj = OrderedDict((k_new if k == k_old else k, v) for k, v in od.items())
def change_key_raymond(container, k_old, k_new):
od = container.obj
od.rename(k_old, k_new)
def change_key_odx(container, k_old, k_new):
odx = container.obj
odx.change_key(k_old, k_new)
NUM_ITEMS = 20000
od_splice = OrderedDict([(x, x) for x in range(NUM_ITEMS)])
od_raymond = OrderedDictRaymond(od_splice.items())
odx = OrderedDictX(od_splice.items())
od_splice, od_raymond, odx = [obj_container(d) for d in [od_splice, od_raymond, odx]]
assert odx.obj == od_splice.obj
assert odx.obj == od_raymond.obj
# Pick randomly half of the keys to change
keys_to_change = random.sample(range(NUM_ITEMS), NUM_ITEMS//2)
print(f'OrderedDictX: {timeit.timeit(lambda: [change_key_odx(odx, k, k+NUM_ITEMS) for k in keys_to_change], number=1)}')
print(f'OrderedDictRaymond: {timeit.timeit(lambda: [change_key_raymond(od_raymond, k, k+NUM_ITEMS) for k in keys_to_change], number=1)}')
print(f'Splice: {timeit.timeit(lambda: [change_key_splice(od_splice, k, k+NUM_ITEMS) for k in keys_to_change], number=1)}')
assert odx.obj == od_splice.obj
assert odx.obj == od_raymond.obj
And results:
OrderedDictX: 0.06587849999999995
OrderedDictRaymond: 1.1131364
Splice: 1165.2614647
As expected, the splicing method is extremely slow (didn't expect it to be that much slower either though) and uses a lot of memory, and the O(N) solution of #Ashwini Chaudhary (bug-fixed though, del also needed) is also slower, 17X times in this example.
Of course, this solution being O(1), compared to the O(N) OrderedDictRaymond the time difference becomes much more apparent as the dictionary size increases, e.g. for 5 times more elements (100000), the O(N) is now 100X slower:
NUM_ITEMS = 100000
OrderedDictX: 0.3636919999999999
OrderedDictRaymond: 36.3963971
Here's the code, please comment if you see issues or have improvements to propose as this might still be error-prone.
from collections import OrderedDict
class OrderedDictX(OrderedDict):
def __init__(self, *args, **kwargs):
# Mappings from new->old (ext2int), old->new (int2ext).
# Only the keys that are changed (internal key doesn't match what the user sees) are contained.
self._keys_ext2int = OrderedDict()
self._keys_int2ext = OrderedDict()
self.update(*args, **kwargs)
def change_key(self, k_old, k_new):
# Validate that the old key is part of the dict
if not self.__contains__(k_old):
raise Exception(f'Cannot rename key {k_old} to {k_new}: {k_old} not existing in dict')
# Return if no changing is actually to be done
if len(OrderedDict.fromkeys([k_old, k_new])) == 1:
return
# Validate that the new key would not conflict with another one
if self.__contains__(k_new):
raise Exception(f'Cannot rename key {k_old} to {k_new}: {k_new} already in dict')
# Change the key using internal dicts mechanism
if k_old in self._keys_ext2int:
# Revert change temporarily
k_old_int = self._keys_ext2int[k_old]
del self._keys_ext2int[k_old]
k_old = k_old_int
# Check if new key matches the internal key
if len(OrderedDict.fromkeys([k_old, k_new])) == 1:
del self._keys_int2ext[k_old]
return
# Finalize key change
self._keys_ext2int[k_new] = k_old
self._keys_int2ext[k_old] = k_new
def __contains__(self, k) -> bool:
if k in self._keys_ext2int:
return True
if not super().__contains__(k):
return False
return k not in self._keys_int2ext
def __getitem__(self, k):
if not self.__contains__(k):
# Intentionally raise KeyError in ext2int
return self._keys_ext2int[k]
return super().__getitem__(self._keys_ext2int.get(k, k))
def __setitem__(self, k, v):
if k in self._keys_ext2int:
return super().__setitem__(self._keys_ext2int[k], v)
# If the key exists in the internal state but was renamed to a k_ext,
# employ this trick: make it such that it appears as if k_ext has also been renamed to k
if k in self._keys_int2ext:
k_ext = self._keys_int2ext[k]
self._keys_ext2int[k] = k_ext
k = k_ext
return super().__setitem__(k, v)
def __delitem__(self, k):
if not self.__contains__(k):
# Intentionally raise KeyError in ext2int
del self._keys_ext2int[k]
if k in self._keys_ext2int:
k_int = self._keys_ext2int[k]
del self._keys_ext2int[k]
del self._keys_int2ext[k_int]
k = k_int
return super().__delitem__(k)
def __iter__(self):
yield from self.keys()
def __reversed__(self):
for k in reversed(super().keys()):
yield self._keys_int2ext.get(k, k)
def __eq__(self, other: object) -> bool:
if not isinstance(other, dict):
return False
if len(self) != len(other):
return False
for (k, v), (k_other, v_other) in zip(self.items(), other.items()):
if k != k_other or v != v_other:
return False
return True
def update(self, *args, **kwargs):
for k, v in OrderedDict(*args, **kwargs).items():
self.__setitem__(k, v)
def popitem(self, last=True) -> tuple:
if not last:
k = next(iter(self.keys()))
else:
k = next(iter(reversed(self.keys())))
v = self.__getitem__(k)
self.__delitem__(k)
return k, v
class OrderedDictXKeysView:
def __init__(self, odx: 'OrderedDictX', orig_keys):
self._odx = odx
self._orig_keys = orig_keys
def __iter__(self):
for k in self._orig_keys:
yield self._odx._keys_int2ext.get(k, k)
def __reversed__(self):
for k in reversed(self._orig_keys):
yield self._odx._keys_int2ext.get(k, k)
class OrderedDictXItemsView:
def __init__(self, odx: 'OrderedDictX', orig_items):
self._odx = odx
self._orig_items = orig_items
def __iter__(self):
for k, v in self._orig_items:
yield self._odx._keys_int2ext.get(k, k), v
def __reversed__(self):
for k, v in reversed(self._orig_items):
yield self._odx._keys_int2ext.get(k, k), v
def keys(self):
return self.OrderedDictXKeysView(self, super().keys())
def items(self):
return self.OrderedDictXItemsView(self, super().items())
def copy(self):
return OrderedDictX(self.items())
# FIXME: move this to pytest
if __name__ == '__main__':
MAX = 25
items = [(i+1, i+1) for i in range(MAX)]
keys = [i[0] for i in items]
d = OrderedDictX(items)
# keys() before change
print(list(d.items()))
assert list(d.keys()) == keys
# __contains__ before change
assert 1 in d
# __getitem__ before change
assert d[1] == 1
# __setitem__ before change
d[1] = 100
assert d[1] == 100
d[1] = 1
assert d[1] == 1
# __delitem__ before change
assert MAX in d
del d[MAX]
assert MAX not in d
d[MAX] = MAX
assert MAX in d
print('== Tests before key change finished ==')
# change_key and __contains__
assert MAX-1 in d
assert MAX*2 not in d
d.change_key(MAX-1, MAX*2)
assert MAX-1 not in d
assert MAX*2 in d
# items() and keys()
items[MAX-2] = (MAX*2, MAX-1)
keys[MAX-2] = MAX*2
assert list(d.items()) == items
assert list(d.keys()) == keys
print(list(d.items()))
# __getitem__
assert d[MAX*2] == MAX-1
# __setitem__
d[MAX*2] = MAX*3
items[MAX-2] = (MAX*2, MAX*3)
keys[MAX-2] = MAX*2
assert list(d.items()) == items
assert list(d.keys()) == keys
# __delitem__
del d[MAX]
items = items[:-1]
keys = keys[:-1]
assert list(d.items()) == items
assert list(d.keys()) == keys
d[MAX] = MAX
items.append((MAX, MAX))
keys.append(MAX)
# __iter__
assert list(d) == keys
# __reversed__
print(list(reversed(d.items())))
assert list(reversed(d)) == list(reversed(keys))
assert list(reversed(d.keys())) == list(reversed(keys))
assert list(reversed(d.items())) == list(reversed(items))
# pop_item()
assert d.popitem() == (MAX, MAX)
assert d.popitem() == (MAX*2, MAX*3)
items = items[:-2]
keys = keys[:-2]
assert list(d.items()) == items
assert list(d.keys()) == keys
# update()
d.update({1: 1000, MAX-2: MAX*4})
items[0] = (1, 1000)
items[MAX-3] = (MAX-2, MAX*4)
assert list(d.items()) == items
assert list(d.keys()) == keys
# move_to_end()
d.move_to_end(1)
items = items[1:] + [items[0]]
keys = keys[1:] + [keys[0]]
assert list(d.items()) == items
assert list(d.keys()) == keys
# __eq__
d.change_key(1, 2000)
other_d = OrderedDictX(d.items())
assert d == other_d
assert other_d == d
In my case, I had a function call returning a dict, which had a key I was hoping to rename in a single line, so none of these worked for me. Starting in python 3.8, you can use the walrus operator to keep it to one line if you are not looking for an inplace operation and the dict is not yet defined.
old_dict = get_dict()
# old_dict = {'a': 1, 'b': 2, 'c': 3}
new_dict = {'new1': (x := get_dict()).pop('b'), **x}
# new_dict = {'a': 1, 'new1': 2, 'c': 3}
I have combined some answers from the above thread and come up with the solution below. Although it is simple it can be used as a building block for making more complex key updates from a dictionary.
test_dict = {'a': 1, 'b': 2, 'c': 3}
print(test_dict)
# {'a': 1, 'b': 2, 'c': 3}
prefix = 'up'
def dict_key_update(json_file):
new_keys = []
old_keys = []
for i,(key,value) in enumerate(json_file.items()):
old_keys.append(key)
new_keys.append(str(prefix) + key) # i have updated by adding a prefix to the
# key
for old_key, new_key in zip(old_keys,new_keys):
print('old {}, new {}'.format(old_key, new_key))
if new_key!=old_key:
json_file[new_key] = json_file.pop(old_key)
return json_file
test_dict = dict_key_update(test_dict)
print(test_dict)
# {'upa': 1, 'upb': 2, 'upc': 3}
I would like to expand on the autovivification example given in a previous answer from nosklo to allow dictionary access by tuple.
nosklo's solution looks like this:
class AutoVivification(dict):
"""Implementation of perl's autovivification feature."""
def __getitem__(self, item):
try:
return dict.__getitem__(self, item)
except KeyError:
value = self[item] = type(self)()
return value
Testing:
a = AutoVivification()
a[1][2][3] = 4
a[1][3][3] = 5
a[1][2]['test'] = 6
print a
Output:
{1: {2: {'test': 6, 3: 4}, 3: {3: 5}}}
I have a case where I want to set a node given some arbitrary tuple of subscripts. If I don't know how many layers deep the tuple will be, how can I design a way to set the appropriate node?
I'm thinking that perhaps I could use syntax like the following:
mytuple = (1,2,3)
a[mytuple] = 4
But I'm having trouble coming up with a working implementation.
Update
I have a fully working example based on #JCash's answer:
class NestedDict(dict):
"""
Nested dictionary of arbitrary depth with autovivification.
Allows data access via extended slice notation.
"""
def __getitem__(self, keys):
# Let's assume *keys* is a list or tuple.
if not isinstance(keys, basestring):
try:
node = self
for key in keys:
node = dict.__getitem__(node, key)
return node
except TypeError:
# *keys* is not a list or tuple.
pass
try:
return dict.__getitem__(self, keys)
except KeyError:
raise KeyError(keys)
def __setitem__(self, keys, value):
# Let's assume *keys* is a list or tuple.
if not isinstance(keys, basestring):
try:
node = self
for key in keys[:-1]:
try:
node = dict.__getitem__(node, key)
except KeyError:
node[key] = type(self)()
node = node[key]
return dict.__setitem__(node, keys[-1], value)
except TypeError:
# *keys* is not a list or tuple.
pass
dict.__setitem__(self, keys, value)
Which can achieve the same output as above using extended slice notation:
d = NestedDict()
d[1,2,3] = 4
d[1,3,3] = 5
d[1,2,'test'] = 6
This seems to work
def __setitem__(self, key, value):
if isinstance(key, tuple):
node = self
for i in key[:-1]:
try:
node = dict.__getitem__(node, i)
except KeyError:
node = node[i] = type(self)()
return dict.__setitem__(node, i, value)
return dict.__setitem__(self, key, value)
I'm trying to write a very simple function to recursively search through a possibly nested (in the most extreme cases ten levels deep) Python dictionary and return the first value it finds from the given key.
I cannot understand why my code doesn't work for nested dictionaries.
def _finditem(obj, key):
if key in obj: return obj[key]
for k, v in obj.items():
if isinstance(v,dict):
_finditem(v, key)
print _finditem({"B":{"A":2}},"A")
It returns None.
It does work, however, for _finditem({"B":1,"A":2},"A"), returning 2.
I'm sure it's a simple mistake but I cannot find it. I feel like there already might be something for this in the standard library or collections, but I can't find that either.
If you are looking for a general explanation of what is wrong with code like this, the canonical is Why does my recursive function return None?. The answers here are mostly specific to the task of searching in a nested dictionary.
when you recurse, you need to return the result of _finditem
def _finditem(obj, key):
if key in obj: return obj[key]
for k, v in obj.items():
if isinstance(v,dict):
return _finditem(v, key) #added return statement
To fix the actual algorithm, you need to realize that _finditem returns None if it didn't find anything, so you need to check that explicitly to prevent an early return:
def _finditem(obj, key):
if key in obj: return obj[key]
for k, v in obj.items():
if isinstance(v,dict):
item = _finditem(v, key)
if item is not None:
return item
Of course, that will fail if you have None values in any of your dictionaries. In that case, you could set up a sentinel object() for this function and return that in the case that you don't find anything -- Then you can check against the sentinel to know if you found something or not.
Here's a function that searches a dictionary that contains both nested dictionaries and lists. It creates a list of the values of the results.
def get_recursively(search_dict, field):
"""
Takes a dict with nested lists and dicts,
and searches all dicts for a key of the field
provided.
"""
fields_found = []
for key, value in search_dict.iteritems():
if key == field:
fields_found.append(value)
elif isinstance(value, dict):
results = get_recursively(value, field)
for result in results:
fields_found.append(result)
elif isinstance(value, list):
for item in value:
if isinstance(item, dict):
more_results = get_recursively(item, field)
for another_result in more_results:
fields_found.append(another_result)
return fields_found
Here is a way to do this using a "stack" and the "stack of iterators" pattern (credits to Gareth Rees):
def search(d, key, default=None):
"""Return a value corresponding to the specified key in the (possibly
nested) dictionary d. If there is no item with that key, return
default.
"""
stack = [iter(d.items())]
while stack:
for k, v in stack[-1]:
if isinstance(v, dict):
stack.append(iter(v.items()))
break
elif k == key:
return v
else:
stack.pop()
return default
The print(search({"B": {"A": 2}}, "A")) would print 2.
Just trying to make it shorter:
def get_recursively(search_dict, field):
if isinstance(search_dict, dict):
if field in search_dict:
return search_dict[field]
for key in search_dict:
item = get_recursively(search_dict[key], field)
if item is not None:
return item
elif isinstance(search_dict, list):
for element in search_dict:
item = get_recursively(element, field)
if item is not None:
return item
return None
Here's a Python 3.3+ solution which can handle lists of lists of dicts.
It also uses duck typing, so it can handle any iterable, or object implementing the 'items' method.
from typing import Iterator
def deep_key_search(obj, key: str) -> Iterator:
""" Do a deep search of {obj} and return the values of all {key} attributes found.
:param obj: Either a dict type object or an iterator.
:return: Iterator of all {key} values found"""
if isinstance(obj, str):
# When duck-typing iterators recursively, we must exclude strings
return
try:
# Assume obj is a like a dict and look for the key
for k, v in obj.items():
if k == key:
yield v
else:
yield from deep_key_search(v, key)
except AttributeError:
# Not a dict type object. Is it iterable like a list?
try:
for v in obj:
yield from deep_key_search(v, key)
except TypeError:
pass # Not iterable either.
Pytest:
#pytest.mark.parametrize(
"data, expected, dscr", [
({}, [], "Empty dict"),
({'Foo': 1, 'Bar': 2}, [1], "Plain dict"),
([{}, {'Foo': 1, 'Bar': 2}], [1], "List[dict]"),
([[[{'Baz': 3, 'Foo': 'a'}]], {'Foo': 1, 'Bar': 2}], ['a', 1], "Deep list"),
({'Foo': 1, 'Bar': {'Foo': 'c'}}, [1, 'c'], "Dict of Dict"),
(
{'Foo': 1, 'Bar': {'Foo': 'c', 'Bar': 'abcdef'}},
[1, 'c'], "Contains a non-selected string value"
),
])
def test_deep_key_search(data, expected, dscr):
assert list(deep_key_search(data, 'Foo')) == expected
I couldn't add a comment to the accepted solution proposed by #mgilston because of lack of reputation. The solution doesn't work if the key being searched for is inside a list.
Looping through the elements of the lists and calling the recursive function should extend the functionality to find elements inside nested lists:
def _finditem(obj, key):
if key in obj: return obj[key]
for k, v in obj.items():
if isinstance(v,dict):
item = _finditem(v, key)
if item is not None:
return item
elif isinstance(v,list):
for list_item in v:
item = _finditem(list_item, key)
if item is not None:
return item
print(_finditem({"C": {"B": [{"A":2}]}}, "A"))
I had to create a general-case version that finds a uniquely-specified key (a minimal dictionary that specifies the path to the desired value) in a dictionary that contains multiple nested dictionaries and lists.
For the example below, a target dictionary is created to search, and the key is created with the wildcard "???". When run, it returns the value "D"
def lfind(query_list:List, target_list:List, targ_str:str = "???"):
for tval in target_list:
#print("lfind: tval = {}, query_list[0] = {}".format(tval, query_list[0]))
if isinstance(tval, dict):
val = dfind(query_list[0], tval, targ_str)
if val:
return val
elif tval == query_list[0]:
return tval
def dfind(query_dict:Dict, target_dict:Dict, targ_str:str = "???"):
for key, qval in query_dict.items():
tval = target_dict[key]
#print("dfind: key = {}, qval = {}, tval = {}".format(key, qval, tval))
if isinstance(qval, dict):
val = dfind(qval, tval, targ_str)
if val:
return val
elif isinstance(qval, list):
return lfind(qval, tval, targ_str)
else:
if qval == targ_str:
return tval
if qval != tval:
break
def find(target_dict:Dict, query_dict:Dict):
result = dfind(query_dict, target_dict)
return result
target_dict = {"A":[
{"key1":"A", "key2":{"key3": "B"}},
{"key1":"C", "key2":{"key3": "D"}}]
}
query_dict = {"A":[{"key1":"C", "key2":{"key3": "???"}}]}
result = find(target_dict, query_dict)
print("result = {}".format(result))
Thought I'd throw my hat in the ring, this will allow for recursive requests on anything that implements a __getitem__ method.
def _get_recursive(obj, args, default=None):
"""Apply successive requests to an obj that implements __getitem__ and
return result if something is found, else return default"""
if not args:
return obj
try:
key, *args = args
_obj = object.__getitem__(obj, key)
return _get_recursive(_obj, args, default=default)
except (KeyError, IndexError, AttributeError):
return default