Related
I have a complex dictionary structure which I would like to access via a list of keys to address the correct item.
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist = ["a", "r"]
or
maplist = ["b", "v", "y"]
I have made the following code which works but I'm sure there is a better and more efficient way to do this if anyone has an idea.
# Get a given data from a dictionary with position provided as a list
def getFromDict(dataDict, mapList):
for k in mapList: dataDict = dataDict[k]
return dataDict
# Set a given data in a dictionary with position provided as a list
def setInDict(dataDict, mapList, value):
for k in mapList[:-1]: dataDict = dataDict[k]
dataDict[mapList[-1]] = value
Use reduce() to traverse the dictionary:
from functools import reduce # forward compatibility for Python 3
import operator
def getFromDict(dataDict, mapList):
return reduce(operator.getitem, mapList, dataDict)
and reuse getFromDict to find the location to store the value for setInDict():
def setInDict(dataDict, mapList, value):
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
All but the last element in mapList is needed to find the 'parent' dictionary to add the value to, then use the last element to set the value to the right key.
Demo:
>>> getFromDict(dataDict, ["a", "r"])
1
>>> getFromDict(dataDict, ["b", "v", "y"])
2
>>> setInDict(dataDict, ["b", "v", "w"], 4)
>>> import pprint
>>> pprint.pprint(dataDict)
{'a': {'r': 1, 's': 2, 't': 3},
'b': {'u': 1, 'v': {'w': 4, 'x': 1, 'y': 2, 'z': 3}, 'w': 3}}
Note that the Python PEP8 style guide prescribes snake_case names for functions. The above works equally well for lists or a mix of dictionaries and lists, so the names should really be get_by_path() and set_by_path():
from functools import reduce # forward compatibility for Python 3
import operator
def get_by_path(root, items):
"""Access a nested object in root by item sequence."""
return reduce(operator.getitem, items, root)
def set_by_path(root, items, value):
"""Set a value in a nested object in root by item sequence."""
get_by_path(root, items[:-1])[items[-1]] = value
And for completion's sake, a function to delete a key:
def del_by_path(root, items):
"""Delete a key-value in a nested object in root by item sequence."""
del get_by_path(root, items[:-1])[items[-1]]
It seems more pythonic to use a for loop.
See the quote from What’s New In Python 3.0.
Removed reduce(). Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable.
def nested_get(dic, keys):
for key in keys:
dic = dic[key]
return dic
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
def nested_del(dic, keys):
for key in keys[:-1]:
dic = dic[key]
del dic[keys[-1]]
Note that the accepted solution doesn't set non-existing nested keys (it raises KeyError). Using the approach above will create non-existing nodes instead.
The code works in both Python 2 and 3.
Using reduce is clever, but the OP's set method may have issues if the parent keys do not pre-exist in the nested dictionary. Since this is the first SO post I saw for this subject in my google search, I would like to make it slightly better.
The set method in ( Setting a value in a nested python dictionary given a list of indices and value ) seems more robust to missing parental keys. To copy it over:
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
Also, it can be convenient to have a method that traverses the key tree and get all the absolute key paths, for which I have created:
def keysInDict(dataDict, parent=[]):
if not isinstance(dataDict, dict):
return [tuple(parent)]
else:
return reduce(list.__add__,
[keysInDict(v,parent+[k]) for k,v in dataDict.items()], [])
One use of it is to convert the nested tree to a pandas DataFrame, using the following code (assuming that all leafs in the nested dictionary have the same depth).
def dict_to_df(dataDict):
ret = []
for k in keysInDict(dataDict):
v = np.array( getFromDict(dataDict, k), )
v = pd.DataFrame(v)
v.columns = pd.MultiIndex.from_product(list(k) + [v.columns])
ret.append(v)
return reduce(pd.DataFrame.join, ret)
This library may be helpful: https://github.com/akesterson/dpath-python
A python library for accessing and searching dictionaries via
/slashed/paths ala xpath
Basically it lets you glob over a dictionary as if it were a
filesystem.
How about using recursive functions?
To get a value:
def getFromDict(dataDict, maplist):
first, rest = maplist[0], maplist[1:]
if rest:
# if `rest` is not empty, run the function recursively
return getFromDict(dataDict[first], rest)
else:
return dataDict[first]
And to set a value:
def setInDict(dataDict, maplist, value):
first, rest = maplist[0], maplist[1:]
if rest:
try:
if not isinstance(dataDict[first], dict):
# if the key is not a dict, then make it a dict
dataDict[first] = {}
except KeyError:
# if key doesn't exist, create one
dataDict[first] = {}
setInDict(dataDict[first], rest, value)
else:
dataDict[first] = value
Solved this with recursion:
def get(d,l):
if len(l)==1: return d[l[0]]
return get(d[l[0]],l[1:])
Using your example:
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist1 = ["a", "r"]
maplist2 = ["b", "v", "y"]
print(get(dataDict, maplist1)) # 1
print(get(dataDict, maplist2)) # 2
Instead of taking a performance hit each time you want to look up a value, how about you flatten the dictionary once then simply look up the key like b:v:y
def flatten(mydict,sep = ':'):
new_dict = {}
for key,value in mydict.items():
if isinstance(value,dict):
_dict = {sep.join([key, _key]):_value for _key, _value in flatten(value).items()}
new_dict.update(_dict)
else:
new_dict[key]=value
return new_dict
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
flat_dict = flatten(dataDict)
print flat_dict
{'b:w': 3, 'b:u': 1, 'b:v:y': 2, 'b:v:x': 1, 'b:v:z': 3, 'a:r': 1, 'a:s': 2, 'a:t': 3}
This way you can simply look up items using flat_dict['b:v:y'] which will give you 1.
And instead of traversing the dictionary on each lookup, you may be able to speed this up by flattening the dictionary and saving the output so that a lookup from cold start would mean loading up the flattened dictionary and simply performing a key/value lookup with no traversal.
Check out NestedDict from the ndicts package (I am the author), it does exactly what you ask for.
from ndicts import NestedDict
data_dict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
nd = NestedDict(data_dict)
You can now access keys using comma separated values.
>>> nd["a", "r"]
1
>>> nd["b", "v"]
{"x": 1, "y": 2, "z": 3}
Pure Python style, without any import:
def nested_set(element, value, *keys):
if type(element) is not dict:
raise AttributeError('nested_set() expects dict as first argument.')
if len(keys) < 2:
raise AttributeError('nested_set() expects at least three arguments, not enough given.')
_keys = keys[:-1]
_element = element
for key in _keys:
_element = _element[key]
_element[keys[-1]] = value
example = {"foo": { "bar": { "baz": "ok" } } }
keys = ['foo', 'bar']
nested_set(example, "yay", *keys)
print(example)
Output
{'foo': {'bar': 'yay'}}
An alternative way if you don't want to raise errors if one of the keys is absent (so that your main code can run without interruption):
def get_value(self,your_dict,*keys):
curr_dict_ = your_dict
for k in keys:
v = curr_dict.get(k,None)
if v is None:
break
if isinstance(v,dict):
curr_dict = v
return v
In this case, if any of the input keys is not present, None is returned, which can be used as a check in your main code to perform an alternative task.
It's satisfying to see these answers for having two static methods for setting & getting nested attributes. These solutions are way better than using nested trees https://gist.github.com/hrldcpr/2012250
Here's my implementation.
Usage:
To set nested attribute call sattr(my_dict, 1, 2, 3, 5) is equal to my_dict[1][2][3][4]=5
To get a nested attribute call gattr(my_dict, 1, 2)
def gattr(d, *attrs):
"""
This method receives a dict and list of attributes to return the innermost value of the give dict
"""
try:
for at in attrs:
d = d[at]
return d
except(KeyError, TypeError):
return None
def sattr(d, *attrs):
"""
Adds "val" to dict in the hierarchy mentioned via *attrs
For ex:
sattr(animals, "cat", "leg","fingers", 4) is equivalent to animals["cat"]["leg"]["fingers"]=4
This method creates necessary objects until it reaches the final depth
This behaviour is also known as autovivification and plenty of implementation are around
This implementation addresses the corner case of replacing existing primitives
https://gist.github.com/hrldcpr/2012250#gistcomment-1779319
"""
for attr in attrs[:-2]:
if type(d.get(attr)) is not dict:
d[attr] = {}
d = d[attr]
d[attrs[-2]] = attrs[-1]
You can use pydash:
import pydash as _
_.get(dataDict, ["b", "v", "y"], default='Default')
https://pydash.readthedocs.io/en/latest/api.html
If you also want the ability to work with arbitrary json including nested lists and dicts, and nicely handle invalid lookup paths, here's my solution:
from functools import reduce
def get_furthest(s, path):
'''
Gets the furthest value along a given key path in a subscriptable structure.
subscriptable, list -> any
:param s: the subscriptable structure to examine
:param path: the lookup path to follow
:return: a tuple of the value at the furthest valid key, and whether the full path is valid
'''
def step_key(acc, key):
s = acc[0]
if isinstance(s, str):
return (s, False)
try:
return (s[key], acc[1])
except LookupError:
return (s, False)
return reduce(step_key, path, (s, True))
def get_val(s, path):
val, successful = get_furthest(s, path)
if successful:
return val
else:
raise LookupError('Invalid lookup path: {}'.format(path))
def set_val(s, path, value):
get_val(s, path[:-1])[path[-1]] = value
How about check and then set dict element without processing all indexes twice?
Solution:
def nested_yield(nested, keys_list):
"""
Get current nested data by send(None) method. Allows change it to Value by calling send(Value) next time
:param nested: list or dict of lists or dicts
:param keys_list: list of indexes/keys
"""
if not len(keys_list): # assign to 1st level list
if isinstance(nested, list):
while True:
nested[:] = yield nested
else:
raise IndexError('Only lists can take element without key')
last_key = keys_list.pop()
for key in keys_list:
nested = nested[key]
while True:
try:
nested[last_key] = yield nested[last_key]
except IndexError as e:
print('no index {} in {}'.format(last_key, nested))
yield None
Example workflow:
ny = nested_yield(nested_dict, nested_address)
data_element = ny.send(None)
if data_element:
# process element
...
else:
# extend/update nested data
ny.send(new_data_element)
...
ny.close()
Test
>>> cfg= {'Options': [[1,[0]],[2,[4,[8,16]]],[3,[9]]]}
ny = nested_yield(cfg, ['Options',1,1,1])
ny.send(None)
[8, 16]
>>> ny.send('Hello!')
'Hello!'
>>> cfg
{'Options': [[1, [0]], [2, [4, 'Hello!']], [3, [9]]]}
>>> ny.close()
Very late to the party, but posting in case this may help someone in the future. For my use case, the following function worked the best. Works to pull any data type out of dictionary
dict is the dictionary containing our value
list is a list of "steps" towards our value
def getnestedvalue(dict, list):
length = len(list)
try:
for depth, key in enumerate(list):
if depth == length - 1:
output = dict[key]
return output
dict = dict[key]
except (KeyError, TypeError):
return None
return None
I'd rather use simple recursion function:
def get_value_by_path(data, maplist):
if not maplist:
return data
for key in maplist:
if key in data:
return get_value_by_path(data[key], maplist[1:])
a method for concatenating strings:
def get_sub_object_from_path(dict_name, map_list):
for i in map_list:
_string = "['%s']" % i
dict_name += _string
value = eval(dict_name)
return value
#Sample:
_dict = {'new': 'person', 'time': {'for': 'one'}}
map_list = ['time', 'for']
print get_sub_object_from_path("_dict",map_list)
#Output:
#one
Extending #DomTomCat and others' approach, these functional (ie, return modified data via deepcopy without affecting the input) setter and mapper works for nested dict and list.
setter:
def set_at_path(data0, keys, value):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(set_by_path(v,keys[1:],value) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [set_by_path(x[1],keys[1:],value) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=value
return data
mapper:
def map_at_path(data0, keys, f):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(map_at_path(v,keys[1:],f) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [map_at_path(x[1],keys[1:],f) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=f(data[keys[-1]])
return data
I use this
def get_dictionary_value(dictionary_temp, variable_dictionary_keys):
try:
if(len(variable_dictionary_keys) == 0):
return str(dictionary_temp)
variable_dictionary_key = variable_dictionary_keys[0]
variable_dictionary_keys.remove(variable_dictionary_key)
return get_dictionary_value(dictionary_temp[variable_dictionary_key] , variable_dictionary_keys)
except Exception as variable_exception:
logging.error(variable_exception)
return ''
You can make use of the eval function in python.
def nested_parse(nest, map_list):
nestq = "nest['" + "']['".join(map_list) + "']"
return eval(nestq, {'__builtins__':None}, {'nest':nest})
Explanation
For your example query: maplist = ["b", "v", "y"]
nestq will be "nest['b']['v']['y']" where nest is the nested dictionary.
The eval builtin function executes the given string. However, it is important to be careful about possible vulnerabilities that arise from use of eval function. Discussion can be found here:
https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html
https://www.journaldev.com/22504/python-eval-function
In the nested_parse() function, I have made sure that no __builtins__ globals are available and only local variable that is available is the nest dictionary.
I need perform search in some dictionary structure :
dic_global = {
'key_lev1_1': {
'key_lev2_a': 'some_value_1',
'key_lev2_b': 'some_value_2'
},
'key_lev1_2': 'some_value_111'
}
and I need to perform recursive search for item in the whole structure so that function would return the key where the item was.
so if fun found 'key_lev2_a' it returns 'key_lev1_1' - the name of enclosure the key where the dict with 'key_lev2_a' is.
is it possible ?
def find_key(obj, key):
if key in obj:
return obj
for k, v in obj.items():
if isinstance(v, dict):
item = find_key(v, key)
if item is not None:
return k
(source: modified Get key by value in dictionary)
Is there a way to rename a dictionary key, without reassigning its value to a new name and removing the old name key; and without iterating through dict key/value?
In case of OrderedDict do the same, while keeping that key's position.
For a regular dict, you can use:
mydict[k_new] = mydict.pop(k_old)
This will move the item to the end of the dict, unless k_new was already existing in which case it will overwrite the value in-place.
For a Python 3.7+ dict where you additionally want to preserve the ordering, the simplest is to rebuild an entirely new instance. For example, renaming key 2 to 'two':
>>> d = {0:0, 1:1, 2:2, 3:3}
>>> {"two" if k == 2 else k:v for k,v in d.items()}
{0: 0, 1: 1, 'two': 2, 3: 3}
The same is true for an OrderedDict, where you can't use dict comprehension syntax, but you can use a generator expression:
OrderedDict((k_new if k == k_old else k, v) for k, v in od.items())
Modifying the key itself, as the question asks for, is impractical because keys are hashable which usually implies they're immutable and can't be modified.
Using a check for newkey!=oldkey, this way you can do:
if newkey!=oldkey:
dictionary[newkey] = dictionary[oldkey]
del dictionary[oldkey]
In case of renaming all dictionary keys:
target_dict = {'k1':'v1', 'k2':'v2', 'k3':'v3'}
new_keys = ['k4','k5','k6']
for key,n_key in zip(target_dict.keys(), new_keys):
target_dict[n_key] = target_dict.pop(key)
You can use this OrderedDict recipe written by Raymond Hettinger and modify it to add a rename method, but this is going to be a O(N) in complexity:
def rename(self,key,new_key):
ind = self._keys.index(key) #get the index of old key, O(N) operation
self._keys[ind] = new_key #replace old key with new key in self._keys
self[new_key] = self[key] #add the new key, this is added at the end of self._keys
self._keys.pop(-1) #pop the last item in self._keys
Example:
dic = OrderedDict((("a",1),("b",2),("c",3)))
print dic
dic.rename("a","foo")
dic.rename("b","bar")
dic["d"] = 5
dic.rename("d","spam")
for k,v in dic.items():
print k,v
output:
OrderedDict({'a': 1, 'b': 2, 'c': 3})
foo 1
bar 2
c 3
spam 5
A few people before me mentioned the .pop trick to delete and create a key in a one-liner.
I personally find the more explicit implementation more readable:
d = {'a': 1, 'b': 2}
v = d['b']
del d['b']
d['c'] = v
The code above returns {'a': 1, 'c': 2}
Suppose you want to rename key k3 to k4:
temp_dict = {'k1':'v1', 'k2':'v2', 'k3':'v3'}
temp_dict['k4']= temp_dict.pop('k3')
Other answers are pretty good.But in python3.6, regular dict also has order. So it's hard to keep key's position in normal case.
def rename(old_dict,old_name,new_name):
new_dict = {}
for key,value in zip(old_dict.keys(),old_dict.values()):
new_key = key if key != old_name else new_name
new_dict[new_key] = old_dict[key]
return new_dict
In Python 3.6 (onwards?) I would go for the following one-liner
test = {'a': 1, 'old': 2, 'c': 3}
old_k = 'old'
new_k = 'new'
new_v = 4 # optional
print(dict((new_k, new_v) if k == old_k else (k, v) for k, v in test.items()))
which produces
{'a': 1, 'new': 4, 'c': 3}
May be worth noting that without the print statement the ipython console/jupyter notebook present the dictionary in an order of their choosing...
I am using #wim 's answer above, with dict.pop() when renaming keys, but I found a gotcha. Cycling through the dict to change the keys, without separating the list of old keys completely from the dict instance, resulted in cycling new, changed keys into the loop, and missing some existing keys.
To start with, I did it this way:
for current_key in my_dict:
new_key = current_key.replace(':','_')
fixed_metadata[new_key] = fixed_metadata.pop(current_key)
I found that cycling through the dict in this way, the dictionary kept finding keys even when it shouldn't, i.e., the new keys, the ones I had changed! I needed to separate the instances completely from each other to (a) avoid finding my own changed keys in the for loop, and (b) find some keys that were not being found within the loop for some reason.
I am doing this now:
current_keys = list(my_dict.keys())
for current_key in current_keys:
and so on...
Converting the my_dict.keys() to a list was necessary to get free of the reference to the changing dict. Just using my_dict.keys() kept me tied to the original instance, with the strange side effects.
In case someone wants to rename all the keys at once providing a list with the new names:
def rename_keys(dict_, new_keys):
"""
new_keys: type List(), must match length of dict_
"""
# dict_ = {oldK: value}
# d1={oldK:newK,} maps old keys to the new ones:
d1 = dict( zip( list(dict_.keys()), new_keys) )
# d1{oldK} == new_key
return {d1[oldK]: value for oldK, value in dict_.items()}
I came up with this function which does not mutate the original dictionary. This function also supports list of dictionaries too.
import functools
from typing import Union, Dict, List
def rename_dict_keys(
data: Union[Dict, List[Dict]], old_key: str, new_key: str
):
"""
This function renames dictionary keys
:param data:
:param old_key:
:param new_key:
:return: Union[Dict, List[Dict]]
"""
if isinstance(data, dict):
res = {k: v for k, v in data.items() if k != old_key}
try:
res[new_key] = data[old_key]
except KeyError:
raise KeyError(
"cannot rename key as old key '%s' is not present in data"
% old_key
)
return res
elif isinstance(data, list):
return list(
map(
functools.partial(
rename_dict_keys, old_key=old_key, new_key=new_key
),
data,
)
)
raise ValueError("expected type List[Dict] or Dict got '%s' for data" % type(data))
#helloswift123 I like your function. Here is a modification to rename multiple keys in a single call:
def rename(d, keymap):
"""
:param d: old dict
:type d: dict
:param keymap: [{:keys from-keys :values to-keys} keymap]
:returns: new dict
:rtype: dict
"""
new_dict = {}
for key, value in zip(d.keys(), d.values()):
new_key = keymap.get(key, key)
new_dict[new_key] = d[key]
return new_dict
You can use below code:
OldDict={'a':'v1', 'b':'v2', 'c':'v3'}
OldKey=['a','b','c']
NewKey=['A','B','C']
def DictKeyChanger(dict,OldKey,NewKey):
ListAllKey=list(dict.keys())
for x in range(0,len(NewKey)):
dict[NewKey[x]]=dict[OldKey[x]] if OldKey[x] in ListAllKey else None
for x in ListAllKey:
dict.pop(x)
return dict
NewDict=DictKeyChanger(OldDict,OldKey,NewKey)
print(NewDict)#===>>{'A': 'v1', 'B': 'v2', 'C': 'v3'}
Notes:
The length of list OldKey and list NewKey must be equal.
The length of the list OldKey must be equal to the listNewKey, if the key does not exist in the OldKey, put 'noexis' instead as shown as.
Example:
OldDict={'a':'v1', 'b':'v2', 'c':'v3'}
OldKey=['a','b','c','noexis','noexis']
NewKey=['A','B','C','D','E']
NewDict=DictKeyChanger(OldDict,OldKey,NewKey)
print(NewDict)#===>>{'A': 'v1', 'B': 'v2', 'C': 'v3', 'D': None, 'E': None}
For the keeping of order case (the other one is trivial, remove old and add new one): I was not satisfied with the ordered-dictionary needing reconstruction (at least partially), obviously for efficiency reasons, so I've put together a class (OrderedDictX) that extends OrderedDict and allows you to do key changes efficiently, i.e. in O(1) complexity. The implementation can also be adjusted for the now-ordered built-in dict class.
It uses 2 extra dictionaries to remap the changed keys ("external" - i.e. as they appear externally to the user) to the ones in the underlying OrderedDict ("internal") - the dictionaries will only hold keys that were changed so as long as no key changing is done they will be empty.
Performance measurements:
import timeit
import random
# Efficiency tests
from collections import MutableMapping
class OrderedDictRaymond(dict, MutableMapping):
def __init__(self, *args, **kwds):
if len(args) > 1:
raise TypeError('expected at 1 argument, got %d', len(args))
if not hasattr(self, '_keys'):
self._keys = []
self.update(*args, **kwds)
def rename(self,key,new_key):
ind = self._keys.index(key) #get the index of old key, O(N) operation
self._keys[ind] = new_key #replace old key with new key in self._keys
self[new_key] = self[key] #add the new key, this is added at the end of self._keys
self._keys.pop(-1) #pop the last item in self._keys
dict.__delitem__(self, key)
def clear(self):
del self._keys[:]
dict.clear(self)
def __setitem__(self, key, value):
if key not in self:
self._keys.append(key)
dict.__setitem__(self, key, value)
def __delitem__(self, key):
dict.__delitem__(self, key)
self._keys.remove(key)
def __iter__(self):
return iter(self._keys)
def __reversed__(self):
return reversed(self._keys)
def popitem(self):
if not self:
raise KeyError
key = self._keys.pop()
value = dict.pop(self, key)
return key, value
def __reduce__(self):
items = [[k, self[k]] for k in self]
inst_dict = vars(self).copy()
inst_dict.pop('_keys', None)
return (self.__class__, (items,), inst_dict)
setdefault = MutableMapping.setdefault
update = MutableMapping.update
pop = MutableMapping.pop
keys = MutableMapping.keys
values = MutableMapping.values
items = MutableMapping.items
def __repr__(self):
pairs = ', '.join(map('%r: %r'.__mod__, self.items()))
return '%s({%s})' % (self.__class__.__name__, pairs)
def copy(self):
return self.__class__(self)
#classmethod
def fromkeys(cls, iterable, value=None):
d = cls()
for key in iterable:
d[key] = value
return d
class obj_container:
def __init__(self, obj) -> None:
self.obj = obj
def change_key_splice(container, k_old, k_new):
od = container.obj
container.obj = OrderedDict((k_new if k == k_old else k, v) for k, v in od.items())
def change_key_raymond(container, k_old, k_new):
od = container.obj
od.rename(k_old, k_new)
def change_key_odx(container, k_old, k_new):
odx = container.obj
odx.change_key(k_old, k_new)
NUM_ITEMS = 20000
od_splice = OrderedDict([(x, x) for x in range(NUM_ITEMS)])
od_raymond = OrderedDictRaymond(od_splice.items())
odx = OrderedDictX(od_splice.items())
od_splice, od_raymond, odx = [obj_container(d) for d in [od_splice, od_raymond, odx]]
assert odx.obj == od_splice.obj
assert odx.obj == od_raymond.obj
# Pick randomly half of the keys to change
keys_to_change = random.sample(range(NUM_ITEMS), NUM_ITEMS//2)
print(f'OrderedDictX: {timeit.timeit(lambda: [change_key_odx(odx, k, k+NUM_ITEMS) for k in keys_to_change], number=1)}')
print(f'OrderedDictRaymond: {timeit.timeit(lambda: [change_key_raymond(od_raymond, k, k+NUM_ITEMS) for k in keys_to_change], number=1)}')
print(f'Splice: {timeit.timeit(lambda: [change_key_splice(od_splice, k, k+NUM_ITEMS) for k in keys_to_change], number=1)}')
assert odx.obj == od_splice.obj
assert odx.obj == od_raymond.obj
And results:
OrderedDictX: 0.06587849999999995
OrderedDictRaymond: 1.1131364
Splice: 1165.2614647
As expected, the splicing method is extremely slow (didn't expect it to be that much slower either though) and uses a lot of memory, and the O(N) solution of #Ashwini Chaudhary (bug-fixed though, del also needed) is also slower, 17X times in this example.
Of course, this solution being O(1), compared to the O(N) OrderedDictRaymond the time difference becomes much more apparent as the dictionary size increases, e.g. for 5 times more elements (100000), the O(N) is now 100X slower:
NUM_ITEMS = 100000
OrderedDictX: 0.3636919999999999
OrderedDictRaymond: 36.3963971
Here's the code, please comment if you see issues or have improvements to propose as this might still be error-prone.
from collections import OrderedDict
class OrderedDictX(OrderedDict):
def __init__(self, *args, **kwargs):
# Mappings from new->old (ext2int), old->new (int2ext).
# Only the keys that are changed (internal key doesn't match what the user sees) are contained.
self._keys_ext2int = OrderedDict()
self._keys_int2ext = OrderedDict()
self.update(*args, **kwargs)
def change_key(self, k_old, k_new):
# Validate that the old key is part of the dict
if not self.__contains__(k_old):
raise Exception(f'Cannot rename key {k_old} to {k_new}: {k_old} not existing in dict')
# Return if no changing is actually to be done
if len(OrderedDict.fromkeys([k_old, k_new])) == 1:
return
# Validate that the new key would not conflict with another one
if self.__contains__(k_new):
raise Exception(f'Cannot rename key {k_old} to {k_new}: {k_new} already in dict')
# Change the key using internal dicts mechanism
if k_old in self._keys_ext2int:
# Revert change temporarily
k_old_int = self._keys_ext2int[k_old]
del self._keys_ext2int[k_old]
k_old = k_old_int
# Check if new key matches the internal key
if len(OrderedDict.fromkeys([k_old, k_new])) == 1:
del self._keys_int2ext[k_old]
return
# Finalize key change
self._keys_ext2int[k_new] = k_old
self._keys_int2ext[k_old] = k_new
def __contains__(self, k) -> bool:
if k in self._keys_ext2int:
return True
if not super().__contains__(k):
return False
return k not in self._keys_int2ext
def __getitem__(self, k):
if not self.__contains__(k):
# Intentionally raise KeyError in ext2int
return self._keys_ext2int[k]
return super().__getitem__(self._keys_ext2int.get(k, k))
def __setitem__(self, k, v):
if k in self._keys_ext2int:
return super().__setitem__(self._keys_ext2int[k], v)
# If the key exists in the internal state but was renamed to a k_ext,
# employ this trick: make it such that it appears as if k_ext has also been renamed to k
if k in self._keys_int2ext:
k_ext = self._keys_int2ext[k]
self._keys_ext2int[k] = k_ext
k = k_ext
return super().__setitem__(k, v)
def __delitem__(self, k):
if not self.__contains__(k):
# Intentionally raise KeyError in ext2int
del self._keys_ext2int[k]
if k in self._keys_ext2int:
k_int = self._keys_ext2int[k]
del self._keys_ext2int[k]
del self._keys_int2ext[k_int]
k = k_int
return super().__delitem__(k)
def __iter__(self):
yield from self.keys()
def __reversed__(self):
for k in reversed(super().keys()):
yield self._keys_int2ext.get(k, k)
def __eq__(self, other: object) -> bool:
if not isinstance(other, dict):
return False
if len(self) != len(other):
return False
for (k, v), (k_other, v_other) in zip(self.items(), other.items()):
if k != k_other or v != v_other:
return False
return True
def update(self, *args, **kwargs):
for k, v in OrderedDict(*args, **kwargs).items():
self.__setitem__(k, v)
def popitem(self, last=True) -> tuple:
if not last:
k = next(iter(self.keys()))
else:
k = next(iter(reversed(self.keys())))
v = self.__getitem__(k)
self.__delitem__(k)
return k, v
class OrderedDictXKeysView:
def __init__(self, odx: 'OrderedDictX', orig_keys):
self._odx = odx
self._orig_keys = orig_keys
def __iter__(self):
for k in self._orig_keys:
yield self._odx._keys_int2ext.get(k, k)
def __reversed__(self):
for k in reversed(self._orig_keys):
yield self._odx._keys_int2ext.get(k, k)
class OrderedDictXItemsView:
def __init__(self, odx: 'OrderedDictX', orig_items):
self._odx = odx
self._orig_items = orig_items
def __iter__(self):
for k, v in self._orig_items:
yield self._odx._keys_int2ext.get(k, k), v
def __reversed__(self):
for k, v in reversed(self._orig_items):
yield self._odx._keys_int2ext.get(k, k), v
def keys(self):
return self.OrderedDictXKeysView(self, super().keys())
def items(self):
return self.OrderedDictXItemsView(self, super().items())
def copy(self):
return OrderedDictX(self.items())
# FIXME: move this to pytest
if __name__ == '__main__':
MAX = 25
items = [(i+1, i+1) for i in range(MAX)]
keys = [i[0] for i in items]
d = OrderedDictX(items)
# keys() before change
print(list(d.items()))
assert list(d.keys()) == keys
# __contains__ before change
assert 1 in d
# __getitem__ before change
assert d[1] == 1
# __setitem__ before change
d[1] = 100
assert d[1] == 100
d[1] = 1
assert d[1] == 1
# __delitem__ before change
assert MAX in d
del d[MAX]
assert MAX not in d
d[MAX] = MAX
assert MAX in d
print('== Tests before key change finished ==')
# change_key and __contains__
assert MAX-1 in d
assert MAX*2 not in d
d.change_key(MAX-1, MAX*2)
assert MAX-1 not in d
assert MAX*2 in d
# items() and keys()
items[MAX-2] = (MAX*2, MAX-1)
keys[MAX-2] = MAX*2
assert list(d.items()) == items
assert list(d.keys()) == keys
print(list(d.items()))
# __getitem__
assert d[MAX*2] == MAX-1
# __setitem__
d[MAX*2] = MAX*3
items[MAX-2] = (MAX*2, MAX*3)
keys[MAX-2] = MAX*2
assert list(d.items()) == items
assert list(d.keys()) == keys
# __delitem__
del d[MAX]
items = items[:-1]
keys = keys[:-1]
assert list(d.items()) == items
assert list(d.keys()) == keys
d[MAX] = MAX
items.append((MAX, MAX))
keys.append(MAX)
# __iter__
assert list(d) == keys
# __reversed__
print(list(reversed(d.items())))
assert list(reversed(d)) == list(reversed(keys))
assert list(reversed(d.keys())) == list(reversed(keys))
assert list(reversed(d.items())) == list(reversed(items))
# pop_item()
assert d.popitem() == (MAX, MAX)
assert d.popitem() == (MAX*2, MAX*3)
items = items[:-2]
keys = keys[:-2]
assert list(d.items()) == items
assert list(d.keys()) == keys
# update()
d.update({1: 1000, MAX-2: MAX*4})
items[0] = (1, 1000)
items[MAX-3] = (MAX-2, MAX*4)
assert list(d.items()) == items
assert list(d.keys()) == keys
# move_to_end()
d.move_to_end(1)
items = items[1:] + [items[0]]
keys = keys[1:] + [keys[0]]
assert list(d.items()) == items
assert list(d.keys()) == keys
# __eq__
d.change_key(1, 2000)
other_d = OrderedDictX(d.items())
assert d == other_d
assert other_d == d
In my case, I had a function call returning a dict, which had a key I was hoping to rename in a single line, so none of these worked for me. Starting in python 3.8, you can use the walrus operator to keep it to one line if you are not looking for an inplace operation and the dict is not yet defined.
old_dict = get_dict()
# old_dict = {'a': 1, 'b': 2, 'c': 3}
new_dict = {'new1': (x := get_dict()).pop('b'), **x}
# new_dict = {'a': 1, 'new1': 2, 'c': 3}
I have combined some answers from the above thread and come up with the solution below. Although it is simple it can be used as a building block for making more complex key updates from a dictionary.
test_dict = {'a': 1, 'b': 2, 'c': 3}
print(test_dict)
# {'a': 1, 'b': 2, 'c': 3}
prefix = 'up'
def dict_key_update(json_file):
new_keys = []
old_keys = []
for i,(key,value) in enumerate(json_file.items()):
old_keys.append(key)
new_keys.append(str(prefix) + key) # i have updated by adding a prefix to the
# key
for old_key, new_key in zip(old_keys,new_keys):
print('old {}, new {}'.format(old_key, new_key))
if new_key!=old_key:
json_file[new_key] = json_file.pop(old_key)
return json_file
test_dict = dict_key_update(test_dict)
print(test_dict)
# {'upa': 1, 'upb': 2, 'upc': 3}
Given the following dictionary:
d = {"a":{"b":{"c":"winning!"}}}
I have this string (from an external source, and I can't change this metaphor).
k = "a.b.c"
I need to determine if the dictionary has the key 'c', so I can add it if it doesn't.
This works swimmingly for retrieving a dot notation value:
reduce(dict.get, key.split("."), d)
but I can't figure out how to 'reduce' a has_key check or anything like that.
My ultimate problem is this: given "a.b.c.d.e", I need to create all the elements necessary in the dictionary, but not stomp them if they already exist.
You could use an infinite, nested defaultdict:
>>> from collections import defaultdict
>>> infinitedict = lambda: defaultdict(infinitedict)
>>> d = infinitedict()
>>> d['key1']['key2']['key3']['key4']['key5'] = 'test'
>>> d['key1']['key2']['key3']['key4']['key5']
'test'
Given your dotted string, here's what you can do:
>>> import operator
>>> keys = "a.b.c".split(".")
>>> lastplace = reduce(operator.getitem, keys[:-1], d)
>>> lastplace.has_key(keys[-1])
False
You can set a value:
>>> lastplace[keys[-1]] = "something"
>>> reduce(operator.getitem, keys, d)
'something'
>>> d['a']['b']['c']
'something'
... or using recursion:
def put(d, keys, item):
if "." in keys:
key, rest = keys.split(".", 1)
if key not in d:
d[key] = {}
put(d[key], rest, item)
else:
d[keys] = item
def get(d, keys):
if "." in keys:
key, rest = keys.split(".", 1)
return get(d[key], rest)
else:
return d[keys]
How about an iterative approach?
def create_keys(d, keys):
for k in keys.split("."):
if not k in d: d[k] = {} #if the key isn't there yet add it to d
d = d[k] #go one level down and repeat
If you need the last key value to map to anything else than a dictionary you could pass the value as an additional argument and set this after the loop:
def create_keys(d, keys, value):
keys = keys.split(".")
for k in keys[:-1]:
if not k in d: d[k] = {}
d = d[k]
d[keys[-1]] = value
I thought this discussion was very useful, but for my purpose to only get a value (not setting it), I ran into issues when a key was not present. So, just to add my flair to the options, you can use reduce in combination of an adjusted dict.get() to accommodate the scenario that the key is not present, and then return None:
from functools import reduce
import re
from typing import Any, Optional
def find_key(dot_notation_path: str, payload: dict) -> Any:
"""Try to get a deep value from a dict based on a dot-notation"""
def get_despite_none(payload: Optional[dict], key: str) -> Any:
"""Try to get value from dict, even if dict is None"""
if not payload or not isinstance(payload, (dict, list)):
return None
# can also access lists if needed, e.g., if key is '[1]'
if (num_key := re.match(r"^\[(\d+)\]$", key)) is not None:
try:
return payload[int(num_key.group(1))]
except IndexError:
return None
else:
return payload.get(key, None)
found = reduce(get_despite_none, dot_notation_path.split("."), payload)
# compare to None, as the key could exist and be empty
if found is None:
raise KeyError()
return found
In my use case, I need to find a key within an HTTP request payload, which can often include lists as well. The following examples work:
payload = {
"haystack1": {
"haystack2": {
"haystack3": None,
"haystack4": "needle"
}
},
"haystack5": [
{"haystack6": None},
{"haystack7": "needle"}
],
"haystack8": {},
}
find_key("haystack1.haystack2.haystack4", payload)
# "needle"
find_key("haystack5.[1].haystack7", payload)
# "needle"
find_key("[0].haystack5.[1].haystack7", [payload, None])
# "needle"
find_key("haystack8", payload)
# {}
find_key("haystack1.haystack2.haystack4.haystack99", payload)
# KeyError
EDIT: added list accessor
d = {"a":{}}
k = "a.b.c".split(".")
def f(d, i):
if i >= len(k):
return "winning!"
c = k[i]
d[c] = f(d.get(c, {}), i + 1)
return d
print f(d, 0)
"{'a': {'b': {'c': 'winning!'}}}"
I'm looking for a way to update dict dictionary1 with the contents of dict update wihout overwriting levelA
dictionary1 = {
"level1": {
"level2": {"levelA": 0, "levelB": 1}
}
}
update = {
"level1": {
"level2": {"levelB": 10}
}
}
dictionary1.update(update)
print(dictionary1)
{
"level1": {
"level2": {"levelB": 10}
}
}
I know that update deletes the values in level2 because it's updating the lowest key level1.
How could I tackle this, given that dictionary1 and update can have any length?
#FM's answer has the right general idea, i.e. a recursive solution, but somewhat peculiar coding and at least one bug. I'd recommend, instead:
Python 2:
import collections
def update(d, u):
for k, v in u.iteritems():
if isinstance(v, collections.Mapping):
d[k] = update(d.get(k, {}), v)
else:
d[k] = v
return d
Python 3:
import collections.abc
def update(d, u):
for k, v in u.items():
if isinstance(v, collections.abc.Mapping):
d[k] = update(d.get(k, {}), v)
else:
d[k] = v
return d
The bug shows up when the "update" has a k, v item where v is a dict and k is not originally a key in the dictionary being updated -- #FM's code "skips" this part of the update (because it performs it on an empty new dict which isn't saved or returned anywhere, just lost when the recursive call returns).
My other changes are minor: there is no reason for the if/else construct when .get does the same job faster and cleaner, and isinstance is best applied to abstract base classes (not concrete ones) for generality.
If you happen to be using pydantic (great lib, BTW), you can use one of its utility methods:
from pydantic.utils import deep_update
dictionary1 = deep_update(dictionary1, update)
UPDATE: reference to code, as pointed by #Jorgu. If installing pydantic is not desired, the code is short enough to be copied, provided adequate licenses compatibilities.
Took me a little bit on this one, but thanks to #Alex's post, he filled in the gap I was missing. However, I came across an issue if a value within the recursive dict happens to be a list, so I thought I'd share, and extend his answer.
import collections
def update(orig_dict, new_dict):
for key, val in new_dict.iteritems():
if isinstance(val, collections.Mapping):
tmp = update(orig_dict.get(key, { }), val)
orig_dict[key] = tmp
elif isinstance(val, list):
orig_dict[key] = (orig_dict.get(key, []) + val)
else:
orig_dict[key] = new_dict[key]
return orig_dict
Same solution as the accepted one, but clearer variable naming, docstring, and fixed a bug where {} as a value would not override.
import collections
def deep_update(source, overrides):
"""
Update a nested dictionary or similar mapping.
Modify ``source`` in place.
"""
for key, value in overrides.iteritems():
if isinstance(value, collections.Mapping) and value:
returned = deep_update(source.get(key, {}), value)
source[key] = returned
else:
source[key] = overrides[key]
return source
Here are a few test cases:
def test_deep_update():
source = {'hello1': 1}
overrides = {'hello2': 2}
deep_update(source, overrides)
assert source == {'hello1': 1, 'hello2': 2}
source = {'hello': 'to_override'}
overrides = {'hello': 'over'}
deep_update(source, overrides)
assert source == {'hello': 'over'}
source = {'hello': {'value': 'to_override', 'no_change': 1}}
overrides = {'hello': {'value': 'over'}}
deep_update(source, overrides)
assert source == {'hello': {'value': 'over', 'no_change': 1}}
source = {'hello': {'value': 'to_override', 'no_change': 1}}
overrides = {'hello': {'value': {}}}
deep_update(source, overrides)
assert source == {'hello': {'value': {}, 'no_change': 1}}
source = {'hello': {'value': {}, 'no_change': 1}}
overrides = {'hello': {'value': 2}}
deep_update(source, overrides)
assert source == {'hello': {'value': 2, 'no_change': 1}}
This functions is available in the charlatan package, in charlatan.utils.
#Alex's answer is good, but doesn't work when replacing an element such as an integer with a dictionary, such as update({'foo':0},{'foo':{'bar':1}}). This update addresses it:
import collections
def update(d, u):
for k, v in u.iteritems():
if isinstance(d, collections.Mapping):
if isinstance(v, collections.Mapping):
r = update(d.get(k, {}), v)
d[k] = r
else:
d[k] = u[k]
else:
d = {k: u[k]}
return d
update({'k1': 1}, {'k1': {'k2': {'k3': 3}}})
Here's an Immutable version of recursive dictionary merge in case anybody needs it.
Based upon #Alex Martelli's answer.
Python 3.x:
import collections
from copy import deepcopy
def merge(dict1, dict2):
''' Return a new dictionary by merging two dictionaries recursively. '''
result = deepcopy(dict1)
for key, value in dict2.items():
if isinstance(value, collections.Mapping):
result[key] = merge(result.get(key, {}), value)
else:
result[key] = deepcopy(dict2[key])
return result
Python 2.x:
import collections
from copy import deepcopy
def merge(dict1, dict2):
''' Return a new dictionary by merging two dictionaries recursively. '''
result = deepcopy(dict1)
for key, value in dict2.iteritems():
if isinstance(value, collections.Mapping):
result[key] = merge(result.get(key, {}), value)
else:
result[key] = deepcopy(dict2[key])
return result
Just use python-benedict (I did it), it has a merge (deepupdate) utility method and many others. It works with python 2 / python 3 and it is well tested.
from benedict import benedict
dictionary1=benedict({'level1':{'level2':{'levelA':0,'levelB':1}}})
update={'level1':{'level2':{'levelB':10}}}
dictionary1.merge(update)
print(dictionary1)
# >> {'level1':{'level2':{'levelA':0,'levelB':10}}}
Installation: pip install python-benedict
Documentation: https://github.com/fabiocaccamo/python-benedict
Note: I am the author of this project
This question is old, but I landed here when searching for a "deep merge" solution. The answers above inspired what follows. I ended up writing my own because there were bugs in all the versions I tested. The critical point missed was, at some arbitrary depth of the two input dicts, for some key, k, the decision tree when d[k] or u[k] is not a dict was faulty.
Also, this solution does not require recursion, which is more symmetric with how dict.update() works, and returns None.
import collections
def deep_merge(d, u):
"""Do a deep merge of one dict into another.
This will update d with values in u, but will not delete keys in d
not found in u at some arbitrary depth of d. That is, u is deeply
merged into d.
Args -
d, u: dicts
Note: this is destructive to d, but not u.
Returns: None
"""
stack = [(d,u)]
while stack:
d,u = stack.pop(0)
for k,v in u.items():
if not isinstance(v, collections.Mapping):
# u[k] is not a dict, nothing to merge, so just set it,
# regardless if d[k] *was* a dict
d[k] = v
else:
# note: u[k] is a dict
if k not in d:
# add new key into d
d[k] = v
elif not isinstance(d[k], collections.Mapping):
# d[k] is not a dict, so just set it to u[k],
# overriding whatever it was
d[k] = v
else:
# both d[k] and u[k] are dicts, push them on the stack
# to merge
stack.append((d[k], v))
Minor improvements to #Alex's answer that enables updating of dictionaries of differing depths as well as limiting the depth that the update dives into the original nested dictionary (but the updating dictionary depth is not limited). Only a few cases have been tested:
def update(d, u, depth=-1):
"""
Recursively merge or update dict-like objects.
>>> update({'k1': {'k2': 2}}, {'k1': {'k2': {'k3': 3}}, 'k4': 4})
{'k1': {'k2': {'k3': 3}}, 'k4': 4}
"""
for k, v in u.iteritems():
if isinstance(v, Mapping) and not depth == 0:
r = update(d.get(k, {}), v, depth=max(depth - 1, -1))
d[k] = r
elif isinstance(d, Mapping):
d[k] = u[k]
else:
d = {k: u[k]}
return d
The code below should solve the update({'k1': 1}, {'k1': {'k2': 2}}) issue in #Alex Martelli's answer the right way.
def deepupdate(original, update):
"""Recursively update a dict.
Subdict's won't be overwritten but also updated.
"""
if not isinstance(original, abc.Mapping):
return update
for key, value in update.items():
if isinstance(value, abc.Mapping):
original[key] = deepupdate(original.get(key, {}), value)
else:
original[key] = value
return original
I used the solution #Alex Martelli suggests, but it fails
TypeError 'bool' object does not support item assignment
when the two dictionaries differ in data type at some level.
In case at the same level the element of dictionary d is just a scalar (ie. Bool) while the element of dictionary u is still dictionary the reassignment fails as no dictionary assignment is possible into scalar (like True[k]).
One added condition fixes that:
from collections import Mapping
def update_deep(d, u):
for k, v in u.items():
# this condition handles the problem
if not isinstance(d, Mapping):
d = u
elif isinstance(v, Mapping):
r = update_deep(d.get(k, {}), v)
d[k] = r
else:
d[k] = u[k]
return d
In neither of these answers the authors seem to understand the concept of updating an object stored in a dictionary nor even of iterating over dictionary items (as opposed to keys). So I had to write one which doesn't make pointless tautological dictionary stores and retrievals.
The dicts are assumed to store other dicts or simple types.
def update_nested_dict(d, other):
for k, v in other.items():
if isinstance(v, collections.Mapping):
d_v = d.get(k)
if isinstance(d_v, collections.Mapping):
update_nested_dict(d_v, v)
else:
d[k] = v.copy()
else:
d[k] = v
Or even simpler one working with any type:
def update_nested_dict(d, other):
for k, v in other.items():
d_v = d.get(k)
if isinstance(v, collections.Mapping) and isinstance(d_v, collections.Mapping):
update_nested_dict(d_v, v)
else:
d[k] = deepcopy(v) # or d[k] = v if you know what you're doing
Update to #Alex Martelli's answer to fix a bug in his code to make the solution more robust:
def update_dict(d, u):
for k, v in u.items():
if isinstance(v, collections.Mapping):
default = v.copy()
default.clear()
r = update_dict(d.get(k, default), v)
d[k] = r
else:
d[k] = v
return d
The key is that we often want to create the same type at recursion, so here we use v.copy().clear() but not {}. And this is especially useful if the dict here is of type collections.defaultdict which can have different kinds of default_factorys.
Also notice that the u.iteritems() has been changed to u.items() in Python3.
def update(value, nvalue):
if not isinstance(value, dict) or not isinstance(nvalue, dict):
return nvalue
for k, v in nvalue.items():
value.setdefault(k, dict())
if isinstance(v, dict):
v = update(value[k], v)
value[k] = v
return value
use dict or collections.Mapping
I recommend to replace {} by type(v)() in order to propagate object type of any dict subclass stored in u but absent from d. For example, this would preserve types such as collections.OrderedDict:
Python 2:
import collections
def update(d, u):
for k, v in u.iteritems():
if isinstance(v, collections.Mapping):
d[k] = update(d.get(k, type(v)()), v)
else:
d[k] = v
return d
Python 3:
import collections.abc
def update(d, u):
for k, v in u.items():
if isinstance(v, collections.abc.Mapping):
d[k] = update(d.get(k, type(v)()), v)
else:
d[k] = v
return d
It could be that you stumble over a non-standard-dictionary, like me today, which has no iteritems-Attribute.
In this case it's easy to interpret this type of dictionary as a standard-dictionary. E.g.:
Python 2.7:
import collections
def update(orig_dict, new_dict):
for key, val in dict(new_dict).iteritems():
if isinstance(val, collections.Mapping):
tmp = update(orig_dict.get(key, { }), val)
orig_dict[key] = tmp
elif isinstance(val, list):
orig_dict[key] = (orig_dict[key] + val)
else:
orig_dict[key] = new_dict[key]
return orig_dict
import multiprocessing
d=multiprocessing.Manager().dict({'sample':'data'})
u={'other': 1234}
x=update(d, u)
x.items()
Python 3.8:
def update(orig_dict, new_dict):
orig_dict=dict(orig_dict)
for key, val in dict(new_dict).items():
if isinstance(val, collections.abc.Mapping):
tmp = update(orig_dict.get(key, { }), val)
orig_dict[key] = tmp
elif isinstance(val, list):
orig_dict[key] = (orig_dict[key] + val)
else:
orig_dict[key] = new_dict[key]
return orig_dict
import collections
import multiprocessing
d=multiprocessing.Manager().dict({'sample':'data'})
u={'other': 1234, "deeper": {'very': 'deep'}}
x=update(d, u)
x.items()
Thanks to hobs for his comment on Alex's answer. Indeed update({'k1': 1}, {'k1': {'k2': 2}}) will cause TypeError: 'int' object does not support item assignment.
We should check the types of the input values at the beginning of the function. So, I suggest the following function, which should solve this (and other) problem.
Python 3:
from collections.abc import Mapping
def deep_update(d1, d2):
if all((isinstance(d, Mapping) for d in (d1, d2))):
for k, v in d2.items():
d1[k] = deep_update(d1.get(k), v)
return d1
return d2
I know this question is pretty old, but still posting what I do when I have to update a nested dictionary. We can use the fact that dicts are passed by reference in python
Assuming that the path of the key is known and is dot separated. Forex if we have a dict named data:
{
"log_config_worker": {
"version": 1,
"root": {
"handlers": [
"queue"
],
"level": "DEBUG"
},
"disable_existing_loggers": true,
"handlers": {
"queue": {
"queue": null,
"class": "myclass1.QueueHandler"
}
}
},
"number_of_archived_logs": 15,
"log_max_size": "300M",
"cron_job_dir": "/etc/cron.hourly/",
"logs_dir": "/var/log/patternex/",
"log_rotate_dir": "/etc/logrotate.d/"
}
And we want to update the queue class, the path of the key would be - log_config_worker.handlers.queue.class
We can use the following function to update the value:
def get_updated_dict(obj, path, value):
key_list = path.split(".")
for k in key_list[:-1]:
obj = obj[k]
obj[key_list[-1]] = value
get_updated_dict(data, "log_config_worker.handlers.queue.class", "myclass2.QueueHandler")
This would update the dictionary correctly.
I made a simple function, in which you give the key, the new value and the dictionary as input, and it recursively updates it with the value:
def update(key,value,dictionary):
if key in dictionary.keys():
dictionary[key] = value
return
dic_aux = []
for val_aux in dictionary.values():
if isinstance(val_aux,dict):
dic_aux.append(val_aux)
for i in dic_aux:
update(key,value,i)
for [key2,val_aux2] in dictionary.items():
if isinstance(val_aux2,dict):
dictionary[key2] = val_aux2
dictionary1={'level1':{'level2':{'levelA':0,'levelB':1}}}
update('levelB',10,dictionary1)
print(dictionary1)
#output: {'level1': {'level2': {'levelA': 0, 'levelB': 10}}}
Hope it answers.
Yes! And another solution. My solution differs in the keys that are being checked.
In all other solutions we only look at the keys in dict_b. But here we look in the union of both dictionaries.
Do with it as you please
def update_nested(dict_a, dict_b):
set_keys = set(dict_a.keys()).union(set(dict_b.keys()))
for k in set_keys:
v = dict_a.get(k)
if isinstance(v, dict):
new_dict = dict_b.get(k, None)
if new_dict:
update_nested(v, new_dict)
else:
new_value = dict_b.get(k, None)
if new_value:
dict_a[k] = new_value
If you want to replace a "full nested dictionary with arrays" you can use this snippet :
It will replace any "old_value" by "new_value". It's roughly doing a depth-first rebuilding of the dictionary. It can even work with List or Str/int given as input parameter of first level.
def update_values_dict(original_dict, future_dict, old_value, new_value):
# Recursively updates values of a nested dict by performing recursive calls
if isinstance(original_dict, Dict):
# It's a dict
tmp_dict = {}
for key, value in original_dict.items():
tmp_dict[key] = update_values_dict(value, future_dict, old_value, new_value)
return tmp_dict
elif isinstance(original_dict, List):
# It's a List
tmp_list = []
for i in original_dict:
tmp_list.append(update_values_dict(i, future_dict, old_value, new_value))
return tmp_list
else:
# It's not a dict, maybe a int, a string, etc.
return original_dict if original_dict != old_value else new_value
Another way of using recursion:
def updateDict(dict1,dict2):
keys1 = list(dict1.keys())
keys2= list(dict2.keys())
keys2 = [x for x in keys2 if x in keys1]
for x in keys2:
if (x in keys1) & (type(dict1[x]) is dict) & (type(dict2[x]) is dict):
updateDict(dict1[x],dict2[x])
else:
dict1.update({x:dict2[x]})
return(dict1)
Credit to: #Gustavo Alves Casqueiro for original answer
I honestly would have preferred using a lib that could do the heavy lifting for me, but I just couldn't find something that did what I needed.
I have only added a couple of additional checks to this function.
I have included a check for lists within a dict and added a parameter for the name of a nested dict to correctly update the nested dict KEY when there may be another KEY within the OUTER dict with the same name.
Updated function:
def update(dictionary: dict[str, any], key: str, value: any, nested_dict_name: str = None) -> dict[str, any]:
if not nested_dict_name: # if current (outermost) dict should be updated
if key in dictionary.keys(): # check if key exists in current dict
dictionary[key] = value
return dictionary
else: # if nested dict should be updated
if nested_dict_name in dictionary.keys(): # check if dict is in next layer
if isinstance(dictionary[nested_dict_name], dict):
if key in dictionary[nested_dict_name].keys(): # check if key exists in current dict
dictionary[nested_dict_name][key] = value
return dictionary
if isinstance(dictionary[nested_dict_name], list):
list_index = random.choice(range(len(dictionary[nested_dict_name]))) # pick a random dict from the list
if key in dictionary[nested_dict_name][list_index].keys(): # check if key exists in current dict
dictionary[nested_dict_name][list_index][key] = value
return dictionary
dic_aux = []
# this would only run IF the above if-statement was not able to identity and update a dict
for val_aux in dictionary.values():
if isinstance(val_aux, dict):
dic_aux.append(val_aux)
# call the update function again for recursion
for i in dic_aux:
return update(dictionary=i, key=key, value=value, nested_dict_name=nested_dict_name)
Original dict:
{
"level1": {
"level2": {
"myBool": "Original",
"myInt": "Original"
},
"myInt": "Original",
"myBool": "Original"
},
"myStr": "Original",
"level3": [
{
"myList": "Original",
"myInt": "Original",
"myBool": "Original"
}
],
"level4": [
{
"myList": "Original",
"myInt": "UPDATED",
"myBool": "Original"
}
],
"level5": {
"level6": {
"myBool": "Original",
"myInt": "Original"
},
"myInt": "Original",
"myBool": "Original"
}
}
Data for updating (using pytest):
#pytest.fixture(params=[(None, 'myStr', 'UPDATED'),
('level1', 'myInt', 'UPDATED'),
('level2', 'myBool', 'UPDATED'),
('level3', 'myList', 'UPDATED'),
('level4', 'myInt', 'UPDATED'),
('level5', 'myBool', 'UPDATED')])
def sample_data(request):
return request.param
The 'UPDATED' parameter doesn't make sense in this smaller use case (since I could just hard-code it), but for simplicity when reading the logs, I didn't want to see multiple data-types and just made it show me an 'UPDATED' string.
Test:
#pytest.mark.usefixtures('sample_data')
def test_this(sample_data):
nested_dict, param, update_value = sample_data
if nested_dict is None:
print(f'\nDict Value: Level0\nParam: {param}\nUpdate Value: {update_value}')
else:
print(f'\nDict Value: {nested_dict}\nParam: {param}\nUpdate Value: {update_value}')
# initialise data dict
data_object = # insert data here (see example dict above)
# first print as is
print(f'\nOriginal Dict:\n{data_object}')
update(dictionary=data_object,
key=param,
value=update_value,
nested_dict_name=nested_dict)
# print updated
print(f'\nUpdated Dict:\n{data_object}')
There is one caveat, when you have a dict like this:
{
"level1": {
"level2": {
"myBool": "Original"
},
"myBool": "Original"
},
"level3": {
"level2": {
"myBool": "Original"
},
"myInt": "Original"
}
}
Where level2 is under level1 AND level3. This would require making using of a list or something with the nested_dict_name and passing in the name of the outer dict AND inner dict (['level5', 'level2']) and then somehow looping through the values to find that dict.
However, since I haven't yet ran into this issue for the data objects I use, I haven't spent the time trying to solve this "issue".
Convert your dictionaries into NestedDict
from ndicts.ndicts import NestedDict
dictionary1 = {'level1': {'level2': {'levelA': 0, 'levelB': 1}}}
update = {'level1': {'level2': {'levelB': 10}}}
nd, nd_update = NestedDict(dictionary1), NestedDict(update)
Then just use update
>>> nd.update(nd_update)
>>> nd
NestedDict({'level1': {'level2': {'levelA': 0, 'levelB': 10}}})
If you need the result as a dictionary nd.to_dict()
To install ndicts pip install ndicts
d is dict to update, u is dict-updater.
def recursively_update_dict(d: dict, u: dict):
for k, v in u.items():
if isinstance(v, dict):
d.setdefault(k, {})
recursively_update_dict(d[k], v)
else:
d[k] = v
Or for defaultdict
from collections import defaultdict
def recursively_update_defaultdict(d: defaultdict[dict], u: dict):
for k, v in u.items():
if isinstance(v, dict):
recursively_update_dict(d[k], v)
else:
d[k] = v
a new Q
how to By a keys chain
dictionary1={'level1':{'level2':{'levelA':0,'levelB':1}},'anotherLevel1':{'anotherLevel2':{'anotherLevelA':0,'anotherLevelB':1}}}
update={'anotherLevel1':{'anotherLevel2':1014}}
dictionary1.update(update)
print dictionary1
{'level1':{'level2':{'levelA':0,'levelB':1}},'anotherLevel1':{'anotherLevel2':1014}}
you could try this, it works with lists and is pure:
def update_keys(newd, dic, mapping):
def upsingle(d,k,v):
if k in mapping:
d[mapping[k]] = v
else:
d[k] = v
for ekey, evalue in dic.items():
upsingle(newd, ekey, evalue)
if type(evalue) is dict:
update_keys(newd, evalue, mapping)
if type(evalue) is list:
upsingle(newd, ekey, [update_keys({}, i, mapping) for i in evalue])
return newd
That's a bit to the side but do you really need nested dictionaries? Depending on the problem, sometimes flat dictionary may suffice... and look good at it:
>>> dict1 = {('level1','level2','levelA'): 0}
>>> dict1['level1','level2','levelB'] = 1
>>> update = {('level1','level2','levelB'): 10}
>>> dict1.update(update)
>>> print dict1
{('level1', 'level2', 'levelB'): 10, ('level1', 'level2', 'levelA'): 0}