I know there are several similar questions out there, but my question is quite different and difficult for me.
I have two dictionaries:
# Old snapshot: a single top-level key.
d1 = {
    'a': {
        'b': {'cs': 10},
        'd': {'cs': 20},
    },
}
# New snapshot: 'cs' under 'b' changed and 'newa' was added.
d2 = {
    'a': {
        'b': {'cs': 30},
        'd': {'cs': 20},
    },
    'newa': {
        'q': {'cs': 50},
    },
}
i.e. d1 has key 'a', and d2 has keys 'a' and 'newa' (in other words d1 is my old dict and d2 is my new dict).
I want to iterate over these dictionaries such that, if the key is same check for its value (nested dict), e.g. when I find key 'a' in d2, I will check whether there is 'b', if yes check value of 'cs' (changed from 10 to 30), if this value is changed I want to print it.
Another case is, I want to get key 'newa' from d2 as the newly added key.
Hence, after iterating through these 2 dicts, this is the expected output:
"d2" has new key "newa"
Value of "cs" is changed from 10 to 30 of key "b" which is of key "a"
I have the following code, which uses many nested loops. It does not work, and nesting loops like this is not a good approach anyway, so I would like to know whether I can get the expected output with a recursive piece of code.
# Compare the first two nesting levels of d1 and d2 key by key.
# Keys are compared with ``==``: ``is`` tests object identity and only
# appears to work for strings the interpreter happens to share.
for k, v in d1.items():
    for k1, v1 in d2.items():
        if k == k1:
            print(k)
            for k2 in v:
                for k3 in v1:
                    if k2 == k3:
                        print("%s sub key matched" % k2)
        else:
            print("sorry no match found")
comparing 2 dictionaries using recursion:
Edited for python 3 (works for python 2 as well):
# Sample input: d2 changes a -> b -> cs and adds the top-level key 'newa'.
d1 = {
    'a': {
        'b': {'cs': 10},
        'd': {'cs': 20},
    },
}
d2 = {
    'a': {
        'b': {'cs': 30},
        'd': {'cs': 20},
    },
    'newa': {
        'q': {'cs': 50},
    },
}
def findDiff(d1, d2, path=""):
    """Print how the entries of ``d1`` differ from those of ``d2``.

    Only keys of ``d1`` are visited, so call the function a second time with
    the arguments swapped to see keys that exist only in ``d2``.

    Args:
        d1: dictionary whose keys are walked.
        d2: dictionary to compare against.
        path: textual key path of ``d1`` inside the outermost dictionary;
            empty for the outermost call.
    """
    for k in d1:
        if k not in d2:
            print("%s%s as key not in d2\n" % ("%s: " % path if path else "", k))
            continue
        if isinstance(d1[k], dict) and isinstance(d2[k], dict):
            # Descend only when both sides are dictionaries; the nested call
            # reports the leaf that changed, so the parent is not reported
            # a second time.
            findDiff(d1[k], d2[k], "%s -> %s" % (path, k) if path else k)
        elif d1[k] != d2[k]:
            result = ["%s: " % path, " - %s : %s" % (k, d1[k]), " + %s : %s" % (k, d2[k])]
            print("\n".join(result))
# findDiff only walks the keys of its first argument, so run it in both
# directions to cover keys that exist on one side only.
print("comparing d1 to d2:")
findDiff(d1,d2)
print("comparing d2 to d1:")
findDiff(d2,d1)
Python 2 old answer:
def findDiff(d1, d2, path=""):
    """Print the entries of ``d1`` that are missing from or differ in ``d2``.

    Only keys of ``d1`` are visited.

    Args:
        d1: dictionary whose keys are walked.
        d2: dictionary to compare against.
        path: "->"-joined key path of ``d1`` inside the outermost dictionary.
    """
    for k in d1:
        if k not in d2:
            print (path, ":")
            print (k + " as key not in d2", "\n")
            continue
        if isinstance(d1[k], dict) and isinstance(d2[k], dict):
            # Build the child's path in a separate name: reassigning ``path``
            # here would leak this key into the paths of its siblings.
            child_path = k if path == "" else path + "->" + k
            findDiff(d1[k], d2[k], child_path)
        elif d1[k] != d2[k]:
            print (path, ":")
            print (" - ", k," : ", d1[k])
            print (" + ", k," : ", d2[k])
Output:
comparing d1 to d2:
a -> b:
- cs : 10
+ cs : 30
comparing d2 to d1:
a -> b:
- cs : 30
+ cs : 10
Modified user3's code to make it even better
# Sample input with a changed scalar, a dict replaced by a scalar ('qqq'),
# and a key that exists only in d2 ('newa').
d1 = {
    'as': 1,
    'a': {
        'b': {
            'cs': 10,
            'qqq': {'qwe': 1},
        },
        'd': {'csd': 30},
    },
}
d2 = {
    'as': 3,
    'a': {
        'b': {
            'cs': 30,
            'qqq': 123,
        },
        'd': {'csd': 20},
    },
    'newa': {
        'q': {'cs': 50},
    },
}
def compare_dictionaries(dict_1, dict_2, dict_1_name, dict_2_name, path=""):
    """Compare two dictionaries recursively to find non matching elements

    Args:
        dict_1: dictionary 1
        dict_2: dictionary 2
        dict_1_name: label used for dictionary 1 in the report
        dict_2_name: label used for dictionary 2 in the report
        path: bracketed key path of the dictionaries inside the outermost
            ones; empty for the outermost call

    Returns:
        A string with one line per difference (empty when there is none):
        missing keys first, then differing values of this level, then the
        differences found in nested dictionaries.
    """
    key_err = ''
    value_err = ''
    err = ''
    for k in dict_1:
        sub_path = path + "[%s]" % k
        if k not in dict_2:
            # The key exists in dict_1 only, so it is dict_1's path we name.
            key_err += "Key %s%s not in %s\n" % (dict_1_name, sub_path, dict_2_name)
        elif isinstance(dict_1[k], dict) and isinstance(dict_2[k], dict):
            # Pass the caller's labels on so nested lines use them too.
            err += compare_dictionaries(dict_1[k], dict_2[k],
                                        dict_1_name, dict_2_name, sub_path)
        elif dict_1[k] != dict_2[k]:
            value_err += "Value of %s%s (%s) not same as %s%s (%s)\n"\
                % (dict_1_name, sub_path, dict_1[k], dict_2_name, sub_path, dict_2[k])

    for k in dict_2:
        if k not in dict_1:
            key_err += "Key %s%s not in %s\n" % (dict_2_name, path + "[%s]" % k, dict_1_name)

    return key_err + value_err + err
# Build the report for the two sample dictionaries and show it.
# print() with a single argument behaves the same on Python 2 and Python 3.
a = compare_dictionaries(d1, d2, 'd1', 'd2')
print(a)
Output:
Key d2[newa] not in d1
Value of d1[as] (1) not same as d2[as] (3)
Value of d1[a][b][cs] (10) not same as d2[a][b][cs] (30)
Value of d1[a][b][qqq] ({'qwe': 1}) not same as d2[a][b][qqq] (123)
Value of d1[a][d][csd] (30) not same as d2[a][d][csd] (20)
Why not use the deepdiff library?
See it at: https://github.com/seperman/deepdiff
>>> from deepdiff import DeepDiff
>>> t1 = {1:1, 3:3, 4:4}
>>> t2 = {1:1, 3:3, 5:5, 6:6}
>>> ddiff = DeepDiff(t1, t2)
>>> print(ddiff)
{'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}}
of course it is more powerful, check the doc for more.
This should provide what you need with helpful functions:
For Python 2.7
def isDict(obj):
    """Return True when ``obj`` is a dictionary (dict subclasses included).

    isinstance() is used instead of comparing the class name, which would
    reject dict subclasses and accept any unrelated class named 'dict'.
    """
    return isinstance(obj, dict)
def containsKeyRec(vKey, vDict):
    """Tell whether ``vKey`` is a key of ``vDict`` at any nesting depth.

    Nested values are searched only when isDict() accepts them.
    """
    for name, child in vDict.items():
        if name == vKey:
            return True
        if isDict(child) and containsKeyRec(vKey, child):
            return True
    return False
def getValueRec(vKey, vDict):
    """Return the value stored under ``vKey`` anywhere inside ``vDict``.

    The search is depth-first in iteration order and the first match wins.
    Returns None when the key does not occur.
    """
    found, value = _lookupRec(vKey, vDict)
    return value if found else None


def _lookupRec(vKey, vDict):
    """Return ``(True, value)`` for the first match, else ``(False, None)``.

    The explicit flag lets falsy values (0, '', None, {}) be found as well.
    """
    for curKey, curValue in vDict.items():
        if curKey == vKey:
            return True, curValue
        if isinstance(curValue, dict):
            found, value = _lookupRec(vKey, curValue)
            if found:
                return True, value
    return False, None
d1 = {'a': {'b': {'cs': 10}, 'd': {'cs': 20}}}
d2 = {'a': {'b': {'cs': 30}, 'd': {'cs': 20}}, 'newa': {'q': {'cs': 50}}}

# For each top-level key of d1, look for it anywhere inside d2 and compare
# the values. print() with one argument gives the same output on Python 2
# and Python 3.
for key in d1:
    if containsKeyRec(key, d2):
        print("dict d2 contains key: " + key)
        d2Value = getValueRec(key, d2)
        if d1[key] == d2Value:
            print("values are equal, d1: " + str(d1[key]) + ", d2: " + str(d2Value))
        else:
            print("values are not equal, d1: " + str(d1[key]) + ", d2: " + str(d2Value))
    else:
        print("dict d2 does not contain key: " + key)
For Python 3 (or higher):
def id_dict(obj):
    """Return True when ``obj`` is a dictionary (dict subclasses included).

    isinstance() is used instead of comparing the class name, which would
    reject dict subclasses and accept any unrelated class named 'dict'.
    """
    return isinstance(obj, dict)
def contains_key_rec(v_key, v_dict):
    """Tell whether ``v_key`` is a key of ``v_dict`` at any nesting depth.

    Nested values are searched only when id_dict() accepts them.
    """
    for name, child in v_dict.items():
        if name == v_key:
            return True
        if id_dict(child) and contains_key_rec(v_key, child):
            return True
    return False
def get_value_rec(v_key, v_dict):
    """Return the value stored under ``v_key`` anywhere inside ``v_dict``.

    The search is depth-first in iteration order and the first match wins.
    Returns None when the key does not occur.
    """
    found, value = _lookup_rec(v_key, v_dict)
    return value if found else None


def _lookup_rec(v_key, v_dict):
    """Return ``(True, value)`` for the first match, else ``(False, None)``.

    The explicit flag lets falsy values (0, '', None, {}) be found as well.
    """
    for cur_key, cur_value in v_dict.items():
        if cur_key == v_key:
            return True, cur_value
        if isinstance(cur_value, dict):
            found, value = _lookup_rec(v_key, cur_value)
            if found:
                return True, value
    return False, None
d1 = {
    'a': {'b': {'cs': 10}, 'd': {'cs': 20}},
}
d2 = {
    'a': {'b': {'cs': 30}, 'd': {'cs': 20}},
    'newa': {'q': {'cs': 50}},
}

# For each top-level key of d1, look for it anywhere inside d2 and compare.
for key in d1:
    if not contains_key_rec(key, d2):
        print("dict d2 does not contain key: " + key)
        continue
    d2_value = get_value_rec(key, d2)
    if d1[key] == d2_value:
        print("values are equal, d1: " + str(d1[key]) + ", d2: " + str(d2_value))
    else:
        print("values are not equal:\n"
              "list1: " + str(d1[key]) + "\n" +
              "list2: " + str(d2_value))
For python 3 or higher,
Code for comparing any data.
def do_compare(data1, data2, data1_name, data2_name, path=""):
    """Compare two values and print where they differ.

    Dictionaries are handed to compare_dict() and lists to compare_list();
    any other pair is compared directly. When the outermost call (empty
    ``path``) receives equal data, that fact is logged instead.
    """
    if operator.eq(data1, data2) and not path:
        log.info("Both data have same content")
        return

    if isinstance(data1, dict) and isinstance(data2, dict):
        compare_dict(data1, data2, data1_name, data2_name, path)
        return

    if isinstance(data1, list) and isinstance(data2, list):
        compare_list(data1, data2, data1_name, data2_name, path)
        return

    if data1 != data2:
        message = "Value of %s%s (%s) not same as %s%s (%s)\n" % (
            data1_name, path, data1, data2_name, path, data2)
        print(message)
def compare_dict(data1, data2, data1_name, data2_name, path):
    """Compare two dictionaries key by key and print the differences.

    Keys present on one side only are reported; values of shared keys are
    passed to do_compare() together with the extended key path.
    """
    for key in data1.keys():
        key_path = path + "[%s]" % key
        if key in data2:
            do_compare(data1[key], data2[key], data1_name, data2_name, key_path)
        else:
            print("Key %s%s not in %s\n" % (data1_name, key_path, data2_name))

    # Second pass: keys that only the second dictionary has.
    for key in data2.keys():
        if key in data1:
            continue
        key_path = path + "[%s]" % key
        print("Key %s%s not in %s\n" % (data2_name, key_path, data1_name))
def compare_list(data1, data2, data1_name, data2_name, path):
    """Compare two lists position by position and print the differences.

    A length mismatch is reported once; the elements both lists have in
    common are then passed pairwise to do_compare() with the index appended
    to ``path``.
    """
    data1_length = len(data1)
    data2_length = len(data2)
    if data1_length != data2_length:
        value_err = "No: of items in %s%s (%s) not same as %s%s (%s)\n"\
            % (data1_name, path, data1_length, data2_name, path, data2_length)
        print(value_err)

    # zip() stops at the shorter list, so no IndexError has to be swallowed
    # and errors raised inside the nested comparison are no longer hidden.
    for index, (item1, item2) in enumerate(zip(data1, data2)):
        do_compare(item1, item2, data1_name, data2_name, path + "[%s]" % index)
Adding a version that adds some more capabilities:
can compare arbitrarily nested JSON-like dicts and lists
lets you specify keys to ignore (e.g. in flaky unit tests)
lets you specify keys with numerical values that will be treated as equal as long as they fall within a certain percentage of each other
If you define the deep_diff function as below and call it on #rkatkam's example you'll get:
>>> deep_diff(d1, d2)
{'newa': (None, {'q': {'cs': 50}}), 'a': {'b': {'cs': (10, 30)}}}
Here's the function definition:
def deep_diff(x, y, parent_key=None, exclude_keys=None, epsilon_keys=None):
    """
    Take the deep diff of JSON-like dictionaries

    No warranties when keys, or values are None

    Args:
        x: first value (dict, list or scalar).
        y: second value.
        parent_key: key or list index under which ``x``/``y`` were found;
            used to decide whether the epsilon comparison applies.
        exclude_keys: dictionary keys that are ignored at every level.
        epsilon_keys: keys whose values are compared with a relative
            tolerance instead of exact equality.

    Returns:
        None when nothing differs; an ``(x, y)`` tuple for differing leaves
        or mismatching container types; a dict (same type as ``x``) of
        per-key diffs for dictionaries; a list with one entry per index
        (None where equal) for lists. A side that has no value at a key or
        index is represented by None in the tuple.
    """
    # pylint: disable=unidiomatic-typecheck
    from copy import deepcopy  # local import keeps the function self-contained

    EPSILON = 0.5
    rho = 1 - EPSILON

    # None stands for "nothing given" so no mutable default object is shared
    # between calls.
    exclude_keys = () if exclude_keys is None else exclude_keys
    epsilon_keys = () if epsilon_keys is None else epsilon_keys

    if x == y:
        return None

    if parent_key in epsilon_keys:
        # NOTE(review): float_or_None is not defined next to this function;
        # presumably it returns float(value) or None on failure -- confirm.
        xfl, yfl = float_or_None(x), float_or_None(y)
        if xfl and yfl and xfl * yfl >= 0 and rho * xfl <= yfl and rho * yfl <= xfl:
            return None

    if not (isinstance(x, (list, dict)) and (isinstance(x, type(y)) or isinstance(y, type(x)))):
        return x, y

    def diff_child(left, right, key):
        # Recurse with the same options, remembering where the pair lives.
        return deep_diff(
            left, right, parent_key=key, exclude_keys=exclude_keys, epsilon_keys=epsilon_keys
        )

    if isinstance(x, dict):
        d = type(x)()  # handles OrderedDict's as well
        # Keys present on one side only, in each dictionary's own order.
        for k in x:
            if k not in y and k not in exclude_keys:
                d[k] = (deepcopy(x[k]), None)
        for k in y:
            if k not in x and k not in exclude_keys:
                d[k] = (None, deepcopy(y[k]))
        # Keys present on both sides.
        for k in x:
            if k not in y or k in exclude_keys:
                continue
            next_d = diff_child(x[k], y[k], k)
            if next_d is not None:
                d[k] = next_d
        return d if d else None

    # assume a list:
    d = []
    for i in range(max(len(x), len(y))):
        if i >= len(y):
            d.append((x[i], None))
        elif i >= len(x):
            d.append((None, y[i]))
        else:
            d.append(diff_child(x[i], y[i], i))
    return None if all(item is None for item in d) else d
Adding a non-recursive solution.
# Non Recursively traverses through a large nested dictionary
# Uses a queue of dicts_to_process to keep track of what needs to be traversed rather than using recursion.
# Slightly more complex than the recursive version, but arguably better as there is no risk of stack overflow from
# too many levels of recursion
def get_dict_diff_non_recursive(dict1, dict2):
    """Print the path of every entry of ``dict1`` that differs from ``dict2``.

    The nested dictionaries are walked with an explicit work list instead of
    recursion. Only keys of ``dict1`` are visited. Path segments are joined
    with "/"; dictionaries inside lists get an "[index]" suffix.

    Args:
        dict1: dictionary whose entries are walked.
        dict2: dictionary to compare against.
    """
    pending = [(dict1, dict2, "")]
    while pending:
        d1, d2, parent_path = pending.pop()
        for key, value in d1.items():
            # Derive the path from the parent each time so one key's path
            # never leaks into the paths of its siblings.
            current_path = f"{parent_path}/{key}" if parent_path else f"{key}"
            if key not in d2:
                print(f"difference at {current_path}")
                continue
            other = d2[key]
            if value == other:
                continue  # equal subtrees cannot contain differences
            print(f"difference at {current_path}")
            if isinstance(value, dict) and isinstance(other, dict):
                pending.append((value, other, current_path))
            elif isinstance(value, list) and isinstance(other, list):
                # Pair elements by position; zip() copes with unequal lengths.
                for index, (left, right) in enumerate(zip(value, other)):
                    if isinstance(left, dict) and isinstance(right, dict):
                        pending.append((left, right, f"{current_path}[{index}]"))
I have not liked many of the answers I have found across many threads. A lot of them recommend using deepdiff, which is very powerful (don't get me wrong), but it does not give me the output I wanted: not just a string of the diffs, or a newly built, strange-looking dictionary with new keys collected from the nested keys of the original, but a real dictionary with the original keys and the delta values.
My use case for this is to send smaller payloads or none if there is no difference over an MQTT network.
The solution I found is partially stolen from this link; however, I modified it to give me just the deltas. Then I recursively parse it, calling diff_dict() again if it's nested, to build the final diff dictionary. It turned out to be much simpler than many examples out there. FYI, it does not care about sorting.
My Solution:
def diff_dict(d1, d2):
    """Return the entries of ``d2`` that are new or changed relative to ``d1``.

    The result keeps the original keys and nesting; unchanged entries and
    keys that exist only in ``d1`` are left out. An empty dict means
    "nothing to send".
    """
    deltas = {}
    for key, new_value in d2.items():
        if key not in d1:
            deltas[key] = (None, new_value)
        elif d1[key] != new_value:
            deltas[key] = (d1[key], new_value)
    return parse_deltas(deltas)


def parse_deltas(deltas: dict):
    """Turn ``{key: (old, new)}`` pairs into a dict of new values.

    When both old and new are dictionaries the pair is diffed again with
    diff_dict() and kept only if that nested diff is non-empty. In every
    other case, including a dict being replaced by a non-dict, the new
    value is taken as is.
    """
    res = {}
    for k, v in deltas.items():
        old, new = v[0], v[1]
        if isinstance(old, dict) and isinstance(new, dict):
            tmp = diff_dict(old, new)
            if tmp:
                res[k] = tmp
        else:
            res[k] = new
    return res
Example:
# Baseline payload.
original = {
    'int': 1,
    'float': 0.1000,
    'string': 'some string',
    'bool': True,
    'nested1': {
        'int': 2,
        'float': 0.2000,
        'string': 'some string2',
        'bool': True,
        'nested2': {
            'string': 'some string3',
        },
    },
}

# Updated payload: some values changed, some keys dropped, one key added.
new = {
    'int': 2,
    'string': 'some string',
    'nested1': {
        'int': 2,
        'float': 0.5000,
        'string': 'new string',
        'bool': False,
        'nested2': {
            'string': 'new string nested 2 time',
        },
    },
    'test_added': 'added_val',
}

print(diff_dict(original, new))
Output:
{'int': 2, 'nested1': {'string': 'new string', 'nested2': {'string': 'new string nested 2 time'}, 'bool': False, 'float': 0.5}, 'test_added': 'added_val'}
Solution
def compare_dicts(dict1, dict2, indent=4, level=0, offset=0):
    """Print an indented, key-by-key comparison of two nested dictionaries.

    Shared keys come first (in ``dict1`` order), followed by the keys found
    only in ``dict1`` and then those found only in ``dict2``. Every entry is
    printed on its own line. Keys are expected to be strings.

    Args:
        dict1: first dictionary (or a leaf value during recursion).
        dict2: second dictionary (or a leaf value during recursion).
        indent: number of spaces added per nesting level.
        level: current nesting depth; 0 for the outermost call.
        offset: unused; kept so existing calls keep working.

    Returns:
        'OK!' or 'MISMATCH!' when at least one argument is not a dictionary;
        otherwise None for the outermost call and '' for nested calls.
    """
    # A dictionary can only be compared key by key with another dictionary;
    # any other pairing is a plain equality check.
    if not (isinstance(dict1, dict) and isinstance(dict2, dict)):
        return 'OK!' if dict1 == dict2 else 'MISMATCH!'

    if level > 0:
        print()  # ends the parent's "key:" header line

    shared = [key for key in dict1 if key in dict2]
    only_1 = [key for key in dict1 if key not in dict2]
    only_2 = [key for key in dict2 if key not in dict1]
    all_keys = shared + only_1 + only_2
    if not all_keys:
        return '' if level else None

    max_len = max(map(len, all_keys)) + 2
    margin = ' ' * indent * level

    def label(key):
        return margin + f'{key + ":":<{max_len}}'

    for key in shared:
        value1, value2 = dict1[key], dict2[key]
        if isinstance(value1, dict) and isinstance(value2, dict):
            # The nested call prints the newline and its own lines.
            print(label(key), end='')
            compare_dicts(value1, value2, indent=indent, level=level + 1)
        else:
            print(label(key) + compare_dicts(value1, value2, indent=indent, level=level + 1))
    for key in only_1:
        print(label(key) + 'presented only in dict 1!')
    for key in only_2:
        print(label(key) + 'presented only in dict 2!')
    return '' if level else None
Example
# First sample: has b -> bc, which the second sample lacks.
dict1 = {
    'a': 1,
    'b': {'ba': 21, 'bb': 22, 'bc': 23},
    'c': 3,
    'd': 4,
}

# Second sample: bb and d changed sign, e was added.
dict2 = {
    'a': 1,
    'b': {'ba': 21, 'bb': -22},
    'c': 3,
    'd': -4,
    'e': 5,
}

compare_dicts(dict1, dict2)
Output
b:
bb: MISMATCH!
ba: OK!
bc: presented only in dict 1!
a: OK!
d: MISMATCH!
c: OK!
e: presented only in dict 2!
I need to traverse a dictionary recursively and remember the previous keys.
Let me explain:
# Each leaf is a pair of values to be printed next to its full key path.
dic = {
    u'resources': {
        u'info': {
            u'load': (u'37', u'17'),
        },
    },
    u'peak': {
        u'load': (u'0', u'1'),
    },
}
The elements are always a value or a dictionary until it reaches a value.
I want to print the above dic like this: (omit the xxx below, it should eventually be a diff of the two values)
resources info load 37 17 xxx
resources peak load 0 1 xxx
This is the code I have so far:
def convertToTable(var, path=()):
    """Render nested dictionaries of value pairs as tab-separated text.

    Args:
        var: a dictionary (walked recursively), a tuple of two values to put
            side by side, or any other value.
        path: keys leading to ``var``; they are written, space-separated, in
            front of the row so every leaf shows its complete key path.

    Returns:
        The rendered text. Numeric pairs get a third column with the signed
        difference; pairs of lists are rendered element by element.
    """
    prefix = ''.join('%s ' % key for key in path)

    if isinstance(var, dict):
        # Hand the extended path down so deeper levels can print all keys.
        return ''.join(convertToTable(value, path + (key,))
                       for key, value in var.items())

    if not isinstance(var, tuple):
        return '%s%s\n' % (prefix, var)

    if len(var) != 2:
        return prefix + str(var)

    v1, v2 = var
    try:
        # Convert both or neither, so a half-converted pair never ends up in
        # the "different types" branch; TypeError covers lists and None.
        v1, v2 = float(v1), float(v2)
    except (TypeError, ValueError):
        pass

    if type(v1) != type(v2):
        return '%s\t%s\t%s\n' % (prefix, v1, v2)
    if isinstance(v1, (int, float)):
        delta = v2 - v1
        sign = '+' if delta > 0 else ''
        return '%s\t%s\t%s\t%s%s\n' % (prefix, v1, v2, sign, delta)
    if isinstance(v1, list):
        ret = ''
        for i in range(max(len(v1), len(v2))):
            v1v = v1[i] if i < len(v1) else ''
            v2v = v2[i] if i < len(v2) else ''
            ret += '\t%s, %s\n' % (v1v, v2v)
        return prefix + ret
    return '%s\t%s\t%s\n' % (prefix, v1, v2)
I don't know how to pass the previous keys recursively to the function again! Either I get an extra print of keys or nothing! (Please do not advise me to use json.dumps, as it does not really do what I need!)
I am hoping someone can check my solution and point out the flaw in it!
I'm not sure what's wrong with your code, but this might do what you want:
def iteritems_recursive(d):
    """Yield ``(path, value)`` for every non-dict value nested inside ``d``.

    ``path`` is the tuple of keys leading from ``d`` to the value.
    dict.items() is used so the generator runs on Python 2 and Python 3.
    """
    for k, v in d.items():
        if isinstance(v, dict):
            for k1, v1 in iteritems_recursive(v):
                yield (k,) + k1, v1
        else:
            yield (k,), v
dic = {u'resources': {u'info': {u'load': (u'37', u'17')}, u'peak': {u'load': (u'0', u'1')}}}

# Show every leaf with the tuple of keys leading to it. One pre-formatted
# string keeps the output identical on Python 2 and Python 3.
for p, v in iteritems_recursive(dic):
    print("%s -> %s" % (p, v))
iteritems_recursive iterates over the passed-in dictionary and yields (path, value) tuples. The path is itself a tuple which describes the keys that reach that item.
The above code prints:
(u'resources', u'info', u'load') -> (u'37', u'17')
(u'resources', u'peak', u'load') -> (u'0', u'1')
If you want to print the table pretty, replace the for loop above with this:
# Print one fixed-width row per leaf: key path, the two values, their difference.
for p, v in iteritems_recursive(dic):
    diff = float(v[0]) - float(v[1])
    p = ''.join('{:10}'.format(w) for w in p)
    v = ''.join('{:5}'.format(f) for f in v)
    # One pre-formatted string keeps the output identical on Python 2 and 3.
    print("%s %s %s" % (p, v, diff))
Which prints:
resources info load 37 17 20.0
resources peak load 0 1 -1.0
def convertToTable(inp, history=None):
    """Print one line per leaf: the key path, then the leaf's first two items.

    Args:
        inp: nested dictionary whose leaves are indexable pairs.
        history: keys already traversed to reach ``inp``; omit it for the
            outermost call. The list passed in is not modified.
    """
    trail = [] if history is None else history
    for key, value in inp.items():
        # A fresh list per branch keeps siblings independent of each other.
        branch = trail + [key]
        if isinstance(value, dict):
            convertToTable(value, branch)
        else:
            print('{} {} {}'.format(' -> '.join(branch), value[0], value[1]))
dic = {
    'peak': {
        'load': ('0', '1'),
        'unload': ('2', '3'),
    },
    'resources': {
        'info': {
            'loadxx': ('37', '17'),
        },
    },
}
convertToTable(dic)
# Output:
# peak -> load 0 1
# peak -> unload 2 3
# resources -> info -> loadxx 37 17
I have two solutions, the first carries the names of all the keys down the levels and prints them at the bottom before returning back up the stack.
The second prints them on the way down thus avoiding having to 'remember' the levels
import sys
# Sample input: every leaf is a pair of strings.
dic = {
    u'resources': {
        u'info': {
            u'load': (u'37', u'17'),
        },
    },
    u'peak': {
        u'load': (u'0', u'1'),
    },
}
def racecar(goomba, levels=None):
    """Write one line per leaf: all keys leading to it, its values, 'xxx'.

    Args:
        goomba: nested dictionary whose leaves are iterables of strings.
        levels: keys already traversed to reach ``goomba``; omit it for the
            outermost call. The list passed in is not modified.

    Returns:
        An empty list.
    """
    trail = [] if levels is None else list(levels)
    for key, value in goomba.items():
        # Each branch gets its own copy of the path, so a finished leaf
        # cannot alter what its siblings or its parent's siblings print.
        branch = trail + [key]
        if isinstance(value, dict):
            racecar(value, branch)
        else:
            for name in branch:
                sys.stdout.write(name + ' ')
            for val in value:
                sys.stdout.write(val + ' ')
            sys.stdout.write('xxx\n')
    return []
def racecar2(goomba):
    """Write keys while descending, then each leaf's values and 'xxx'.

    Every key is written as soon as it is reached, so no path has to be
    remembered; the keys of a parent therefore appear only in front of its
    first leaf.
    """
    for key, value in goomba.items():
        sys.stdout.write(key + ' ')
        if isinstance(value, dict):
            # Recurse into this function: the "print on the way down"
            # strategy has to be applied at every level.
            racecar2(value)
        else:
            for val in value:
                sys.stdout.write(val + ' ')
            sys.stdout.write('xxx\n')
# Run both variants on the sample dictionary.
racecar(dic)
racecar2(dic)
returns:
peak load 0 1 xxx
resources info load 37 17 xxx
How can I test if two dictionaries are equal while taking some keys out of consideration. For example,
equal_dicts(
{'foo':1, 'bar':2, 'x':55, 'y': 77 },
{'foo':1, 'bar':2, 'x':66, 'z': 88 },
ignore_keys=('x', 'y', 'z')
)
should return True.
UPD: I'm looking for an efficient, fast solution.
UPD2. I ended up with this code, which appears to be the fastest:
def equal_dicts_1(a, b, ignore_keys):
    """Return True when ``a`` and ``b`` agree on every key outside ``ignore_keys``.

    Both dictionaries must have the same set of non-ignored keys, and the
    values stored under those keys must compare equal.
    """
    kept_a = set(a)
    kept_a.difference_update(ignore_keys)
    kept_b = set(b)
    kept_b.difference_update(ignore_keys)
    if kept_a != kept_b:
        return False
    for key in kept_a:
        if not (a[key] == b[key]):
            return False
    return True
Timings: https://gist.github.com/2651872
def equal_dicts(d1, d2, ignore_keys):
    """Compare two dictionaries after leaving out the keys in ``ignore_keys``."""

    def without_ignored(mapping):
        # Copy of ``mapping`` restricted to the keys that still count.
        kept = {}
        for key, value in mapping.items():
            if key not in ignore_keys:
                kept[key] = value
        return kept

    return without_ignored(d1) == without_ignored(d2)
EDIT: This might be faster and more memory-efficient:
def equal_dicts(d1, d2, ignore_keys):
    """Return True when ``d1`` and ``d2`` agree on every key outside ``ignore_keys``.

    No filtered copies are built and the check stops at the first mismatch.
    dict.items() is used so the function runs on Python 2 and Python 3.
    """
    ignored = set(ignore_keys)
    for k1, v1 in d1.items():
        if k1 not in ignored and (k1 not in d2 or d2[k1] != v1):
            return False
    # Values of shared keys were checked above; only keys that d1 lacks
    # are left to look for.
    for k2 in d2:
        if k2 not in ignored and k2 not in d1:
            return False
    return True
Using dict comprehensions:
>>> {k: v for k,v in d1.items() if k not in ignore_keys} == \
... {k: v for k,v in d2.items() if k not in ignore_keys}
Use .viewitems() instead on Python 2.
Here's another variant:
set(ignore_keys).issuperset(k for (k, v) in d1.items() ^ d2.items())
Its virtues:
C speed identification of differences between the dicts
C speed check for membership in the set of ignored keys
Early-out if a single mismatch is found
Very very crudely, you could just delete any ignored keys and compare those dictionaries:
def equal_dicts(d1, d2, ignore_keys=()):
    """Compare shallow copies of both dictionaries with the ignored keys removed.

    The arguments themselves are never modified.
    """
    left, right = d1.copy(), d2.copy()
    for key in ignore_keys:
        # pop() with a default drops the key when present and is a no-op otherwise.
        left.pop(key, None)
        right.pop(key, None)
    return left == right
(Note that we don't need a deep copy here, we just need to avoid modifying d1 and d2.)
def compare_dict(d1, d2, ignore):
    """Check that every non-ignored key of ``d1`` has an equal value in ``d2``.

    The check is one-directional: keys that exist only in ``d2`` are not
    looked at.
    """
    for key in d1:
        if key in ignore:
            continue
        if key not in d2 or d1[key] != d2[key]:
            return False
    return True
Comment edit: You can do something like compare_dict(d1, d2, ignore) and compare_dict(d2, d1, ignore) or duplicate the for
def compare_dict(d1, d2, ignore):
    """Return True when both dictionaries agree on all non-ignored keys.

    Keys of both dictionaries are visited, so a key missing on either side
    makes the comparison fail.
    """
    skipped = set(ignore)
    for keys in (d1, d2):
        for key in keys:
            if key in skipped:
                continue
            if key not in d1 or key not in d2:
                return False
            if d1[key] != d2[key]:
                return False
    return True
Whatever is faster and cleaner!
Update: cast set(ignore)
If you need this check when testing, you can use the ANY from the unittest.mock library.
Here is an example.
from unittest.mock import ANY
# ANY compares equal to every value, so putting it under 'lastModified'
# takes that key's value out of the comparison.
actual = {'userName':'bob', 'lastModified':'2012-01-01'}
expected = {'userName':'bob', 'lastModified': ANY}
assert actual == expected
See more
Optimal solution for the case of ignoring only one key
return all(
(x == y or (x[1] == y[1] == 'key to ignore')) for x, y in itertools.izip(
d1.iteritems(), d2.iteritems()))
in case your dictionary contained lists or other dictionaries:
def equal_dicts(d1, d2, ignore_keys, equal):
    """Compare nested dictionaries/lists while skipping ``ignore_keys``.

    Only the keys of ``d1`` are visited. Lists must have equal length and
    are compared element by element; lists of dictionaries are first sorted
    by their 'created' entry so their order does not matter. The key of each
    mismatching scalar or dictionary entry is printed.

    Args:
        d1: dictionary to walk (or a list element during recursion).
        d2: value to compare against.
        ignore_keys: keys skipped at every nesting level.
        equal: result accumulated so far; pass True for the outermost call.

    Returns:
        ``equal`` when no mismatch was found, otherwise False. For
        non-dictionary arguments the plain result of ``d1 == d2``.
    """
    if not isinstance(d1, dict):
        # List elements that are not dictionaries (strings, numbers, ...).
        return d1 == d2
    if not isinstance(d2, dict):
        return False

    for k in d1:
        if k in ignore_keys:
            continue
        v1 = d1[k]
        v2 = d2.get(k)
        if isinstance(v1, list):
            # A missing or non-list counterpart is a mismatch, not a crash.
            if not isinstance(v2, list) or len(v1) != len(v2):
                return False
            if len(v1) > 0 and isinstance(v1[0], dict):
                if not isinstance(v2[0], dict):
                    return False
                v1 = _sorted_by_created(v1)
                v2 = _sorted_by_created(v2)
            equal = all(equal_dicts(x, y, ignore_keys, equal) for x, y in zip(v1, v2)) and equal
        elif isinstance(v1, dict):
            if not isinstance(v2, dict):
                print(k)
                equal = False
            else:
                equal = equal_dicts(v1, v2, ignore_keys, equal) and equal
        elif v2 != v1:
            print(k)
            equal = False
    return equal


def _sorted_by_created(items):
    """Return ``items`` sorted by their 'created' entry, or unchanged if unsortable."""
    try:
        return sorted(items, key=lambda item: item.get('created'))
    except (TypeError, AttributeError):
        # Missing/None 'created' values or non-dict elements cannot be
        # ordered; fall back to the given order.
        return items