How to recursively find specific key in nested JSON?

How to recursively find specific key in nested JSON? - python

I'm trying to pull nested values from a json file. I want to print out each of the values for every "id" key. I think I'm close but can't figure out why the obj type changes from a dict to a list, and then why I'm unable to parse that list.
Here is a link to the json I'm working with: http://hastebin.com/ratevimixa.tex
and here is my current code:
#!/usr/bin/env python
#-*- coding: utf-8 -*-
# Asker's first attempt. Python 2 only: it uses print statements and calls
# .decode() on the string returned by read().
# NOTE(review): indentation was lost when this snippet was extracted, so the
# intended nesting of the statements below cannot be recovered with certainty.
import json
# The file handle is never closed in the visible code.
json_data = open('JubJubProductions.json', 'r+')
jdata = json.loads(json_data.read().decode("utf-8"))
# The parameter is named `dict`, shadowing the builtin; type(dict) below is
# therefore the type of the argument that was passed in.
def recursion(dict):
for key, value in dict.items():
if type(value) == type(dict):
if key != "paging":
# This inner loop rebinds `key` and `value` from the outer loop.
for key, value in value.items():
if isinstance (value,list):
print key
# place where I need to enter list comprehension?
if type(value) == type(dict):
if key == "id":
print " id found " + value
if key != "id":
print key + " 1st level"
if key == "id":
print key
else:
if key == "id":
print "id found " + value
if __name__ == '__main__':
recursion(jdata)
-------------------------------------------------------------------------------------------update
This is now what I'm working with and it'll return a single id value, but not all of them:
#!/usr/bin/env python
#-*- coding: utf-8 -*-
# Asker's second attempt: a generator that yields the values stored under "id".
# NOTE(review): indentation was lost when this snippet was extracted.
import json
# The file handle is never closed in the visible code.
json_data = open('jubjubProductions', 'r+')
jdata = json.loads(json_data.read().decode("utf-8"))
# Yields the value of every "id" key, recursing only into values that are
# dicts; values that are lists are not searched.
def id_generator(d):
for k, v in d.items():
if k == "id":
yield v
elif isinstance(v, dict):
for id_val in id_generator(v):
yield id_val
if __name__ == '__main__':
for _ in id_generator(jdata):
print (_)

The JSON might contain a list of objects, which needs to be searched:
Python 2.7 version:
def item_generator(json_input, lookup_key):
    """Yield every value stored under ``lookup_key`` in a nested JSON value.

    Dicts and lists are searched depth-first in iteration order. When a key
    matches, its value is yielded and not searched any further. Any other
    type (str, int, None, ...) yields nothing.

    Args:
        json_input: a dict, a list, or any scalar.
        lookup_key: the dict key whose values are wanted.
    """
    if isinstance(json_input, dict):
        # .items() instead of the Python-2-only .iteritems() keeps this
        # version runnable on both Python 2.7 and Python 3.
        for k, v in json_input.items():
            if k == lookup_key:
                yield v
            else:
                # No "yield from" here: that syntax does not exist in 2.7.
                for child_val in item_generator(v, lookup_key):
                    yield child_val
    elif isinstance(json_input, list):
        for item in json_input:
            for item_val in item_generator(item, lookup_key):
                yield item_val
Python 3.x version:
def item_generator(json_input, lookup_key):
    """Yield every value stored under ``lookup_key`` in a nested JSON value.

    Dicts and lists are searched depth-first in iteration order. When a key
    matches, its value is yielded and not searched any further. Any other
    type (str, int, None, ...) yields nothing.

    Args:
        json_input: a dict, a list, or any scalar.
        lookup_key: the dict key whose values are wanted.
    """
    if isinstance(json_input, dict):
        for k, v in json_input.items():
            if k == lookup_key:
                yield v
            else:
                yield from item_generator(v, lookup_key)
    elif isinstance(json_input, list):
        for item in json_input:
            yield from item_generator(item, lookup_key)

def id_generator(dict_var):
    """Yield every value stored under the key ``"id"`` at any nesting level.

    Nested dicts are searched recursively. Lists (including lists of lists)
    are searched too, so ids of objects held in JSON arrays are not missed.
    A value found under "id" is yielded as-is and not searched further.

    Args:
        dict_var: the dict to search.
    """
    for k, v in dict_var.items():
        if k == "id":
            yield v
            continue
        # Walk the value with an explicit stack so arbitrarily nested lists
        # are flattened; dicts found on the way are searched recursively.
        pending = [v]
        while pending:
            node = pending.pop()
            if isinstance(node, dict):
                for id_val in id_generator(node):
                    yield id_val
            elif isinstance(node, list):
                # Reversed so that items are popped in their original order.
                pending.extend(reversed(node))
This will create an iterator which will yield every value on any level under key "id". Example usage (printing all of those values):
for _ in id_generator(some_json_dict):
print(_)

A little bit cleaner code (in python 3.x).
def parse_json_recursively(json_object, target_key):
    """Print ``"<target_key>: <value>"`` for every occurrence of ``target_key``.

    Dicts and lists are walked recursively. A matching value is printed and
    then searched as well, so nested occurrences are reported too.

    Args:
        json_object: a dict, a list, or any scalar (scalars are ignored).
        target_key: the dict key to look for.
    """
    # isinstance() also accepts dict/list subclasses such as OrderedDict.
    if isinstance(json_object, dict):
        for key in json_object:
            if key == target_key:
                print("{}: {}".format(target_key, json_object[key]))
            parse_json_recursively(json_object[key], target_key)
    elif isinstance(json_object, list):
        for item in json_object:
            parse_json_recursively(item, target_key)


json_object = {"key1": "val1", "key2": [{"key3":"val3", "key4": "val4"}, 123, "abc"]}
target_key = "key3"
parse_json_recursively(json_object, target_key)  # Output: key3: val3

Here is a simple recursive function that collects all values for a given key from a JSON document. The values can themselves be JSON documents. The matching values are appended to search_result.
def json_full_search(lookup_key, json_dict, search_result=None):
    """Collect every value stored under ``lookup_key`` in a JSON document.

    A matching value is appended and then searched as well, so a match
    nested inside another match is also collected.

    Args:
        lookup_key: the dict key to look for.
        json_dict: a dict, a list, or any scalar (scalars are ignored).
        search_result: optional list to append to. A fresh list is created
            when omitted, so results never leak from one call to the next.

    Returns:
        The list of collected values (``search_result`` itself if given).
    """
    # A mutable default ([]) would be shared by every call.
    if search_result is None:
        search_result = []
    if isinstance(json_dict, dict):
        for key, value in json_dict.items():
            if key == lookup_key:
                search_result.append(value)
            json_full_search(lookup_key, value, search_result)
    elif isinstance(json_dict, list):
        for element in json_dict:
            json_full_search(lookup_key, element, search_result)
    return search_result

_GET_VAL_MISSING = object()  # private sentinel: "key not found anywhere"


def _get_val_first(j, s):
    """Return the first value under key ``s`` (depth-first) or the sentinel."""
    for k in j:
        if k == s:
            return j[k]
        if isinstance(j[k], dict):
            found = _get_val_first(j[k], s)
            # Keep scanning sibling keys when this branch has no match.
            if found is not _GET_VAL_MISSING:
                return found
    return _GET_VAL_MISSING


def get_val(j, s, v=None):
    """Look up key ``s`` in the nested dict ``j``.

    Args:
        j: the dict to search (nested dicts are searched depth-first).
        s: the key to look for.
        v: optional expected value.

    Returns:
        With ``v`` omitted: the first value found under ``s``, or None when
        the key does not occur. With ``v`` given: True/False depending on
        whether that first value equals ``v``, or None when the key does not
        occur.
    """
    found = _get_val_first(j, s)
    if found is _GET_VAL_MISSING:
        return None
    if v is None:
        return found
    return found == v
You can use it on each element l of a JSON list j, as shown below:
for l in j:
r = get_val(l, 'key')
print(r)
for l in j:
r = get_val(l, 'mac', '00-60-2F-5A-04-51')
print(r)

Extension to python 3.x answer:
If nested json has similar keys under different list or dictionaries and you want to take first value of it..
below is the generic way:
def get_value_from_generator(json_input, lookup_key):
    """Return the first value found under ``lookup_key``, or None.

    The result is also printed. Only the first match is computed: the
    generator is not exhausted, so the search stops as soon as a value is
    found.
    """
    val = next(item_generator(json_input, lookup_key), None)
    print(f'lookup_key -> value : {val}')
    return val


def item_generator(json_input, lookup_key):
    """Yield every value stored under ``lookup_key`` in a nested JSON value.

    Dicts and lists are searched depth-first; a matching value is yielded
    and not searched further. The per-item debug print was dropped so that
    the generator has no side effects.
    """
    if isinstance(json_input, dict):
        for k, v in json_input.items():
            if k == lookup_key:
                yield v
            else:
                yield from item_generator(v, lookup_key)
    elif isinstance(json_input, list):
        for item in json_input:
            yield from item_generator(item, lookup_key)

Related

Function to count the same key's in nested composed dict-list

I am trying to count the 'name' keys in this nested dict/list structure, but I am getting 3 instead of 6. I think my problem is with the base case in the recursive function count_elem(tree).
# Asker's code (reported to print 3 instead of the expected 6).
# NOTE(review): indentation was lost when this snippet was extracted.
# Build and return the sample tree: nested dicts with 'name', 'grade' and an
# optional 'children' list that may contain None entries.
def define_tree3():
tree3 ={'name':'GAS','grade':0.8,'children':[{'name':'CSA','grade':0.5,'children':[{'name':'MB','grade':0.1},{'name':'TA','grade':0.6}]},{'name':'IIW','grade':0.9,'children':[None,{'name':'IP','grade':0.99}]}]}
return tree3
# Return a copy of dict1 with the given key removed; dict1 is not modified.
def delkey(dict1,key):
d=dict(dict1)
del d[key]
return d
# Intended to count the 'name' keys in the tree.
def count_elem(tree):
if len(tree)==0:
return 0
else:
for i in tree:
if i == None:
return 0
elif i == 'name':
return 1+ count_elem(delkey(tree,i))
elif i == 'grade':
return count_elem(delkey(tree,i))
elif i == 'children':
for j in tree[i]:
if j == None:
continue
else:
# NOTE(review): this returns on the first non-None child, so the
# remaining children in the list are never visited.
return count_elem(j)
a=define_tree3()
print(count_elem(a))

This is not an answer to your question. I just tried to solve it and used a different approach. Might be useful as a reference.
tree3 = {'name':'GAS','grade':0.8,'children':[{'name':'CSA','grade':0.5,'children':[{'name':'MB','grade':0.1},{'name':'TA','grade':0.6}]},{'name':'IIW','grade':0.9,'children':[None,{'name':'IP','grade':0.99}]}]}
def count_name(entity):
    """Count how many dicts in a nested dict/list structure have a 'name' key.

    Every dict and list reachable from ``entity`` is visited, including
    dict-valued fields and a list passed in directly. Anything else
    (None, numbers, strings) contributes 0.

    Args:
        entity: the structure to inspect.

    Returns:
        The number of 'name' keys found.
    """
    name = 'name'
    if isinstance(entity, dict):
        own = 1 if name in entity else 0
        return own + sum(count_name(value) for value in entity.values())
    if isinstance(entity, list):
        return sum(count_name(member) for member in entity)
    return 0
count_name(tree3)

Python recursively add to number if value is int else if value is dictionary then add all items from that dictionary

I have a dictionary which has either an int or a dict as a value. if the value is a dict I would like to recursively go through the dictionary add all the possible int values and then again the recursiveness continues :)
I have the non-recursive code below. anyway this could be done differently ?
# Asker's non-recursive attempt; it is an incomplete fragment ("# etc").
# NOTE(review): indentation was lost when this snippet was extracted.
# `aapl` and `camel_to_snake` are not defined in the visible code.
updated_dict = {}
for k, v in aapl['income_statements'][0].items():
if type(v) == int:
updated_dict.update({camel_to_snake(k): v})
elif type(v) == dict:
# NOTE(review): `val` is initialised here but `val0` is what gets incremented.
val = 0
for k1, v1 in v.items():
if type(v1) == int:
val0 += v1
elif type(v1) == dict:
val1 = 0
for k2, v2 in v1.items():
if type(v2) == int:
val1 += v2
elif type(v2) == dict:
# etc
# `val2` and `val3` are never assigned in the visible code.
final_val = val0 + val1 + val2 + val3

x = {1:{2:{3:{4:6, 5:7}, 6:8}}}
def foo(dict_d):
    """Return the sum of all int values in a nested dict.

    Nested dicts are summed recursively; every other value type is skipped.
    Booleans are skipped as well: ``bool`` is a subclass of ``int``, and the
    question's own check (``type(v) == int``) does not count them.

    Args:
        dict_d: the dict to sum.

    Returns:
        The total of all int values found at any depth.
    """
    res = 0
    for v in dict_d.values():
        if isinstance(v, dict):
            res += foo(v)
        elif isinstance(v, int) and not isinstance(v, bool):
            res += v
    return res
print(foo(x))
Is this what you want?

python compare dictionary to just return True/False [duplicate]

I know there are several similar questions out there, but my question is quite different and difficult for me.
I have two dictionaries:
d1 = {'a': {'b': {'cs': 10}, 'd': {'cs': 20}}}
d2 = {'a': {'b': {'cs': 30}, 'd': {'cs': 20}}, 'newa': {'q': {'cs': 50}}}
i.e. d1 has key 'a', and d2 has keys 'a' and 'newa' (in other words d1 is my old dict and d2 is my new dict).
I want to iterate over these dictionaries such that, if the key is same check for its value (nested dict), e.g. when I find key 'a' in d2, I will check whether there is 'b', if yes check value of 'cs' (changed from 10 to 30), if this value is changed I want to print it.
Another case is, I want to get key 'newa' from d2 as the newly added key.
Hence, after iterating through these 2 dicts, this is the expected output:
"d2" has new key "newa"
Value of "cs" is changed from 10 to 30 of key "b" which is of key "a"
I have the following code with me, I am trying with many loops which are not working though, but is not a good option too, hence I am looking to find whether I can get expected output with a recursive piece of code.
# Asker's attempt (Python 2: iteritems and print statements).
# NOTE(review): indentation was lost when this snippet was extracted.
# Compares every key of d1 with every key of d2, then every sub-key pair.
for k, v in d1.iteritems():
for k1, v1 in d2.iteritems():
# `is` tests object identity, not equality.
if k is k1:
print k
for k2 in v:
for k3 in v1:
if k2 is k3:
print k2, "sub key matched"
else:
print "sorry no match found"

comparing 2 dictionaries using recursion:
Edited for python 3 (works for python 2 as well):
d1= {'a':{'b':{'cs':10},'d':{'cs':20}}}
d2= {'a':{'b':{'cs':30} ,'d':{'cs':20}},'newa':{'q':{'cs':50}}}
def findDiff(d1, d2, path=""):
    """Print the differences found when walking ``d1`` against ``d2``.

    For each key of ``d1``: report it when missing from ``d2``; recurse when
    both values are dicts; otherwise print the old/new value pair when they
    differ. Keys that exist only in ``d2`` are not reported - call the
    function a second time with the arguments swapped for that.

    Args:
        d1: the reference dict.
        d2: the dict compared against it.
        path: " -> "-joined keys leading to ``d1`` (used by the recursion).
    """
    for k in d1:
        if k not in d2:
            print ("%s%s as key not in d2\n" % ("%s: " % path if path else "", k))
            continue
        if isinstance(d1[k], dict) and isinstance(d2[k], dict):
            # Only descend when BOTH sides are dicts; a dict replaced by a
            # scalar is reported as a plain value change below.
            findDiff(d1[k], d2[k], "%s -> %s" % (path, k) if path else "%s" % k)
        elif d1[k] != d2[k]:
            result = [ "%s: " % path, " - %s : %s" % (k, d1[k]) , " + %s : %s" % (k, d2[k])]
            print("\n".join(result))
print("comparing d1 to d2:")
findDiff(d1,d2)
print("comparing d2 to d1:")
findDiff(d2,d1)
Python 2 old answer:
def findDiff(d1, d2, path=""):
    """Print the differences found when walking ``d1`` against ``d2``.

    Each report is prefixed with the "->"-joined key path of the dict that
    contains the differing key. Keys that exist only in ``d2`` are not
    reported.

    Args:
        d1: the reference dict.
        d2: the dict compared against it.
        path: key path leading to ``d1`` (used by the recursion).
    """
    for k in d1:
        if k not in d2:
            print("%s :" % path)
            print("%s as key not in d2 \n" % k)
        elif isinstance(d1[k], dict) and isinstance(d2[k], dict):
            # Build the child path in a new variable: reassigning `path`
            # here would leak this key into the paths of later siblings.
            child_path = "%s" % k if path == "" else "%s->%s" % (path, k)
            findDiff(d1[k], d2[k], child_path)
        elif d1[k] != d2[k]:
            print("%s :" % path)
            print(" -  %s  :  %s" % (k, d1[k]))
            print(" +  %s  :  %s" % (k, d2[k]))
Output:
comparing d1 to d2:
a -> b:
- cs : 10
+ cs : 30
comparing d2 to d1:
a -> b:
- cs : 30
+ cs : 10

Modified user3's code to make it even better
d1= {'as': 1, 'a':
{'b':
{'cs':10,
'qqq': {'qwe':1}
},
'd': {'csd':30}
}
}
d2= {'as': 3, 'a':
{'b':
{'cs':30,
'qqq': 123
},
'd':{'csd':20}
},
'newa':
{'q':
{'cs':50}
}
}
def compare_dictionaries(dict_1, dict_2, dict_1_name, dict_2_name, path=""):
    """Compare two dictionaries recursively and describe what differs.

    Args:
        dict_1: dictionary 1
        dict_2: dictionary 2
        dict_1_name: label used for dict_1 in the messages
        dict_2_name: label used for dict_2 in the messages
        path: "[key][key]..." prefix of the current nesting level
            (used by the recursion)

    Returns:
        A string with one line per difference: missing-key lines first,
        then value mismatches at this level, then everything reported by
        nested dicts. An empty string means no differences.
    """
    err = ''
    key_err = ''
    value_err = ''
    old_path = path
    for k in dict_1:
        path = old_path + "[%s]" % k
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if k not in dict_2:
            # The key exists in dict_1, so it is dict_1's name that is shown.
            key_err += "Key %s%s not in %s\n" % (dict_1_name, path, dict_2_name)
        elif isinstance(dict_1[k], dict) and isinstance(dict_2[k], dict):
            # Pass the caller's labels down instead of hard-coded 'd1'/'d2'.
            err += compare_dictionaries(dict_1[k], dict_2[k], dict_1_name, dict_2_name, path)
        elif dict_1[k] != dict_2[k]:
            value_err += "Value of %s%s (%s) not same as %s%s (%s)\n"\
                % (dict_1_name, path, dict_1[k], dict_2_name, path, dict_2[k])
    for k in dict_2:
        path = old_path + "[%s]" % k
        if k not in dict_1:
            key_err += "Key %s%s not in %s\n" % (dict_2_name, path, dict_1_name)
    return key_err + value_err + err
a = compare_dictionaries(d1,d2,'d1','d2')
print a
Output:
Key d2[newa] not in d1
Value of d1[as] (1) not same as d2[as] (3)
Value of d1[a][b][cs] (10) not same as d2[a][b][cs] (30)
Value of d1[a][b][qqq] ({'qwe': 1}) not same as d2[a][b][qqq] (123)
Value of d1[a][d][csd] (30) not same as d2[a][d][csd] (20)

why not use deepdiff library.
see it at: https://github.com/seperman/deepdiff
>>> from deepdiff import DeepDiff
>>> t1 = {1:1, 3:3, 4:4}
>>> t2 = {1:1, 3:3, 5:5, 6:6}
>>> ddiff = DeepDiff(t1, t2)
>>> print(ddiff)
{'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}}
of course it is more powerful, check the doc for more.

This should provide what you need with helpful functions:
For Python 2.7
def isDict(obj):
    """Return True when ``obj`` is a dict (or a dict subclass)."""
    return isinstance(obj, dict)


def containsKeyRec(vKey, vDict):
    """Return True when ``vKey`` occurs in ``vDict`` at any nesting depth."""
    for curKey in vDict:
        if curKey == vKey or (isDict(vDict[curKey]) and containsKeyRec(vKey, vDict[curKey])):
            return True
    return False


def getValueRec(vKey, vDict):
    """Return the first value stored under ``vKey`` at any depth, else None.

    The search is depth-first in key order. Nested branches are selected
    with containsKeyRec(), so falsy values (0, '', {}) are returned
    correctly and the actual value - not a boolean - comes back.
    """
    for curKey in vDict:
        if curKey == vKey:
            return vDict[curKey]
        if isDict(vDict[curKey]) and containsKeyRec(vKey, vDict[curKey]):
            return getValueRec(vKey, vDict[curKey])
    return None


d1= {'a':{'b':{'cs':10},'d':{'cs':20}}}
d2= {'a':{'b':{'cs':30} ,'d':{'cs':20}},'newa':{'q':{'cs':50}}}
for key in d1:
    # print(...) with a single string argument behaves the same on 2.7 and 3.
    if containsKeyRec(key, d2):
        print("dict d2 contains key: " + key)
        d2Value = getValueRec(key, d2)
        if d1[key] == d2Value:
            print("values are equal, d1: " + str(d1[key]) + ", d2: " + str(d2Value))
        else:
            print("values are not equal, d1: " + str(d1[key]) + ", d2: " + str(d2Value))
    else:
        print("dict d2 does not contain key: " + key)
For Python 3 (or higher):
def id_dict(obj):
    """Return True when ``obj`` is a dict (or a dict subclass)."""
    return isinstance(obj, dict)


def contains_key_rec(v_key, v_dict):
    """Return True when ``v_key`` occurs in ``v_dict`` at any nesting depth."""
    for cur_key in v_dict:
        if cur_key == v_key or (id_dict(v_dict[cur_key]) and contains_key_rec(v_key, v_dict[cur_key])):
            return True
    return False


def get_value_rec(v_key, v_dict):
    """Return the first value stored under ``v_key`` at any depth, else None.

    The search is depth-first in key order. Nested branches are selected
    with contains_key_rec(), so falsy values (0, '', {}) are returned
    correctly and the actual value - not a boolean - comes back.
    """
    for cur_key in v_dict:
        if cur_key == v_key:
            return v_dict[cur_key]
        if id_dict(v_dict[cur_key]) and contains_key_rec(v_key, v_dict[cur_key]):
            return get_value_rec(v_key, v_dict[cur_key])
    return None


d1 = {'a': {'b': {'cs': 10}, 'd': {'cs': 20}}}
d2 = {'a': {'b': {'cs': 30}, 'd': {'cs': 20}}, 'newa': {'q': {'cs': 50}}}
for key in d1:
    if contains_key_rec(key, d2):
        d2_value = get_value_rec(key, d2)
        if d1[key] == d2_value:
            print("values are equal, d1: " + str(d1[key]) + ", d2: " + str(d2_value))
        else:
            print("values are not equal:\n"
                  "list1: " + str(d1[key]) + "\n" +
                  "list2: " + str(d2_value))
    else:
        print("dict d2 does not contain key: " + key)

For python 3 or higher,
Code for comparing any data.
def do_compare(data1, data2, data1_name, data2_name, path=""):
    """Print every difference between two arbitrarily nested values.

    Dicts are compared key by key, lists index by index, anything else
    with ``!=``. When the two top-level values are equal, an info message
    is sent to the module logger and nothing is printed.

    Args:
        data1, data2: the values to compare.
        data1_name, data2_name: labels used in the printed messages.
        path: "[key][index]..." location of the values (used by recursion).
    """
    if data1 == data2:
        if not path:
            # Local import: the snippet has no module-level logger defined.
            import logging
            logging.getLogger(__name__).info("Both data have same content")
        return
    if isinstance(data1, dict) and isinstance(data2, dict):
        compare_dict(data1, data2, data1_name, data2_name, path)
    elif isinstance(data1, list) and isinstance(data2, list):
        compare_list(data1, data2, data1_name, data2_name, path)
    else:
        value_err = "Value of %s%s (%s) not same as %s%s (%s)\n"\
            % (data1_name, path, data1, data2_name, path, data2)
        print (value_err)


def compare_dict(data1, data2, data1_name, data2_name, path):
    """Report keys missing on either side and compare the shared keys."""
    for k in data1:
        key_path = path + "[%s]" % k
        if k not in data2:
            key_err = "Key %s%s not in %s\n" % (data1_name, key_path, data2_name)
            print (key_err)
        else:
            do_compare(data1[k], data2[k], data1_name, data2_name, key_path)
    for k in data2:
        if k not in data1:
            key_err = "Key %s%s not in %s\n" % (data2_name, path + "[%s]" % k, data1_name)
            print (key_err)


def compare_list(data1, data2, data1_name, data2_name, path):
    """Report a length mismatch and compare the items both lists share."""
    data1_length = len(data1)
    data2_length = len(data2)
    if data1_length != data2_length:
        value_err = "No: of items in %s%s (%s) not same as %s%s (%s)\n"\
            % (data1_name, path, data1_length, data2_name, path, data2_length)
        print (value_err)
    # Stop at the shorter list instead of catching IndexError, which would
    # also hide IndexErrors raised deeper in the comparison.
    for index in range(min(data1_length, data2_length)):
        do_compare(data1[index], data2[index], data1_name, data2_name,
                   path + "[%s]" % index)

Adding a version that adds some more capabilities:
can compare arbitrarily nested JSON-like dicts and lists
lets you specify keys to ignore (e.g. in flaky unit tests)
lets you specify keys with numerical values that will be treated as equal as long as they fall within a certain percentage of each other
If you define the deep_diff function as below and call it on @rkatkam's example, you'll get:
>>> deep_diff(d1, d2)
{'newa': (None, {'q': {'cs': 50}}), 'a': {'b': {'cs': (10, 30)}}}
Here's the function definition:
def _float_or_none(value):
    """Return ``float(value)``, or None when the value is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def deep_diff(x, y, parent_key=None, exclude_keys=(), epsilon_keys=(), epsilon=0.5):
    """
    Take the deep diff of JSON-like dictionaries
    No warranties when keys, or values are None

    Args:
        x, y: the values to compare (dicts, lists or scalars).
        parent_key: key or list index under which x and y were found
            (used by the recursion).
        exclude_keys: dict keys that are ignored completely.
        epsilon_keys: keys whose numeric values count as equal when they
            have the same sign and each magnitude is at least
            ``(1 - epsilon)`` times the other.
        epsilon: tolerance used for ``epsilon_keys`` (default 0.5).

    Returns:
        None when nothing differs. Otherwise an ``(x, y)`` tuple for
        differing leaves, a dict of per-key diffs for dicts, or a list of
        per-index diffs (None for equal items) for lists. A key or index
        present on one side only is reported as ``(value, None)`` or
        ``(None, value)``.
    """
    from copy import deepcopy

    if x == y:
        return None
    if parent_key in epsilon_keys:
        xfl, yfl = _float_or_none(x), _float_or_none(y)
        if xfl and yfl and xfl * yfl >= 0:
            rho = 1 - epsilon
            # Compare magnitudes so that negative pairs can match as well.
            if rho * abs(xfl) <= abs(yfl) and rho * abs(yfl) <= abs(xfl):
                return None
    # Anything that is not a pair of compatible containers is a leaf diff.
    if not (isinstance(x, (list, dict)) and (isinstance(x, type(y)) or isinstance(y, type(x)))):
        return x, y
    if isinstance(x, dict):
        d = type(x)()  # handles OrderedDict's as well
        for k in x.keys() ^ y.keys():
            if k in exclude_keys:
                continue
            if k in x:
                d[k] = (deepcopy(x[k]), None)
            else:
                d[k] = (None, deepcopy(y[k]))
        for k in x.keys() & y.keys():
            if k in exclude_keys:
                continue
            next_d = deep_diff(
                x[k], y[k], parent_key=k, exclude_keys=exclude_keys,
                epsilon_keys=epsilon_keys, epsilon=epsilon
            )
            if next_d is not None:
                d[k] = next_d
        return d if d else None
    # assume a list: compare index by index, padding the shorter side.
    diffs = []
    for i in range(max(len(x), len(y))):
        if i >= len(y):
            diffs.append((x[i], None))
        elif i >= len(x):
            diffs.append((None, y[i]))
        else:
            diffs.append(deep_diff(
                x[i], y[i], parent_key=i, exclude_keys=exclude_keys,
                epsilon_keys=epsilon_keys, epsilon=epsilon
            ))
    return None if all(item is None for item in diffs) else diffs

Adding a non-recursive solution.
# Non Recursively traverses through a large nested dictionary
# Uses a queue of dicts_to_process to keep track of what needs to be traversed rather than using recursion.
# Slightly more complex than the recursive version, but arguably better as there is no risk of stack overflow from
# too many levels of recursion
def get_dict_diff_non_recursive(dict1, dict2):
    """Print ``difference at <path>`` for every key of dict1 that differs.

    A key differs when it is missing from dict2 or its value is not equal.
    Differing dict values, and dicts paired up by position inside differing
    list values, are queued and inspected in turn, so the traversal never
    recurses. Paths are the keys joined with "/". Keys that exist only in
    dict2 are not reported.

    Args:
        dict1: the reference dict.
        dict2: the dict compared against it.
    """
    dicts_to_process = [(dict1, dict2, "")]
    while dicts_to_process:
        d1, d2, parent_path = dicts_to_process.pop()
        for key, value1 in d1.items():
            # Derive each key's path from the parent's path; extending one
            # shared variable would chain sibling keys together.
            current_path = f"{parent_path}/{key}" if parent_path else f"{key}"
            if key in d2 and value1 == d2[key]:
                continue
            print(f"difference at {current_path}")
            if key not in d2:
                continue  # nothing on the other side to descend into
            value2 = d2[key]
            if isinstance(value1, dict) and isinstance(value2, dict):
                dicts_to_process.append((value1, value2, current_path))
            elif isinstance(value1, list) and isinstance(value2, list):
                # zip() stops at the shorter list; only dict pairs can be
                # compared key by key.
                for item1, item2 in zip(value1, value2):
                    if isinstance(item1, dict) and isinstance(item2, dict):
                        dicts_to_process.append((item1, item2, current_path))

I have not liked many of the answers I have found across many threads. A lot of them recommend deepdiff, which is very powerful, don't get me wrong, but it does not give me the output I wanted. I did not want a string of the diffs, or a newly built, strange-looking dictionary with new keys collected from the nested keys of the original. I wanted a real dictionary with the original keys and the delta values.
My use case for this is to send smaller payloads or none if there is no difference over an MQTT network.
The solution I found is partially borrowed from this link; however, I modified it to give me just the deltas. I then parse the deltas recursively, calling diff_dict() again whenever a value is nested, to build the final diff dictionary. It turned out to be much simpler than many examples out there. FYI, it does not care about sorting.
My Solution:
def diff_dict(d1, d2):
    """Return a dict holding only what changed or was added going d1 -> d2.

    Shared keys whose values differ map to the new value (or to a nested
    diff when both values are dicts); keys that exist only in d2 map to
    their value. Keys that exist only in d1 are not reported. An empty
    dict means "nothing to send".
    """
    d1_keys = set(d1.keys())
    d2_keys = set(d2.keys())
    shared_deltas = {o: (d1[o], d2[o]) for o in d1_keys & d2_keys if d1[o] != d2[o]}
    added_deltas = {o: (None, d2[o]) for o in d2_keys - d1_keys}
    deltas = {**shared_deltas, **added_deltas}
    return parse_deltas(deltas)


def parse_deltas(deltas: dict):
    """Turn ``{key: (old, new)}`` pairs into the final diff dict.

    Pairs of dicts are diffed recursively and dropped when the nested diff
    is empty; every other pair contributes its new value.
    """
    res = {}
    for k, (old, new) in deltas.items():
        # Recurse only when BOTH sides are dicts; a dict replaced by a
        # scalar (or the reverse) is simply reported as the new value.
        if isinstance(old, dict) and isinstance(new, dict):
            tmp = diff_dict(old, new)
            if tmp:
                res[k] = tmp
        else:
            res[k] = new
    return res
Example:
original = {
'int': 1,
'float': 0.1000,
'string': 'some string',
'bool': True,
'nested1': {
'int': 2,
'float': 0.2000,
'string': 'some string2',
'bool': True,
'nested2': {
'string': 'some string3'
}
}
}
new = {
'int': 2,
'string': 'some string',
'nested1': {
'int': 2,
'float': 0.5000,
'string': 'new string',
'bool': False,
'nested2': {
'string': 'new string nested 2 time'
}
},
'test_added': 'added_val'
}
print(diff_dict(original, new))
Output:
{'int': 2, 'nested1': {'string': 'new string', 'nested2': {'string': 'new string nested 2 time'}, 'bool': False, 'float': 0.5}, 'test_added': 'added_val'}

Solution
def compare_dicts(dict1, dict2, indent=4, level=0, offset=0):
    """Print a key-by-key comparison of two (possibly nested) dicts.

    Shared keys are followed by 'OK!' or 'MISMATCH!'; shared keys whose
    values are both dicts are expanded one indent level deeper. Keys found
    on one side only are flagged as such. All keys must be strings.

    Args:
        dict1, dict2: the values to compare.
        indent: number of spaces per nesting level.
        level: current nesting depth (used by the recursion).
        offset: unused; kept for backward compatibility.

    Returns:
        'OK!' / 'MISMATCH!' when at least one argument is not a dict;
        otherwise None at the top level and '' at nested levels.
    """
    # Unless BOTH values are dicts they are compared directly; this also
    # covers a dict on one side and a scalar on the other.
    if not (isinstance(dict1, dict) and isinstance(dict2, dict)):
        return 'OK!' if dict1 == dict2 else 'MISMATCH!'
    if level > 0:
        print()
    keys1 = set(dict1.keys())
    keys2 = set(dict2.keys())
    all_keys = keys1 | keys2
    if not all_keys:
        return '' if level else None
    max_len = max(map(len, all_keys)) + 2
    pad = ' ' * indent * level
    for key in keys1 & keys2:
        print(pad + f'{key+":":<{max_len}}', end='')
        print(compare_dicts(dict1[key], dict2[key], indent=indent, level=level+1))
    for key in keys1 - keys2:
        print(pad + f'{key+":":<{max_len}}'
              + 'presented only in dict 1!', end='')
    for key in keys2 - keys1:
        print(pad + f'{key+":":<{max_len}}'
              + 'presented only in dict 2!', end='')
    return '' if level else None
Example
dict1 = {
'a': 1,
'b': {
'ba': 21,
'bb': 22,
'bc': 23,
},
'c': 3,
'd': 4,
}
dict2 = {
'a': 1,
'b': {
'ba': 21,
'bb': -22,
},
'c': 3,
'd': -4,
'e': 5,
}
compare_dicts(dict1, dict2)
Output
b:
bb: MISMATCH!
ba: OK!
bc: presented only in dict 1!
a: OK!
d: MISMATCH!
c: OK!
e: presented only in dict 2!

Traversing a dictionary recursively

I need to traverse a dictionary recursively and remember the previous keys.
Let me explain:
dic = {u'resources': {u'info': {u'load': (u'37', u'17')}}, u'peak': {u'load': (u'0', u'1')}}
The elements are always a value or a dictionary until it reaches a value.
I want to print the above dic like this: (omit the xxx below, it should eventually be a diff of the two values)
resources info load 37 17 xxx
resources peak load 0 1 xxx
This is the code I have so far:
# Asker's code (Python 2: uses dict.iteritems).
# NOTE(review): indentation was lost when this snippet was extracted.
# Formats `var` as text: a 2-tuple becomes a tab-separated line (with the
# signed difference appended when both items are numbers), a dict becomes
# "<key> <formatted value>" per entry, anything else becomes "<var>\n".
def convertToTable(var):
if isinstance(var, tuple):
# Tuples that are not pairs are returned as their plain string form.
if len(var) != 2:
return str(var)
v1, v2 = var
# Try to treat both items as numbers; leave them unchanged on ValueError.
try:
v1 = float(v1)
v2 = float(v2)
except ValueError:
pass
if type(v1) != type(v2):
return '\t%s\t%s\n' % (v1, v2)
elif isinstance(v1, int) or isinstance(v1, float):
sign = '+' if v2 - v1 > 0 else ''
return '\t%s\t%s\t%s%s\n' % (v1, v2, sign, v2 - v1)
elif isinstance(v1, list):
ret = ''
# Pair the two lists item by item, padding the shorter one with ''.
for i in range(max(len(v1), len(v2))):
v1v = v1[i] if i < len(v1) else ''
v2v = v2[i] if i < len(v2) else ''
ret += '\t%s, %s\n' % (v1v, v2v)
return ret
else:
return '\t%s\t%s\n' % (v1, v2)
elif isinstance(var, dict):
ret = ''
for key, value in var.iteritems():
# fix this crap, it's not printing all recursive levels of keys!
# Only the current key is prepended here; the keys of enclosing dicts are
# not passed down to the recursive call.
ret += '%s %s' % (key, convertToTable(value))
return ret
else:
return '%s\n' % (var)
I don't know how to pass the previous keys recursively to the function again! Either I get an extra print of keys or nothing! (Please do not advise me to use json.dumps, as it does not really do what I need!)
I am hoping someone can check my solution and point out the flaw in it!

I'm not sure what's wrong with your code, but this might do what you want:
def iteritems_recursive(d):
    """Yield ``(path, value)`` for every non-dict value in a nested dict.

    ``path`` is the tuple of keys leading to the value, outermost first.

    Args:
        d: the (possibly nested) dict to walk.
    """
    # .items() instead of the Python-2-only .iteritems() keeps the generator
    # runnable on both Python 2 and Python 3.
    for k, v in d.items():
        if isinstance(v, dict):
            for k1, v1 in iteritems_recursive(v):
                yield (k,) + k1, v1
        else:
            yield (k,), v
dic = {u'resources': {u'info': {u'load': (u'37', u'17')}, u'peak': {u'load': (u'0', u'1')}}}
for p,v in iteritems_recursive(dic):
print p, "->", v
iteritems_recursive iterates over the passed-in dictionary and yields (path, value) tuples. The path is itself a tuple that lists the keys leading to that item.
The above code prints:
(u'resources', u'info', u'load') -> (u'37', u'17')
(u'resources', u'peak', u'load') -> (u'0', u'1')
If you want to print the table pretty, replace the for loop above with this:
for p,v in iteritems_recursive(dic):
diff = float(v[0]) - float(v[1])
p = ''.join('{:10}'.format(w) for w in p)
v = ''.join('{:5}'.format(f) for f in v)
print p, v, diff
Which prints:
resources info load 37 17 20.0
resources peak load 0 1 -1.0

def convertToTable(inp, history=None):
    """Print one line per leaf: ``<key -> key -> ...> <value[0]> <value[1]>``.

    Args:
        inp: nested dict whose leaves are indexable pairs.
        history: keys of the enclosing dicts, outermost first (used by the
            recursion). It is never modified.
    """
    # A mutable default ([]) would be shared by every call and would stay
    # polluted if a call was interrupted half-way.
    if history is None:
        history = []
    for key, value in inp.items():
        # A new list per key, so that siblings never see each other's keys.
        path = history + [key]
        if isinstance(value, dict):
            convertToTable(value, path)
        else:
            print('{} {} {}'.format(' -> '.join(path), value[0], value[1]))
dic = {'peak': {'load': ('0', '1'), 'unload': ('2', '3')}, 'resources': {'info': {'loadxx': ('37', '17')}}}
convertToTable(dic)
# peak -> load 0 1
# peak -> unload 2 3
# resources -> info -> loadxx 37 17

I have two solutions, the first carries the names of all the keys down the levels and prints them at the bottom before returning back up the stack.
The second prints them on the way down thus avoiding having to 'remember' the levels
import sys
dic = {u'resources':
{u'info':
{u'load': (u'37', u'17')}
},
u'peak':
{u'load': (u'0', u'1')}
}
def racecar(goomba, levels=None):
    """Write one line per leaf: all keys on its path, its values, 'xxx'.

    Args:
        goomba: nested dict whose leaves are iterables of strings.
        levels: keys of the enclosing dicts, outermost first (used by the
            recursion). It is never modified.

    Returns:
        An empty list (kept for backward compatibility).
    """
    if levels is None:
        levels = []
    for key in goomba:
        # A new list per key: resetting or sharing one list made later
        # siblings lose the keys of their parents.
        path = levels + [key]
        if isinstance(goomba[key], dict):
            racecar(goomba[key], path)
        else:
            for name in path:
                sys.stdout.write(name + ' ')
            for val in goomba[key]:
                sys.stdout.write(val + ' ')
            sys.stdout.write('xxx\n')
    return []


def racecar2(goomba):
    """Write the same table as racecar(), handling top-level leaves itself.

    Nested dicts are delegated to racecar() together with their key, so
    every line carries its full key path.
    """
    for key in goomba:
        if isinstance(goomba[key], dict):
            racecar(goomba[key], [key])
        else:
            sys.stdout.write(key + ' ')
            for val in goomba[key]:
                sys.stdout.write(val + ' ')
            sys.stdout.write('xxx\n')
racecar(dic)
racecar2(dic)
returns:
peak load 0 1 xxx
resources info load 37 17 xxx

Compare dictionaries ignoring specific keys

How can I test if two dictionaries are equal while taking some keys out of consideration. For example,
equal_dicts(
{'foo':1, 'bar':2, 'x':55, 'y': 77 },
{'foo':1, 'bar':2, 'x':66, 'z': 88 },
ignore_keys=('x', 'y', 'z')
)
should return True.
UPD: I'm looking for an efficient, fast solution.
UPD2. I ended up with this code, which appears to be the fastest:
def equal_dicts_1(a, b, ignore_keys):
    """Return True when dicts ``a`` and ``b`` are equal, ignoring some keys.

    Args:
        a, b: the dicts to compare.
        ignore_keys: iterable of keys left out of the comparison.
    """
    ka = set(a).difference(ignore_keys)
    kb = set(b).difference(ignore_keys)
    # The key sets must match before the values are compared, otherwise
    # b[k] below could raise KeyError.
    return ka == kb and all(a[k] == b[k] for k in ka)
Timings: https://gist.github.com/2651872

def equal_dicts(d1, d2, ignore_keys):
    """Return True when ``d1`` and ``d2`` are equal, ignoring some keys.

    Args:
        d1, d2: the dicts to compare.
        ignore_keys: iterable of keys left out of the comparison.
    """
    # Build the set once so each membership test is O(1) even when a list
    # is passed in.
    ignored = set(ignore_keys)
    d1_filtered = {k: v for k, v in d1.items() if k not in ignored}
    d2_filtered = {k: v for k, v in d2.items() if k not in ignored}
    return d1_filtered == d2_filtered
EDIT: This might be faster and more memory-efficient:
def equal_dicts(d1, d2, ignore_keys):
    """Return True when ``d1`` and ``d2`` are equal, ignoring some keys.

    No filtered copies are built, and the function returns as soon as the
    first difference is found.

    Args:
        d1, d2: the dicts to compare.
        ignore_keys: iterable of keys left out of the comparison.
    """
    ignored = set(ignore_keys)
    # .items() instead of the Python-2-only .iteritems().
    for k1, v1 in d1.items():
        if k1 not in ignored and (k1 not in d2 or d2[k1] != v1):
            return False
    # Values were already compared above; only extra keys in d2 remain.
    for k2 in d2:
        if k2 not in ignored and k2 not in d1:
            return False
    return True

Using dict comprehensions:
>>> {k: v for k,v in d1.items() if k not in ignore_keys} == \
... {k: v for k,v in d2.items() if k not in ignore_keys}
Use .viewitems() instead on Python 2.

Here's another variant:
set(ignore_keys).issuperset(k for (k, v) in d1.items() ^ d2.items())
Its virtues:
C speed identification of differences between the dicts
C speed check for membership in the set of ignored keys
Early-out if a single mismatch is found

Very very crudely, you could just delete any ignored keys and compare those dictionaries:
def equal_dicts(d1, d2, ignore_keys=()):
    """Return True when ``d1`` and ``d2`` are equal, ignoring some keys.

    Works on shallow copies, so neither argument is modified.

    Args:
        d1, d2: the dicts to compare.
        ignore_keys: iterable of keys left out of the comparison.
    """
    d1_, d2_ = d1.copy(), d2.copy()
    for k in ignore_keys:
        # pop() with a default drops the key if present and does nothing
        # otherwise.
        d1_.pop(k, None)
        d2_.pop(k, None)
    return d1_ == d2_
(Note that we don't need a deep copy here, we just need to avoid modifying d1 and d2.)

def compare_dict(d1, d2, ignore):
    """Return True when ``d1`` and ``d2`` are equal, ignoring some keys.

    The check is symmetric: a non-ignored key that exists in only one of
    the dicts makes the result False, whichever dict holds it.

    Args:
        d1, d2: the dicts to compare.
        ignore: iterable of keys left out of the comparison.
    """
    ignore = set(ignore)
    for k in d1:
        if k in ignore:
            continue
        if k not in d2 or d1[k] != d2[k]:
            return False
    # Keys that exist only in d2 would otherwise go unnoticed.
    return all(k in ignore or k in d1 for k in d2)
Comment edit: You can do something like compare_dict(d1, d2, ignore) and compare_dict(d2, d1, ignore) or duplicate the for
def compare_dict(d1, d2, ignore):
    """Return True when ``d1`` and ``d2`` are equal, ignoring some keys.

    Args:
        d1, d2: the dicts to compare.
        ignore: iterable of keys left out of the comparison.
    """
    ignore = set(ignore)
    for k in d1:
        if k in ignore:
            continue
        if k not in d2 or d1[k] != d2[k]:
            return False
    # Shared keys were compared above, so the second pass only has to look
    # for non-ignored keys that d1 lacks.
    for k in d2:
        if k not in ignore and k not in d1:
            return False
    return True
Whatever is faster and cleaner!
Update: cast set(ignore)

If you need this check when testing, you can use the ANY from the unittest.mock library.
Here is an example.
from unittest.mock import ANY
actual = {'userName':'bob', 'lastModified':'2012-01-01'}
expected = {'userName':'bob', 'lastModified': ANY}
assert actual == expected
See more

Optimal solution for the case of ignoring only one key
return all(
(x == y or (x[1] == y[1] == 'key to ignore')) for x, y in itertools.izip(
d1.iteritems(), d2.iteritems()))

in case your dictionary contained lists or other dictionaries:
# Recursive comparison of d1 against d2 that skips the keys in ignore_keys.
# `equal` carries the result accumulated so far and is returned at the end;
# several checks return False immediately instead.
# NOTE(review): indentation was lost when this snippet was extracted, so the
# exact nesting of the checks below cannot be confirmed.
def equal_dicts(d1, d2, ignore_keys, equal):
# print('got d1', d1)
# print('got d2', d2)
# Strings (reached when list items are compared) are compared directly.
if isinstance(d1, str):
if not isinstance(d2, str):
return False
return d1 == d2
# Only the keys of d1 are iterated over.
for k in d1:
if k in ignore_keys:
continue
if not isinstance(d1[k], dict) and not isinstance(d1[k], list) and d2.get(k) != d1[k]:
# Prints the key whose scalar values differ.
print(k)
equal = False
elif isinstance(d1[k], list):
if not isinstance(d2.get(k), list):
equal = False
# d2[k] is subscripted directly from here on.
if len(d1[k]) != len(d2[k]):
return False
if len(d1[k]) > 0 and isinstance(d1[k][0], dict):
if not isinstance(d2[k][0], dict):
return False
# Lists of dicts are sorted by each item's 'created' value before being
# compared pairwise.
d1_sorted = sorted(d1[k], key=lambda item: item.get('created'))
d2_sorted = sorted(d2[k], key=lambda item: item.get('created'))
equal = all(equal_dicts(x, y, ignore_keys, equal) for x, y in zip(d1_sorted, d2_sorted)) and equal
else:
# Other lists are compared pairwise in their existing order.
equal = all(equal_dicts(x, y, ignore_keys, equal) for x, y in zip(d1[k], d2[k])) and equal
elif isinstance(d1[k], dict):
if not isinstance(d2.get(k), dict):
equal = False
print(k)
equal = equal_dicts(d1[k], d2[k], ignore_keys, equal) and equal
return equal

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to recursively find specific key in nested JSON? - python

Related

Function to count the same key's in nested composed dict-list

Python recursively add to number if value is int else if value is dictionary then add all items from that dictionary

python compare dictionary to just return True/False [duplicate]

Traversing a dictionary recursively

Compare dictionaries ignoring specific keys

Categories

Resources