Obtaining all key path combinations in a json/dictionary in python - python

I want to be able to obtain all the various paths to the keys in a JSON file. I often receive large JSON documents and I'm not exactly sure where a given data element might be, or I need to query various elements of the data. Visualizing the JSON as a tree can be inconvenient.
Basically I want to get a list of all the different paths to make various future tasks easier.
For example:
# Sample nested document used throughout the question.
myjson = dict(
    transportation='car',
    address=dict(
        driveway='yes',
        home_address=dict(state='TX', city='Houston'),
    ),
    work_address=dict(
        state='TX',
        city='Sugarland',
        location='office-tower',
        salary=30000,
    ),
)
It would be great if I could run some type of loop to get a list back in this format below or in a format....
myjson['address']['driveway']
myjson.address
myjson.address.driveway
myjson.address.home_address
myjson.address.home_address.city
myjson.address.home_address.state
myjson.transportation
myjson.work_address
myjson.work_address.city
myjson.work_address.location
myjson.work_address.salary
myjson.work_address.state
For example I've started with
# Collect every top-level key, plus "parent.child" for keys exactly one level down.
mylist = []
for outer_key in myjson:
    mylist.append(outer_key)
    child = myjson[outer_key]
    # Only plain dicts are expanded; deeper levels are not visited.
    if type(child) is dict:
        mylist.extend(outer_key + '.' + inner_key for inner_key in child)
print(mylist)
I guess this kinda works, but I don't know how to make this iterate indefinitely. For example, how would I build this up to being 3-10+ layers deep?

Great snippet !
Here is a version that also handles lists:
def get_keys(some_dictionary, parent=None):
    """Append the dotted path of every key under ``some_dictionary`` to ``my_list``.

    Results are accumulated in the module-level list ``my_list``, which must
    exist before the first call; paths already present are not added again.

    Args:
        some_dictionary: A dict, a list, or any leaf value. Lists are walked
            element by element without adding an index to the path; anything
            that is neither a dict nor a list is ignored.
        parent: Dotted prefix for the generated paths, or ``None`` for no prefix.
    """
    if isinstance(some_dictionary, list):
        # List elements share the path of the key that holds the list.
        for element in some_dictionary:
            get_keys(element, parent=parent)
        return
    if not isinstance(some_dictionary, dict):
        # Leaves (str, int, None, ...) carry no keys of their own.
        return
    for key, value in some_dictionary.items():
        path = '{}'.format(key) if parent is None else '{}.{}'.format(parent, key)
        if path not in my_list:
            my_list.append(path)
        if isinstance(value, (dict, list)):
            get_keys(value, parent=path)

I think this should do what you're asking:
myjson = {
    'transportation': 'car',
    'address': {
        'driveway': 'yes',
        'home_address': {
            'state': 'TX',
            'city': 'Houston',
        },
    },
    'work_address': {
        'state': 'TX',
        'city': 'Sugarland',
        'location': 'office-tower',
        'salary': 30000,
    },
}


def get_keys(some_dictionary, parent=None):
    """Append the dotted path of every key in ``some_dictionary`` to ``my_list``.

    Nested dicts are walked recursively. Results are accumulated in the
    module-level list ``my_list``; a path already in the list is not repeated.

    Args:
        some_dictionary: The dict to walk.
        parent: Dotted prefix for the generated paths, or ``None`` for no prefix.
    """
    for key, value in some_dictionary.items():
        # Without this check a missing prefix would be rendered as "None.key".
        path = '{}'.format(key) if parent is None else '{}.{}'.format(parent, key)
        if path not in my_list:
            my_list.append(path)
        if isinstance(value, dict):
            get_keys(value, parent=path)


my_list = []
get_keys(myjson, parent='myjson')
print(my_list)
Outputs:
['myjson.transportation',
'myjson.work_address',
'myjson.work_address.city',
'myjson.work_address.state',
'myjson.work_address.location',
'myjson.work_address.salary',
'myjson.address',
'myjson.address.driveway',
'myjson.address.home_address',
'myjson.address.home_address.city',
'myjson.address.home_address.state']
The key is to just keep calling get_keys() recursively from within the function!

An implementation handling paths of lists in json also.
import json
def get_json_key_path(jsonStr, enable_index):
    """Return the dotted paths of the keys found in a JSON document.

    Args:
        jsonStr: JSON text. Only a top-level object produces paths.
        enable_index: When true, the position of each list element is inserted
            into the path (``a.0.b``); otherwise list elements share the path
            of the key that holds the list (``a.b``).

    Returns:
        A list of unique dotted paths in discovery order. A key whose value is
        a list is not reported itself; only keys of dicts inside the list are.
        Lists nested directly inside lists are not descended into.
    """
    json_keys = []

    def get_key_path(node, parent=None):
        # Only dicts contribute keys; scalars and nested lists are skipped.
        if not isinstance(node, dict):
            return
        for key, value in node.items():
            path = '{}'.format(key) if parent is None else '{}.{}'.format(parent, key)
            if isinstance(value, list):
                for index, item in enumerate(value):
                    item_parent = '{}.{}'.format(path, index) if enable_index else path
                    get_key_path(item, parent=item_parent)
                continue
            if path not in json_keys:
                json_keys.append(path)
            if isinstance(value, dict):
                get_key_path(value, parent=path)

    get_key_path(json.loads(jsonStr))
    return json_keys

Related

Find key recursively in dictionary and then list all parent keys

My question is similar to Finding a key recursively in a dictionary except that once I find the key, I would like to list all parent keys that lead me to the target key.
Logically, I feel like I know what I need to do: store the "path" by appending keys to a list as I descend into the dictionary. If I get to the "bottom" of the dictionary and don't find the key I'm looking for, then I need to reset the path. But I can't think of how to implement this in Python. My current solution just prints out the target key in a list:
def list_parents(obj, key):
    """Return ``[key]`` for every dict under ``obj`` that contains ``key``.

    The search stops descending at the first level where ``key`` is present.
    Parent keys are not recorded, so a nested hit still yields just ``[key]``.
    """
    if key in obj:
        return [key]
    hits = []
    for child in obj.values():
        if isinstance(child, dict):
            hits += list_parents(child, key)
    return hits
Try adding the path as an optional parameter:
def list_parents(obj, key, path=None):
    """Return the list of keys leading to the first occurrence of ``key``.

    Args:
        obj: The (possibly nested) dict to search.
        key: The key to look for.
        path: Keys already traversed to reach ``obj``; it is never mutated.
            ``None`` means the search starts at the root.

    Returns:
        A new list ending in ``key``, or ``None`` when the key is not found.
    """
    # A fresh list per call: a shared default list would grow across calls.
    prefix = [] if path is None else path
    if key in obj:
        return prefix + [key]
    for k, v in obj.items():
        if isinstance(v, dict):
            found = list_parents(v, key, path=prefix + [k])
            if found:
                return found
    return None


keys = ["A", "E", "G"]
for key in keys:
    res = list_parents({"B": {"A": 2}, "C": 1, "D": {"E": 3, "F": {"G": 3}}}, key)
    print(res)
Output
['B', 'A']
['D', 'E']
['D', 'F', 'G']
Or as an alternative:
def list_parents(obj, key):
    """Return the keys leading to the first occurrence of ``key``, or ``None``.

    The path is assembled on the way back up: each level prepends its own key
    to the path returned by the nested search.
    """
    if key in obj:
        return [key]
    nested = ((name, child) for name, child in obj.items() if isinstance(child, dict))
    for name, child in nested:
        tail = list_parents(child, key)
        if tail:
            return [name] + tail
    return None
To improve the complexity of the above approach you could use a deque:
from collections import deque
def list_parents(obj, key):
    """Return a deque of the keys leading to ``key``, or ``None`` if absent.

    Each level pushes its own key onto the left end of the deque returned by
    the nested search.
    """
    if key in obj:
        return deque((key,))
    for name, child in obj.items():
        if not isinstance(child, dict):
            continue
        trail = list_parents(child, key)
        if trail:
            trail.appendleft(name)
            return trail
    return None
The reason to use a deque is that inserting to the front of a list (the line [k, *found]) is O(n) vs O(1) in a deque.
Alternatively, you could try this way. It still uses recursion to search through the nested dict.
dc = {
    'A': 'vaa',
    'B': 'vbb',
    'C': {'kc': 'vcc'},
    'D': {
        'kd': {
            'kdd1': 'dd1',
            'kdd11': 'abcc',
            'key12': 'abcd',
        },
        'kdd2': 'dd2',
    },
}


def find_parents(D, value):
    """Return the list of keys leading to the first leaf equal to ``value``.

    Args:
        D: The (possibly nested) dict to search.
        value: The leaf value to look for; dict values are descended into
            rather than compared.

    Returns:
        The keys from the root down to the matching entry, or ``None`` when
        no leaf equals ``value``.
    """
    for k, v in D.items():
        if isinstance(v, dict):
            parent = find_parents(v, value)  # search down
            if parent:
                return [k] + parent
        elif v == value:
            return [k]
    return None


print(find_parents(dc, 'abcd'))  # ['D', 'kd', 'key12']
Another solution:
You can make recursive generator that yields all paths. In your program you will do a check if the path is "correct" (e.g. the last element in path is your key):
dct = {"a": {"b": {}, "c": {"d": "xxx"}}}


def list_parents(obj, path=None):
    """Yield the key path of every entry under ``obj``, parents before children.

    Each yielded path is a new list; non-dict values yield nothing.
    """
    prefix = [] if path is None else path
    if not isinstance(obj, dict):
        return
    for name, child in obj.items():
        current = prefix + [name]
        yield current
        yield from list_parents(child, current)


key = "d"
# Take the first path whose last component is the wanted key.
path = next(candidate for candidate in list_parents(dct) if candidate[-1] == key)
print(".".join(path))
Prints:
a.c.d

Create mechanism for updating key value pair

Given the following JSON, I want to create a mechanism in Python for updating key-value pairs.
# Default settings; nested dicts group related options.
defaults = {
    "price_std": 1.0,
    "ncoen_ece": {
        "iz_default": "EST",
        "Iz_options": ["US/EST", "Indian", "Pacific"],
    },
    "filters": {
        "remove_hording": False,
        "remove workdays": True,
        "remove states": {
            "CA": True,
            "GA": False,
        },
    },
}
I am trying to load it in dictionary, loop through dictionary to check if key is present, if key is not present look through value of array.
I have tried the following code:
def update_dict(d, **kwargs):
    """Update ``d`` in place from keyword arguments.

    Keys named ``izoptions``, ``states`` or ``holidays`` are only overwritten
    where a non-dict entry with that key already exists, in ``d`` or in any
    nested dict. Every other key is set directly on ``d``.
    """
    nested_only = ('izoptions', 'states', 'holidays')
    for arg, value in kwargs.items():
        if arg not in nested_only:
            d.update({arg: value})
            continue
        for k, v in d.items():
            if isinstance(v, dict):
                update_dict(v, **{arg: value})
            elif k == arg:
                d.update({arg: value})
I am trying to load it in dictionary, loop through dictionary to check if key is present, if key is not present look through value of array.
i have tried the following code
def update_dict(d, **kwargs):
    """Update ``d`` in place from keyword arguments.

    Keys named ``izoptions``, ``states`` or ``holidays`` are only overwritten
    where a non-dict entry with that key already exists, in ``d`` or in any
    nested dict. Every other key is set directly on ``d``.
    """
    nested_only = ('izoptions', 'states', 'holidays')
    for arg, value in kwargs.items():
        if arg not in nested_only:
            d.update({arg: value})
            continue
        for k, v in d.items():
            if isinstance(v, dict):
                update_dict(v, **{arg: value})
            elif k == arg:
                d.update({arg: value})

if item in dictionary == True -> return dictionary name as a string

I need perform search in some dictionary structure :
# Two-level sample structure: one nested dict and one plain value.
dic_global = dict(
    key_lev1_1=dict(
        key_lev2_a='some_value_1',
        key_lev2_b='some_value_2',
    ),
    key_lev1_2='some_value_111',
)
and I need to perform recursive search for item in the whole structure so that function would return the key where the item was.
So if the function finds 'key_lev2_a', it returns 'key_lev1_1' - the key of the enclosing dictionary that contains 'key_lev2_a'.
is it possible ?
def find_key(obj, key):
    """Locate ``key`` in a nested dict.

    Returns ``obj`` itself when ``key`` is one of its own keys; otherwise the
    key of the first dict-valued entry of ``obj`` under which ``key`` is found
    at any depth. Returns ``None`` when ``key`` does not occur.
    """
    if key in obj:
        return obj
    for name, child in obj.items():
        if isinstance(child, dict) and find_key(child, key) is not None:
            return name
    return None
(source: modified Get key by value in dictionary)

change the value of a multidimentional dict in python with nested dict enclosed in a list

for example, let's say i have a dict like this:
# NOTE: both entries below use the same key 'ad' (the quote style makes no
# difference), so the resulting dict has a single 'ad' key holding the list.
bulk_details={
"ad":'ad',
'ad':[
{'ad':'ad'},
{'ad':'ad'}
]
}
I want to encrypt the values in the dict. i'm stuck at parsing the inner dicts inside the list
my code is this:
# Only dict values are expanded with .items(); any other value is iterated
# directly and each of its elements is unpacked as a (key_, val_) pair.
new_data = {key: {key_: encrypt(val_) for key_, val_ in (val.items() if type(val) is dict else val)} for key, val in (bulk_details.items() if type(bulk_details) is dict else bulk_details) }
This is not nearly as compact as your one-liner, but it solves your problem and you can perhaps make it more compact:
bulk_details = {
    'ad': 'ad',
    'ad2': [
        {'ad': 'ad'},
        {'ad': 'ad'},
    ],
}


def encrypt(to_encrypt):
    """Placeholder cipher: wrap the text in question marks."""
    return '?' + to_encrypt + '?'


def encrypt_nested(dt):
    """Replace every string value in a nested dict/list structure in place.

    Args:
        dt: A dict or list, nested to any depth. Any other value is returned
            unchanged, so non-string leaves such as numbers are left as they are.

    Returns:
        The same object that was passed in, after modification.
    """
    if isinstance(dt, dict):
        entries = dt.items()
    elif isinstance(dt, list):
        # Index-based access is required: rebinding the loop variable would
        # leave the list element itself untouched.
        entries = enumerate(dt)
    else:
        return dt
    for slot, value in entries:
        if isinstance(value, str):
            dt[slot] = encrypt(value)
        else:
            encrypt_nested(value)
    return dt


print(encrypt_nested(bulk_details))
# {'ad': '?ad?', 'ad2': [{'ad': '?ad?'}, {'ad': '?ad?'}]}
It iterates through a nested dict including arrays for any amount of levels using a recursing function.

Finding a key recursively in a dictionary

I'm trying to write a very simple function to recursively search through a possibly nested (in the most extreme cases ten levels deep) Python dictionary and return the first value it finds from the given key.
I cannot understand why my code doesn't work for nested dictionaries.
def _finditem(obj, key):
if key in obj: return obj[key]
for k, v in obj.items():
if isinstance(v,dict):
_finditem(v, key)
print _finditem({"B":{"A":2}},"A")
It returns None.
It does work, however, for _finditem({"B":1,"A":2},"A"), returning 2.
I'm sure it's a simple mistake but I cannot find it. I feel like there already might be something for this in the standard library or collections, but I can't find that either.
If you are looking for a general explanation of what is wrong with code like this, the canonical is Why does my recursive function return None?. The answers here are mostly specific to the task of searching in a nested dictionary.
when you recurse, you need to return the result of _finditem
def _finditem(obj, key):
if key in obj: return obj[key]
for k, v in obj.items():
if isinstance(v,dict):
return _finditem(v, key) #added return statement
To fix the actual algorithm, you need to realize that _finditem returns None if it didn't find anything, so you need to check that explicitly to prevent an early return:
def _finditem(obj, key):
if key in obj: return obj[key]
for k, v in obj.items():
if isinstance(v,dict):
item = _finditem(v, key)
if item is not None:
return item
Of course, that will fail if you have None values in any of your dictionaries. In that case, you could set up a sentinel object() for this function and return that in the case that you don't find anything -- Then you can check against the sentinel to know if you found something or not.
Here's a function that searches a dictionary that contains both nested dictionaries and lists. It creates a list of the values of the results.
def get_recursively(search_dict, field):
    """Collect the values stored under ``field`` anywhere in a nested structure.

    Args:
        search_dict: A dict whose values may be dicts or lists of dicts.
        field: The key to look for.

    Returns:
        A list of every matching value in traversal order. A value that
        matches is not searched further, and list elements that are not dicts
        are skipped.
    """
    fields_found = []
    # .items() rather than the Python 2-only .iteritems().
    for key, value in search_dict.items():
        if key == field:
            fields_found.append(value)
        elif isinstance(value, dict):
            fields_found.extend(get_recursively(value, field))
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    fields_found.extend(get_recursively(item, field))
    return fields_found
Here is a way to do this using a "stack" and the "stack of iterators" pattern (credits to Gareth Rees):
def search(d, key, default=None):
    """Look up ``key`` in the (possibly nested) dictionary ``d``.

    Returns the first matching non-dict value found in a depth-first walk, or
    ``default`` when there is none. Dict values are descended into and never
    returned themselves.
    """
    # One live iterator per nesting level; the innermost is at the end.
    pending = [iter(d.items())]
    while pending:
        entry = next(pending[-1], None)
        if entry is None:
            # This level is exhausted: resume the enclosing one.
            pending.pop()
            continue
        name, value = entry
        if isinstance(value, dict):
            pending.append(iter(value.items()))
        elif name == key:
            return value
    return default
The print(search({"B": {"A": 2}}, "A")) would print 2.
Just trying to make it shorter:
def get_recursively(search_dict, field):
    """Return the first non-``None`` value stored under ``field``, else ``None``.

    Both dicts and lists are descended into; any other value ends the search
    on that branch.
    """
    if isinstance(search_dict, dict):
        if field in search_dict:
            return search_dict[field]
        children = search_dict.values()
    elif isinstance(search_dict, list):
        children = search_dict
    else:
        return None
    for child in children:
        found = get_recursively(child, field)
        if found is not None:
            return found
    return None
Here's a Python 3.3+ solution which can handle lists of lists of dicts.
It also uses duck typing, so it can handle any iterable, or object implementing the 'items' method.
from typing import Iterator
def deep_key_search(obj, key: str) -> Iterator:
    """Yield the value of every ``key`` entry found anywhere inside ``obj``.

    :param obj: A dict-like object (anything with ``items``) or an iterable.
    :param key: The key whose values are wanted.
    :return: Iterator over the matching values; a matching value is not
        searched further.
    """
    # Strings are iterable, so they must be cut off before duck-typing below.
    if isinstance(obj, str):
        return
    try:
        # Treat obj as a mapping first.
        for name, value in obj.items():
            if name != key:
                yield from deep_key_search(value, key)
                continue
            yield value
    except AttributeError:
        # No items(): fall back to treating obj as a plain iterable.
        try:
            for element in obj:
                yield from deep_key_search(element, key)
        except TypeError:
            # Not iterable either, so there is nothing to search.
            pass
Pytest:
import pytest


# Each case: input structure, expected 'Foo' values in order, description.
@pytest.mark.parametrize(
    "data, expected, dscr", [
        ({}, [], "Empty dict"),
        ({'Foo': 1, 'Bar': 2}, [1], "Plain dict"),
        ([{}, {'Foo': 1, 'Bar': 2}], [1], "List[dict]"),
        ([[[{'Baz': 3, 'Foo': 'a'}]], {'Foo': 1, 'Bar': 2}], ['a', 1], "Deep list"),
        ({'Foo': 1, 'Bar': {'Foo': 'c'}}, [1, 'c'], "Dict of Dict"),
        (
            {'Foo': 1, 'Bar': {'Foo': 'c', 'Bar': 'abcdef'}},
            [1, 'c'], "Contains a non-selected string value"
        ),
    ])
def test_deep_key_search(data, expected, dscr):
    """Check that all 'Foo' values are found, in traversal order."""
    assert list(deep_key_search(data, 'Foo')) == expected
I couldn't add a comment to the accepted solution proposed by @mgilston because of a lack of reputation. That solution doesn't work if the key being searched for is inside a list.
Looping through the elements of the lists and calling the recursive function should extend the functionality to find elements inside nested lists:
def _finditem(obj, key):
if key in obj: return obj[key]
for k, v in obj.items():
if isinstance(v,dict):
item = _finditem(v, key)
if item is not None:
return item
elif isinstance(v,list):
for list_item in v:
item = _finditem(list_item, key)
if item is not None:
return item
print(_finditem({"C": {"B": [{"A":2}]}}, "A"))
I had to create a general-case version that finds a uniquely-specified key (a minimal dictionary that specifies the path to the desired value) in a dictionary that contains multiple nested dictionaries and lists.
For the example below, a target dictionary is created to search, and the key is created with the wildcard "???". When run, it returns the value "D"
from typing import Dict, List


def lfind(query_list: List, target_list: List, targ_str: str = "???"):
    """Match the first element of ``query_list`` against each target element.

    Dict targets are matched with ``dfind`` when the query element is a dict;
    other targets match when equal to the query element. Returns the first
    match, or ``None`` (also when ``query_list`` is empty).
    """
    if not query_list:
        return None
    query = query_list[0]
    for tval in target_list:
        if isinstance(tval, dict) and isinstance(query, dict):
            val = dfind(query, tval, targ_str)
            # Compare with None so that falsy matches (0, "") are kept.
            if val is not None:
                return val
        elif tval == query:
            return tval
    return None


def dfind(query_dict: Dict, target_dict: Dict, targ_str: str = "???"):
    """Return the target value at the position marked by ``targ_str``.

    ``query_dict`` is walked key by key alongside ``target_dict``. A scalar
    query value must equal the target value; the wildcard ``targ_str`` returns
    the target value found there. Returns ``None`` when a scalar differs, a
    queried key is missing from the target, or no wildcard is reached.
    """
    for key, qval in query_dict.items():
        if key not in target_dict:
            return None
        tval = target_dict[key]
        if isinstance(qval, dict):
            val = dfind(qval, tval, targ_str)
            if val is not None:
                return val
        elif isinstance(qval, list):
            return lfind(qval, tval, targ_str)
        elif qval == targ_str:
            return tval
        elif qval != tval:
            return None
    return None


def find(target_dict: Dict, query_dict: Dict):
    """Return the value in ``target_dict`` selected by the "???" wildcard in ``query_dict``."""
    return dfind(query_dict, target_dict)


target_dict = {
    "A": [
        {"key1": "A", "key2": {"key3": "B"}},
        {"key1": "C", "key2": {"key3": "D"}},
    ]
}
query_dict = {"A": [{"key1": "C", "key2": {"key3": "???"}}]}
result = find(target_dict, query_dict)
print("result = {}".format(result))
Thought I'd throw my hat in the ring, this will allow for recursive requests on anything that implements a __getitem__ method.
def _get_recursive(obj, args, default=None):
"""Apply successive requests to an obj that implements __getitem__ and
return result if something is found, else return default"""
if not args:
return obj
try:
key, *args = args
_obj = object.__getitem__(obj, key)
return _get_recursive(_obj, args, default=default)
except (KeyError, IndexError, AttributeError):
return default

Categories

Resources