I want to obtain all the paths to the keys in a JSON file. I often receive large JSON documents and I'm not sure exactly where a particular data element might be, or I need to query various elements of the data, and visualizing the JSON as a tree can be inconvenient.
Basically I want to get a list of all the different paths to make various future tasks easier.
For example:
myjson = {
    'transportation': 'car',
    'address': {
        'driveway': 'yes',
        'home_address': {
            'state': 'TX',
            'city': 'Houston'
        }
    },
    'work_address': {
        'state': 'TX',
        'city': 'Sugarland',
        'location': 'office-tower',
        'salary': 30000
    }
}
It would be great if I could run some kind of loop and get a list back in the format below, or something similar:
myjson['address']['driveway']
myjson.address
myjson.address.driveway
myjson.address.home_address
myjson.address.home_address.city
myjson.address.home_address.state
myjson.transportation
myjson.work_address
myjson.work_address.city
myjson.work_address.location
myjson.work_address.salary
myjson.work_address.state
For example, I've started with:
mylist = []
for key, value in myjson.items():
    mylist.append(key)
    if type(value) is dict:
        for key2, value2 in myjson[key].items():
            mylist.append(key + '.' + key2)
print(mylist)
This sort of works, but I don't know how to make it recurse indefinitely. For example, how would I extend it to handle structures that are 3-10+ levels deep?
Great snippet! Here is a version which also handles lists:
def get_keys(some_dictionary, parent=None):
    # Collect dotted key paths into the module-level my_list,
    # descending into nested dicts and into dicts contained in lists.
    if not isinstance(some_dictionary, dict):
        # Skip scalars (strings, numbers) reached inside lists
        return
    for key, value in some_dictionary.items():
        path = '{}.{}'.format(parent, key)
        if path not in my_list:
            my_list.append(path)
        if isinstance(value, dict):
            get_keys(value, parent=path)
        elif isinstance(value, list):
            for v in value:
                get_keys(v, parent=path)
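This version appends to the same module-level my_list used in the answer below. A minimal usage sketch (with a made-up payload containing a list) might look like this:

my_list = []
get_keys({'items': [{'id': 1}, {'id': 2}], 'name': 'x'}, parent='myjson')
print(my_list)
# expected: ['myjson.items', 'myjson.items.id', 'myjson.name']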
I think this should do what you're asking:
myjson = {
    'transportation': 'car',
    'address': {
        'driveway': 'yes',
        'home_address': {
            'state': 'TX',
            'city': 'Houston'
        }
    },
    'work_address': {
        'state': 'TX',
        'city': 'Sugarland',
        'location': 'office-tower',
        'salary': 30000
    }
}
def get_keys(some_dictionary, parent=None):
    for key, value in some_dictionary.items():
        if '{}.{}'.format(parent, key) not in my_list:
            my_list.append('{}.{}'.format(parent, key))
        if isinstance(value, dict):
            get_keys(value, parent='{}.{}'.format(parent, key))

my_list = []
get_keys(myjson, parent='myjson')
print(my_list)
Outputs:
['myjson.transportation',
'myjson.work_address',
'myjson.work_address.city',
'myjson.work_address.state',
'myjson.work_address.location',
'myjson.work_address.salary',
'myjson.address',
'myjson.address.driveway',
'myjson.address.home_address',
'myjson.address.home_address.city',
'myjson.address.home_address.state']
The key is to just keep calling get_keys() recursively from within the function!
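If you'd rather avoid the module-level my_list, a minimal variant sketch that builds and returns the list locally (same traversal, no global state) could look like this:

def get_keys(some_dictionary, parent='myjson'):
    # Return a list of dotted paths to every key in a nested dict.
    paths = []
    for key, value in some_dictionary.items():
        path = '{}.{}'.format(parent, key)
        paths.append(path)
        if isinstance(value, dict):
            paths.extend(get_keys(value, parent=path))
    return paths

print(get_keys(myjson))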
An implementation that also handles paths through lists in the JSON:
import json

def get_json_key_path(jsonStr, enable_index):
    json_keys = []
    jsonObj = json.loads(jsonStr)

    def get_key_path(jsonObj, parent=None):
        if not isinstance(jsonObj, dict):
            return
        for key, value in jsonObj.items():
            if not isinstance(value, list) and '{}.{}'.format(parent, key) not in json_keys:
                json_keys.append('{}.{}'.format(parent, key))
            if isinstance(value, dict):
                get_key_path(value, parent='{}.{}'.format(parent, key))
            elif isinstance(value, list):
                for i, obj in enumerate(value):
                    if enable_index:
                        get_key_path(obj, parent='{}.{}.{}'.format(parent, key, i))
                    else:
                        get_key_path(obj, parent='{}.{}'.format(parent, key))

    get_key_path(jsonObj, "")
    return [s[1:] for s in json_keys]
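A quick usage sketch (the sample payload here is made up) showing the effect of enable_index:

sample = '{"name": "x", "items": [{"id": 1}, {"id": 2}]}'
print(get_json_key_path(sample, enable_index=False))
# expected: ['name', 'items.id']
print(get_json_key_path(sample, enable_index=True))
# expected: ['name', 'items.0.id', 'items.1.id']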
I'm a beginner programmer and I've been trying to parse a json output file from an API GET request in order to pull longitude and latitude coordinates.
The JSON file looks like this.
The JSON input file is here.
My code for parsing the json file currently looks like this:
ourResult = js['transactions'][0]['meta']
for majorkey, subdict in ourResult.iteritems():
    print majorkey
    for subkey, value in subdict.iteritems():
        print subkey, value
This, however, only returns the one set of values within the 'location' key, and I'm trying to go a level deeper to pull the 'lon' and 'lat' values.
Any idea what code I should be using for this?
As I understand your question, you need something like:
js = json.loads(response.content)
ourResult = js['transactions'][0]['meta']
for majorkey, subdict in ourResult.iteritems():
    print majorkey
    if type(subdict) == dict:
        for subkey, value in subdict.iteritems():
            print subkey, value
You can print a dict of arbitrary depth using:
def print_dict_rec(indict):
    for majorkey, subdict in indict.iteritems():
        if type(subdict) == dict:
            print majorkey
            print_dict_rec(subdict)
        else:
            print majorkey, subdict

print_dict_rec(ourResult)
Code for extracting all values for keys 'lat' and 'lon':
def get_values_json(js, res):
    if type(js) == list:
        for e in js:
            get_values_json(e, res)
    elif type(js) == dict:
        for k, v in js.iteritems():
            if type(v) == dict or type(v) == list:
                get_values_json(v, res)
            else:
                if k == 'lat' or k == 'lon':
                    res[k].append(v)

res = {'lat': [], 'lon': []}
get_values_json(js, res)
print res
I am using pymongo to insert a complex structure as a row in a collection. The structure is a dict of lists of dicts of lists of dicts, and so on.
Is there a way to find which field is unicode instead of str, i.e. the one causing the error? I have tried:
def dump(obj):
    with open('log', 'w') as flog:
        for attr in dir(obj):
            t, att = type(attr), getattr(obj, attr)
            output = "obj.%s = %s" % (t, att)
            flog.write(output)
but no luck so far.
Any clever recursive way to print everything maybe?
Thanks
The following helped me find out which dict contained unicode values, since a dict can be identified by its keys; the list case is less informative.
def find_the_damn_unicode(obj):
    if isinstance(obj, unicode):
        # The following conversion probably doesn't do anything meaningful, since
        # obj is likely a primitive type and thus passed by value. That's why
        # encoding is also performed inside the loops below.
        obj = obj.encode('utf-8')
        return obj
    if isinstance(obj, dict):
        for k, v in obj.items():
            if isinstance(v, unicode):
                print 'UNICODE value with key ', k
                obj[k] = obj[k].encode('utf-8')
            else:
                obj[k] = find_the_damn_unicode(v)
    if isinstance(obj, list):
        for i, v in enumerate(obj):
            if isinstance(v, unicode):
                print 'UNICODE inside a ... list'
                obj[i] = obj[i].encode('utf-8')
            else:
                obj[i] = find_the_damn_unicode(v)
    return obj
I'm trying to write a very simple function to recursively search through a possibly nested (in the most extreme cases ten levels deep) Python dictionary and return the first value it finds from the given key.
I cannot understand why my code doesn't work for nested dictionaries.
def _finditem(obj, key):
    if key in obj: return obj[key]
    for k, v in obj.items():
        if isinstance(v, dict):
            _finditem(v, key)

print _finditem({"B": {"A": 2}}, "A")
It returns None.
It does work, however, for _finditem({"B":1,"A":2},"A"), returning 2.
I'm sure it's a simple mistake but I cannot find it. I feel like there already might be something for this in the standard library or collections, but I can't find that either.
If you are looking for a general explanation of what is wrong with code like this, the canonical question is Why does my recursive function return None?. The answers here are mostly specific to the task of searching in a nested dictionary.
When you recurse, you need to return the result of _finditem:
def _finditem(obj, key):
    if key in obj: return obj[key]
    for k, v in obj.items():
        if isinstance(v, dict):
            return _finditem(v, key)  # added return statement
To fix the actual algorithm, you need to realize that _finditem returns None if it didn't find anything, so you need to check that explicitly to prevent an early return:
def _finditem(obj, key):
    if key in obj: return obj[key]
    for k, v in obj.items():
        if isinstance(v, dict):
            item = _finditem(v, key)
            if item is not None:
                return item
Of course, that will fail if you have None values in any of your dictionaries. In that case, you could set up a sentinel object() for this function and return it when nothing is found; then you can check against the sentinel to know whether you found something.
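A minimal sketch of that sentinel idea (the _NOT_FOUND name is just illustrative):

_NOT_FOUND = object()  # unique sentinel; can never collide with a stored value

def _finditem(obj, key):
    if key in obj:
        return obj[key]
    for k, v in obj.items():
        if isinstance(v, dict):
            item = _finditem(v, key)
            if item is not _NOT_FOUND:
                return item
    return _NOT_FOUND

result = _finditem({"B": {"A": None}}, "A")
print("found" if result is not _NOT_FOUND else "not found")  # found, even though the value is None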
Here's a function that searches a dictionary containing both nested dictionaries and lists. It builds a list of all the values it finds.
def get_recursively(search_dict, field):
    """
    Takes a dict with nested lists and dicts,
    and searches all dicts for a key of the field
    provided.
    """
    fields_found = []
    for key, value in search_dict.iteritems():
        if key == field:
            fields_found.append(value)
        elif isinstance(value, dict):
            results = get_recursively(value, field)
            for result in results:
                fields_found.append(result)
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    more_results = get_recursively(item, field)
                    for another_result in more_results:
                        fields_found.append(another_result)
    return fields_found
Here is a way to do this using a "stack" and the "stack of iterators" pattern (credits to Gareth Rees):
def search(d, key, default=None):
    """Return a value corresponding to the specified key in the (possibly
    nested) dictionary d. If there is no item with that key, return
    default.
    """
    stack = [iter(d.items())]
    while stack:
        for k, v in stack[-1]:
            if isinstance(v, dict):
                stack.append(iter(v.items()))
                break
            elif k == key:
                return v
        else:
            stack.pop()
    return default
Calling print(search({"B": {"A": 2}}, "A")) would print 2.
Just trying to make it shorter:
def get_recursively(search_dict, field):
    if isinstance(search_dict, dict):
        if field in search_dict:
            return search_dict[field]
        for key in search_dict:
            item = get_recursively(search_dict[key], field)
            if item is not None:
                return item
    elif isinstance(search_dict, list):
        for element in search_dict:
            item = get_recursively(element, field)
            if item is not None:
                return item
    return None
Here's a Python 3.3+ solution which can handle lists of lists of dicts.
It also uses duck typing, so it can handle any iterable, or object implementing the 'items' method.
from typing import Iterator

def deep_key_search(obj, key: str) -> Iterator:
    """ Do a deep search of {obj} and return the values of all {key} attributes found.
    :param obj: Either a dict type object or an iterator.
    :return: Iterator of all {key} values found"""
    if isinstance(obj, str):
        # When duck-typing iterators recursively, we must exclude strings
        return
    try:
        # Assume obj is like a dict and look for the key
        for k, v in obj.items():
            if k == key:
                yield v
            else:
                yield from deep_key_search(v, key)
    except AttributeError:
        # Not a dict type object. Is it iterable like a list?
        try:
            for v in obj:
                yield from deep_key_search(v, key)
        except TypeError:
            pass  # Not iterable either.
Pytest:
import pytest

@pytest.mark.parametrize(
    "data, expected, dscr", [
        ({}, [], "Empty dict"),
        ({'Foo': 1, 'Bar': 2}, [1], "Plain dict"),
        ([{}, {'Foo': 1, 'Bar': 2}], [1], "List[dict]"),
        ([[[{'Baz': 3, 'Foo': 'a'}]], {'Foo': 1, 'Bar': 2}], ['a', 1], "Deep list"),
        ({'Foo': 1, 'Bar': {'Foo': 'c'}}, [1, 'c'], "Dict of Dict"),
        (
            {'Foo': 1, 'Bar': {'Foo': 'c', 'Bar': 'abcdef'}},
            [1, 'c'], "Contains a non-selected string value"
        ),
    ])
def test_deep_key_search(data, expected, dscr):
    assert list(deep_key_search(data, 'Foo')) == expected
I couldn't add a comment to the accepted solution proposed by @mgilston because of lack of reputation. The solution doesn't work if the key being searched for is inside a list.
Looping through the elements of the lists and calling the recursive function should extend the functionality to find elements inside nested lists:
def _finditem(obj, key):
    if key in obj: return obj[key]
    for k, v in obj.items():
        if isinstance(v, dict):
            item = _finditem(v, key)
            if item is not None:
                return item
        elif isinstance(v, list):
            for list_item in v:
                item = _finditem(list_item, key)
                if item is not None:
                    return item

print(_finditem({"C": {"B": [{"A": 2}]}}, "A"))
I had to create a general-case version that finds a uniquely-specified key (a minimal dictionary that specifies the path to the desired value) in a dictionary that contains multiple nested dictionaries and lists.
In the example below, a target dictionary is created to search, and a query dictionary is created with the wildcard "???". When run, it returns the value "D".
from typing import Dict, List

def lfind(query_list: List, target_list: List, targ_str: str = "???"):
    for tval in target_list:
        # print("lfind: tval = {}, query_list[0] = {}".format(tval, query_list[0]))
        if isinstance(tval, dict):
            val = dfind(query_list[0], tval, targ_str)
            if val:
                return val
        elif tval == query_list[0]:
            return tval

def dfind(query_dict: Dict, target_dict: Dict, targ_str: str = "???"):
    for key, qval in query_dict.items():
        tval = target_dict[key]
        # print("dfind: key = {}, qval = {}, tval = {}".format(key, qval, tval))
        if isinstance(qval, dict):
            val = dfind(qval, tval, targ_str)
            if val:
                return val
        elif isinstance(qval, list):
            return lfind(qval, tval, targ_str)
        else:
            if qval == targ_str:
                return tval
            if qval != tval:
                break

def find(target_dict: Dict, query_dict: Dict):
    result = dfind(query_dict, target_dict)
    return result

target_dict = {"A": [
    {"key1": "A", "key2": {"key3": "B"}},
    {"key1": "C", "key2": {"key3": "D"}}]
}
query_dict = {"A": [{"key1": "C", "key2": {"key3": "???"}}]}
result = find(target_dict, query_dict)
print("result = {}".format(result))
Thought I'd throw my hat in the ring; this allows recursive lookups on anything that implements a __getitem__ method.
def _get_recursive(obj, args, default=None):
    """Apply successive requests to an obj that implements __getitem__ and
    return result if something is found, else return default"""
    if not args:
        return obj
    try:
        key, *args = args
        _obj = obj[key]  # i.e. obj.__getitem__(key)
        return _get_recursive(_obj, args, default=default)
    except (KeyError, IndexError, AttributeError, TypeError):
        return default
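A quick usage sketch (the data here is made up) mixing dict keys and list indices:

data = {"transactions": [{"meta": {"location": {"lat": 29.7, "lon": -95.4}}}]}
print(_get_recursive(data, ["transactions", 0, "meta", "location", "lat"]))   # 29.7
print(_get_recursive(data, ["transactions", 5, "meta"], default="missing"))   # missing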
OK, I'm working on getting better with Python, so I'm not sure this is the right way to go about this to begin with, but here's my current problem...
I need to get some information via a SOAP method, and only use part of the information now, but store the entire result for future use (we need to hit the service as little as possible). Looking up the best way to access the service, I figured suds was the way to go; it was simple and worked like a charm to get the data. But now I want to save the result somehow, preferably serialized or in a database, so I can pull it out later and use it the same way.
What's the best way to do this? It looks like pickle/json isn't an option. Thanks!
Update
Reading the top answer at How can I pickle suds results? gives me a better idea of why this isn't an option. I guess I'm stuck recreating a basic object with the information I need?
I have been using the following approach to convert a Suds object into JSON:
import json
from suds.sudsobject import asdict

def recursive_asdict(d):
    """Convert Suds object into serializable format."""
    out = {}
    for k, v in asdict(d).items():
        if hasattr(v, '__keylist__'):
            out[k] = recursive_asdict(v)
        elif isinstance(v, list):
            out[k] = []
            for item in v:
                if hasattr(item, '__keylist__'):
                    out[k].append(recursive_asdict(item))
                else:
                    out[k].append(item)
        else:
            out[k] = v
    return out

def suds_to_json(data):
    return json.dumps(recursive_asdict(data))
Yep, I confirm the explanation I gave in the answer you refer to: dynamically generated classes are not easily picklable (nor otherwise easily serializable). You need to extract all the state information, pickle that state, and reconstruct the tricky sudsobject on retrieval if you really insist on using it ;-).
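A minimal sketch of that extract-then-pickle idea, assuming the recursive_asdict helper from the answer above and a hypothetical client call:

import pickle

# Extract plain-dict state from the suds response (hypothetical call),
# pickle that state, and rebuild whatever lightweight object you need later.
response = client.service.GetSomething()  # hypothetical suds call
state = recursive_asdict(response)        # plain dicts/lists/scalars only

with open('response.pkl', 'wb') as f:
    pickle.dump(state, f)

with open('response.pkl', 'rb') as f:
    restored = pickle.load(f)              # same nested dict structure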
Here is what I came up with before researching and finding this answer. It actually works well for me on complex suds responses and also on other objects such as __builtins__, since the solution is suds-agnostic:
import datetime

def object_to_dict(obj):
    if isinstance(obj, (str, unicode, bool, int, long, float, datetime.datetime, datetime.date, datetime.time)):
        return obj
    data_dict = {}
    try:
        all_keys = obj.__dict__.keys()  # vars(obj).keys()
    except AttributeError:
        return obj
    fields = [k for k in all_keys if not k.startswith('_')]
    for field in fields:
        val = getattr(obj, field)
        if isinstance(val, (list, tuple)):
            data_dict[field] = []
            for item in val:
                data_dict[field].append(object_to_dict(item))
        else:
            data_dict[field] = object_to_dict(val)
    return data_dict
This solution works and is actually faster. It also works on objects that don't have the __keylist__ attribute.
I ran a benchmark 100 times on a complex suds output object; this solution's run time was 0.040 to 0.052 seconds (0.045724287 average), while the recursive_asdict solution above ran in 0.082 to 0.102 seconds, so nearly double (0.0829765582 average).
I then went back to the drawing board and reworked the function to get more performance out of it; it does not need the datetime import. I leveraged the __keylist__ attribute, so this will not work on other objects such as __builtins__, but it works nicely for suds object output:
def fastest_object_to_dict(obj):
    if not hasattr(obj, '__keylist__'):
        return obj
    data = {}
    fields = obj.__keylist__
    for field in fields:
        val = getattr(obj, field)
        if isinstance(val, list):  # tuple not used
            data[field] = []
            for item in val:
                data[field].append(fastest_object_to_dict(item))
        else:
            data[field] = fastest_object_to_dict(val)
    return data
The run time was 0.018 to 0.033 seconds (0.0260889721 average), so nearly 4x faster than the recursive_asdict solution.
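For reference, a minimal sketch of how such a comparison might be timed with timeit (sample_response here stands in for a real complex suds output object):

import timeit

# sample_response is a placeholder for an actual complex suds response object
for fn in (recursive_asdict, object_to_dict, fastest_object_to_dict):
    total = timeit.timeit(lambda: fn(sample_response), number=100)
    print("{} {}".format(fn.__name__, total / 100))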
I made a dummy-class implementation of a suds Object instance that can then be serialized. The FakeSudsInstance behaves like an original Suds Object instance; see below:
from suds.sudsobject import Object as SudsObject


class FakeSudsNode(SudsObject):

    def __init__(self, data):
        SudsObject.__init__(self)
        self.__keylist__ = data.keys()
        for key, value in data.items():
            if isinstance(value, dict):
                setattr(self, key, FakeSudsNode(value))
            elif isinstance(value, list):
                l = []
                for v in value:
                    if isinstance(v, list) or isinstance(v, dict):
                        l.append(FakeSudsNode(v))
                    else:
                        l.append(v)
                setattr(self, key, l)
            else:
                setattr(self, key, value)


class FakeSudsInstance(SudsObject):

    def __init__(self, data):
        SudsObject.__init__(self)
        self.__keylist__ = data.keys()
        for key, value in data.items():
            if isinstance(value, dict):
                setattr(self, key, FakeSudsNode(value))
            else:
                setattr(self, key, value)

    @classmethod
    def build_instance(cls, instance):
        suds_data = {}

        def node_to_dict(node, node_data):
            if hasattr(node, '__keylist__'):
                keys = node.__keylist__
                for key in keys:
                    if isinstance(node[key], list):
                        lkey = key.replace('[]', '')
                        node_data[lkey] = node_to_dict(node[key], [])
                    elif hasattr(node[key], '__keylist__'):
                        node_data[key] = node_to_dict(node[key], {})
                    else:
                        if isinstance(node_data, list):
                            node_data.append(node[key])
                        else:
                            node_data[key] = node[key]
                return node_data
            else:
                if isinstance(node, list):
                    for lnode in node:
                        node_data.append(node_to_dict(lnode, {}))
                    return node_data
                else:
                    return node

        node_to_dict(instance, suds_data)
        return cls(suds_data)
Now, after a suds call, for example:
>>> import cPickle as pickle
>>> suds_instance = client.service.SomeCall(account, param)
>>> fake_suds = FakeSudsInstance.build_instance(suds_instance)
>>> dumped = pickle.dumps(fake_suds)
>>> loaded = pickle.loads(dumped)
I hope it helps.
The solutions suggested above lose valuable information about class names, which can matter in some libraries like the DFP client https://github.com/googleads/googleads-python-lib, where entity types may be encoded in dynamically generated class names (e.g. TemplateCreative/ImageCreative).
Here's the solution I used that preserves class names and restores dict-serialized objects without data loss (except suds.sax.text.Text, which would be converted into regular unicode objects, and maybe some other types I haven't run into).
from suds.sudsobject import asdict, Factory as SudsFactory

def suds2dict(d):
    """
    Suds object serializer
    Borrowed from https://stackoverflow.com/questions/2412486/serializing-a-suds-object-in-python/15678861#15678861
    """
    out = {'__class__': d.__class__.__name__}
    for k, v in asdict(d).iteritems():
        if hasattr(v, '__keylist__'):
            out[k] = suds2dict(v)
        elif isinstance(v, list):
            out[k] = []
            for item in v:
                if hasattr(item, '__keylist__'):
                    out[k].append(suds2dict(item))
                else:
                    out[k].append(item)
        else:
            out[k] = v
    return out


def dict2suds(d):
    """
    Suds object deserializer
    """
    out = {}
    for k, v in d.iteritems():
        if isinstance(v, dict):
            out[k] = dict2suds(v)
        elif isinstance(v, list):
            out[k] = []
            for item in v:
                if isinstance(item, dict):
                    out[k].append(dict2suds(item))
                else:
                    out[k].append(item)
        else:
            out[k] = v
    return SudsFactory.object(out.pop('__class__'), out)
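A brief round-trip sketch (suds_response stands in for a real suds result) showing how the class name survives JSON serialization:

import json

# suds_response is a placeholder for an actual suds call result
as_dict = suds2dict(suds_response)        # plain dict with a '__class__' entry
stored = json.dumps(as_dict)              # safe to persist anywhere
restored = dict2suds(json.loads(stored))  # rebuilt suds object with the original class name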
I updated the recursive_asdict example above to be compatible with Python 3 (items instead of iteritems):
from suds.sudsobject import asdict
from suds.sax.text import Text


def recursive_asdict(d):
    """
    Recursively convert Suds object into dict.
    We convert the keys to lowercase, and convert sax.Text
    instances to Unicode.

    Taken from:
    https://stackoverflow.com/a/15678861/202168

    Let's create a suds object from scratch with some lists and stuff
    >>> from suds.sudsobject import Object as SudsObject
    >>> sudsobject = SudsObject()
    >>> sudsobject.Title = "My title"
    >>> sudsobject.JustAList = [1, 2, 3]
    >>> sudsobject.Child = SudsObject()
    >>> sudsobject.Child.Title = "Child title"
    >>> sudsobject.Child.AnotherList = ["4", "5", "6"]
    >>> childobject = SudsObject()
    >>> childobject.Title = "Another child title"
    >>> sudsobject.Child.SudObjectList = [childobject]

    Now see if this works:
    >>> result = recursive_asdict(sudsobject)
    >>> result['title']
    'My title'
    >>> result['child']['anotherlist']
    ['4', '5', '6']
    """
    out = {}
    for k, v in asdict(d).items():
        k = k.lower()
        if hasattr(v, '__keylist__'):
            out[k] = recursive_asdict(v)
        elif isinstance(v, list):
            out[k] = []
            for item in v:
                if hasattr(item, '__keylist__'):
                    out[k].append(recursive_asdict(item))
                else:
                    out[k].append(
                        item.title() if isinstance(item, Text) else item)
        else:
            out[k] = v.title() if isinstance(v, Text) else v
    return out
I like this way: we don't do the iteration ourselves; Python iterates when converting the object to a string.
class Ob:
    def __init__(self, J) -> None:
        self.J = J

    def __str__(self):
        if hasattr(self.J, "__keylist__"):
            self.J = {key: Ob(value) for key, value in dict(self.J).items()}
        if hasattr(self.J, "append"):
            self.J = [Ob(data) for data in self.J]
        return str(self.J)

result = Ob(result_soap)
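A brief usage sketch (result_soap stands in for a real suds response); converting the wrapper to a string is what triggers the recursive wrapping:

# result_soap is a placeholder for an actual suds response object
text = str(result)  # __str__ recursively wraps nested nodes and lists
print(text)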