I'm a beginner programmer and I've been trying to parse a json output file from an API GET request in order to pull longitude and latitude coordinates.
The JSON file looks like this.
The JSON input file is here.
My code for parsing the json file currently looks like this:
# Select the 'meta' mapping of the first entry in the decoded 'transactions' list.
ourResult = js['transactions'][0]['meta']
# NOTE: Python 2 syntax (print statement, dict.iteritems()).
for majorkey, subdict in ourResult.iteritems():
    print majorkey
    # Every value is treated as a dict here; a value without .iteritems()
    # (e.g. a plain string or number) raises AttributeError.
    for subkey, value in subdict.iteritems():
        print subkey, value
This however, is only returning the one set of values within the 'location' key, and I'm trying to go a level further to pull the 'lon' and 'lat' values.
Any idea what code I should be using for this?
As I understand your question you needed something like:
# Decode the HTTP response body and pick the 'meta' mapping of the first transaction.
js = json.loads(response.content)
ourResult = js['transactions'][0]['meta']
# Written for Python 3: dict.items() and print() replace the Python 2-only
# dict.iteritems() and print statement.
for majorkey, subdict in ourResult.items():
    print(majorkey)
    # Only descend into values that really are dicts; scalars have no items().
    if isinstance(subdict, dict):
        for subkey, value in subdict.items():
            print(subkey, value)
You can print dict of arbitrary depth using code
def print_dict_rec(indict):
    """Print every key of a nested dict, descending into dict values.

    A key whose value is a dict is printed on its own line, followed by the
    recursive listing of that dict. Any other value (including lists) is
    printed next to its key without further descent.

    Args:
        indict: the dict to print.
    """
    for majorkey, subdict in indict.items():
        if isinstance(subdict, dict):
            print(majorkey)
            print_dict_rec(subdict)
        else:
            print(majorkey, subdict)
print_dict_rec(ourResult)
Code for extracting all values for keys 'lat' and 'lon':
def get_values_json(js, res):
    """Collect the scalar values stored under selected keys, at any depth.

    Walks nested lists and dicts. Whenever a dict key that is also a key of
    ``res`` holds a value that is neither a dict nor a list, that value is
    appended to ``res[key]``.

    Args:
        js: decoded JSON data (dict, list, or scalar).
        res: dict mapping each wanted key to a list that receives the values,
            e.g. ``{'lat': [], 'lon': []}``. It is modified in place.

    Returns:
        The same ``res`` object, for convenience.
    """
    if isinstance(js, list):
        for element in js:
            get_values_json(element, res)
    elif isinstance(js, dict):
        for key, value in js.items():
            if isinstance(value, (dict, list)):
                get_values_json(value, res)
            elif key in res:
                # The wanted keys come from `res` rather than being hard-coded.
                res[key].append(value)
    return res
# Accumulator: one list per key of interest, filled in place by get_values_json.
res = {'lat': [], 'lon': []}
get_values_json(js, res)
# print() call instead of the Python 2-only print statement.
print(res)
Related
There is a JSON like this:
{
"P1": "ss",
"Id": 1234,
"P2": {
"P1": "cccc"
},
"P3": [
{
"P1": "aaa"
}
]
}
How can I find all P1's value without it iterating all JSON?
P.S.: P1 can be anywhere in the JSON.
If no method can do this, can you tell me how to iterate through the JSON?
As I said in my other answer, I don't think there is a way of finding all values associated with the "P1" key without iterating over the whole structure. However, I've come up with an even better way to do that, which came to me while looking at @Mike Brennan's answer to another JSON-related question, "How to get string objects instead of Unicode from JSON?"
The basic idea is to use the object_hook parameter that json.loads() accepts just to watch what is being decoded and check for the sought-after value.
Note: This will only work if the representation is of a JSON object (i.e. something enclosed in curly braces {}), as in your sample.
from __future__ import print_function
import json
def find_values(id, json_repr):
    """Return every value stored under key ``id`` in a JSON document.

    The text is decoded once with ``json.loads``; the ``object_hook`` callback
    sees each decoded JSON object and records the value of ``id`` when that
    key is present. Inner objects are decoded before the objects containing
    them, so the results come innermost-first.

    Args:
        id: the key to look for.
        json_repr: the JSON text to scan.

    Returns:
        A list of the values found (empty when the key never occurs).
    """
    matches = []

    def _collect(decoded):
        # Called by the decoder for every JSON object; must hand the dict back.
        if id in decoded:
            matches.append(decoded[id])
        return decoded

    # Only the side effect of the hook matters; the decoded document is dropped.
    json.loads(json_repr, object_hook=_collect)
    return matches
json_repr = '{"P1": "ss", "Id": 1234, "P2": {"P1": "cccc"}, "P3": [{"P1": "aaa"}]}'
print(find_values('P1', json_repr))
(Python 3) output:
['cccc', 'aaa', 'ss']
I had the same issue just the other day. I wound up just searching through the entire object, accounting for both lists and dicts. The following snippet allows you to search for the first occurrence of each of multiple keys.
import json
def deep_search(needles, haystack):
    """Find the first occurrence of each wanted key in nested dicts/lists.

    A key present directly in a dict wins over any occurrence deeper inside
    that dict. Otherwise the dict's values (or the list's elements) are
    searched in order and the first hit is kept; later hits never overwrite
    an earlier one.

    Args:
        needles: a single key, or a list of keys, to look for.
        haystack: already-decoded JSON data (dict, list, or scalar).

    Returns:
        A dict mapping each key that was found to its value. Keys that do
        not occur anywhere are absent from the result.
    """
    if not isinstance(needles, list):
        needles = [needles]

    found = {}
    if isinstance(haystack, dict):
        for needle in needles:
            if needle in haystack:
                found[needle] = haystack[needle]
                continue
            for child in haystack.values():
                result = deep_search(needle, child)
                if result:
                    found.update(result)
                    break  # first occurrence wins; skip remaining siblings
    elif isinstance(haystack, list):
        for node in haystack:
            # Only keep looking for the keys that are still missing.
            pending = [needle for needle in needles if needle not in found]
            if not pending:
                break
            found.update(deep_search(pending, node))
    return found
deep_search(["P1", "P3"], json.loads(json_string))
It returns a dict with the keys being the keys searched for. Haystack is expected to be a Python object already, so you have to do json.loads before passing it to deep_search.
Any comments for optimization are welcomed!
My approach to this problem would be different.
Since JSON doesn't allow depth-first search, convert the JSON to a Python object, feed it to an XML decoder, and then extract the node you intend to search for.
from xml.dom.minidom import parseString
import json
def bar(somejson, key):
    """Find values for one or more keys by round-tripping JSON through XML-RPC.

    The JSON text is decoded, marshalled to XML-RPC markup, and parsed as a
    DOM. Every ``<name>`` element whose text equals one of the wanted keys is
    matched, and the text of the first ``<string>`` element inside the value
    that follows it is returned.

    Args:
        somejson: JSON text.
        key: a single key (string) or an iterable of keys.

    Returns:
        A list of the string values found, in document order.

    Raises:
        IndexError: if a matched key's value contains no ``<string>`` element
            (for example a plain number).
    """
    # The marshaller was never imported by the original snippet; import it
    # here under whichever name this interpreter provides.
    try:
        import xmlrpclib  # Python 2
    except ImportError:
        import xmlrpc.client as xmlrpclib  # Python 3

    def val(node):
        # Searches for the next Element Node containing Value
        e = node.nextSibling
        while e and e.nodeType != e.ELEMENT_NODE:
            e = e.nextSibling
        return (e.getElementsByTagName('string')[0].firstChild.nodeValue if e
                else None)

    # A bare string is ONE key. Testing `name in "P1"` would be a substring
    # check and would also match a key called "P" or "1".
    keys = (key,) if isinstance(key, (str, type(u''))) else tuple(key)

    # parse the JSON as XML
    foo_dom = parseString(xmlrpclib.dumps((json.loads(somejson),)))
    # and then search all the name tags which are wanted keys
    # and use the val user function to get the value
    return [val(node) for node in foo_dom.getElementsByTagName('name')
            if node.firstChild is not None  # an empty key has no text node
            and node.firstChild.nodeValue in keys]
bar(foo, 'P1')
[u'cccc', u'aaa', u'ss']
bar(foo, ('P1','P2'))
[u'cccc', u'cccc', u'aaa', u'ss']
Using json to convert the json to Python objects and then going through recursively works best. This example does include going through lists.
import json
def get_all(myjson, key):
    """Print every value stored under ``key`` anywhere in a JSON structure.

    Args:
        myjson: JSON text, or already-decoded data (dict or list). Text is
            decoded with ``json.loads`` first.
        key: the key whose values are printed.

    A matching key is printed whatever its value is, including when the value
    is itself a list or dict; containers are then searched further.
    """
    if isinstance(myjson, str):
        myjson = json.loads(myjson)
    if isinstance(myjson, dict):
        for jsonkey, value in myjson.items():
            # Check the key first so container-valued matches are not skipped.
            if jsonkey == key:
                print(value)
            if isinstance(value, (list, dict)):
                get_all(value, key)
    elif isinstance(myjson, list):
        for item in myjson:
            if isinstance(item, (list, dict)):
                get_all(item, key)
Converting the JSON to Python and recursively searching is by far the easiest:
def findall(v, k):
    """Print every value stored under key ``k`` in nested dicts.

    Only dicts are traversed: anything that is not a dict (lists included)
    is ignored, so keys inside arrays are not reported.

    Args:
        v: decoded JSON data.
        k: the key whose values are printed.
    """
    if not isinstance(v, dict):
        return
    for name, child in v.items():
        if name == k:
            print(child)
        findall(child, k)
findall(json.loads(a), 'P1')
(where a is the string)
The example code ignores arrays. Adding that is left as an exercise.
Bearing in mind that json is simply a string, using regular expressions with look-ahead and look-behind can accomplish this task very quickly.
Typically, the json would have been extracted from a request to external api, so code to show how that would work has been included but commented out.
import re
#import requests
#import json
#r1 = requests.get( ... url to some api ...)
#JSON = str(json.loads(r1.text))
JSON = """
{
"P1": "ss",
"Id": 1234,
"P2": {
"P1": "cccc"
},
"P3": [
{
"P1": "aaa"
}
]
}
"""
# Look-behind anchors on the literal text `"P1": "` (exactly one space after
# the colon) and look-ahead on the closing quote, so only the value between
# them is captured. Values are limited to letters, '_', '-' and spaces.
# Raw string: `\-` is a regex escape, not a valid Python string escape.
rex1 = re.compile(r'(?<="P1": ")[a-zA-Z_\- ]+(?=")')
rex2 = rex1.findall(JSON)
print(rex2)
#['ss', 'cccc', 'aaa']
I don't think there's any way of finding all values associated with P1 without iterating over the whole structure. Here's a recursive way to do it that first deserializes the JSON object into an equivalent Python object. To simplify things most of the work is done via a recursive private nested function.
import json
# Pick the base string type: `basestring` exists only on Python 2.
try:
    STRING_TYPE = basestring
except NameError:
    STRING_TYPE = str  # Python 3


def find_values(id, obj):
    """Return every value stored under key ``id`` in a decoded JSON object.

    The structure is walked recursively by duck typing: anything with
    ``.items()`` is treated as a mapping and anything iterable as a
    sequence. Strings are never descended into. A value stored under the
    wanted key is recorded as-is and not searched any further.

    Args:
        id: the key to look for.
        obj: decoded JSON data.

    Returns:
        A list of the values found, in traversal order.
    """
    found = []

    def _walk(node):
        # Mapping pass: nodes without .items() raise AttributeError and fall
        # through to the sequence pass below.
        try:
            for name, child in node.items():
                if name == id:
                    found.append(child)
                    continue
                if isinstance(child, STRING_TYPE):
                    continue
                _walk(child)
        except AttributeError:
            pass

        # Sequence pass: non-iterable scalars raise TypeError and are skipped.
        try:
            for element in node:
                if isinstance(element, STRING_TYPE):
                    continue
                _walk(element)
        except TypeError:
            pass

    if isinstance(obj, STRING_TYPE):
        return found
    _walk(obj)
    return found
json_repr = '{"P1": "ss", "Id": 1234, "P2": {"P1": "cccc"}, "P3": [{"P1": "aaa"}]}'
obj = json.loads(json_repr)
print(find_values('P1', obj))
You could also use a generator to search the object after json.load().
Code example from my answer here: https://stackoverflow.com/a/39016088/5250939
def item_generator(json_input, lookup_key):
    """Lazily yield every value stored under ``lookup_key``.

    Dicts and lists are searched recursively. A value found under the wanted
    key is yielded as-is and is not searched any further.

    Args:
        json_input: decoded JSON data (dict, list, or scalar).
        lookup_key: the key whose values are yielded.

    Yields:
        Each matching value, in traversal order.
    """
    if isinstance(json_input, dict):
        # .items() works on Python 2 and 3; .iteritems() exists only on Python 2.
        for k, v in json_input.items():
            if k == lookup_key:
                yield v
            else:
                for child_val in item_generator(v, lookup_key):
                    yield child_val
    elif isinstance(json_input, list):
        for item in json_input:
            for item_val in item_generator(item, lookup_key):
                yield item_val
The question is old, but no answer answered 100%, so this was my solution:
what it does:
recursive algorithm;
list search;
object search;
returns all the results it finds in the tree;
returns the id of the parent in the key
suggestions:
study Depth First Search and Breadth First Search;
if your json is too big, recursion may be a problem, research stack algorithm
#staticmethod
# NOTE(review): presumably a @staticmethod of a `Tools` class (the decorator
# line above looks garbled into a comment) -- confirm against the full file.
def search_into_json_myversion(jsondata, searchkey, parentkeyname: str = None) -> list:
    """Collect every occurrence of ``searchkey`` in nested dicts and lists.

    Args:
        jsondata: decoded JSON data (dict, list, or scalar).
        searchkey: the key to look for.
        parentkeyname: name of the key under which ``jsondata`` itself is
            stored, if any; used to label hits found directly in it.

    Returns:
        A list of single-entry dicts, one per hit, in traversal order. Each
        maps ``'<parent>-><searchkey>'`` (or just ``searchkey`` when there is
        no parent) to the value found. The parent is the nearest enclosing
        dict key; list elements inherit the parent of their list.
    """
    found = []

    def _search(node, parent):
        # Local helper, so the recursion does not rely on an enclosing class.
        if isinstance(node, list):
            for element in node:
                _search(element, parent)
        elif isinstance(node, dict):
            if searchkey in node:
                pathkey = searchkey if parent is None else parent + '->' + searchkey
                found.append({pathkey: node[searchkey]})
            # Always keep descending: a hit at this level must not hide hits
            # that sit deeper in sibling values.
            for key, value in node.items():
                _search(value, key)

    _search(jsondata, parentkeyname)
    return found
If I know there will be three levels in the JSON file and I want to match text at the third level, I can write code like the snippet shown below. But if I don't know the exact number of levels, how can I write a generic function for this in Python? Any help would be appreciated.
Thanks
values=['text1','text2','text3']
event_json={'text1':[{
'othertext': {},
'text2': [{
'text3':{.....},
'othertext1': {},
....}],
...}]}
def function():
if event_json:
for event in event_json['text1']:
for activity in event['text2']:
if 'text3' in activity and
activity['text3'] == expected_name:
print('Match the text')
You can iterate over the keys of the path you have; at each step, depending on whether the current value is a dict or a list, you look up the next key accordingly:
# Path of keys to follow, outermost first.
values = ['text1', 'text2', 'text3']
event_json = {'text1': [{'text2': [{'text3': "LOOKFOR"}], }]}


def function(expected):
    """Follow the key path in ``values`` through ``event_json`` and compare.

    At each step a dict is indexed by the key; in a list, the first element
    containing the key is used. A value that is neither a dict nor a list is
    left unchanged for that step.

    Args:
        expected: the value the end of the path is compared against.

    Returns:
        ``-1`` if a dict on the path lacks the key (the dict and key are
        printed first), ``-2`` if no element of a list contains the key,
        otherwise the result of ``value == expected``.
    """
    current = event_json
    for step in values:
        if isinstance(current, dict):
            if step not in current:
                print(current, step)
                return -1
            current = current[step]
            continue
        if not isinstance(current, list):
            continue
        # First list element that holds the key, or None when there is none.
        holder = next((entry for entry in current if step in entry), None)
        if holder is None:
            return -2
        current = holder[step]
    # If the final value is an array, use `expected in current` instead.
    return current == expected
if __name__ == '__main__':
print(function("LOOKFOR")) # true
print(function("LOOK-FOR")) # false
I want to be able to obtain all the various paths to the keys in a JSON file. I often obtain large JSONs and I'm not exactly sure where a given data element might be, or I need to query various elements of the data. Visualizing a tree of the JSON can be inconvenient.
Basically I want to get a list of all the different paths to make various future tasks easier.
For example:
myjson = {'transportation':'car',
'address': {'driveway':'yes','home_address':{'state':'TX',
'city':'Houston'}},
'work_address':{
'state':'TX',
'city':'Sugarland',
'location':'office-tower',
'salary':30000}}
It would be great if I could run some type of loop to get a list back in this format below or in a format....
myjson['address']['driveway']
myjson.address
myjson.address.driveway
myjson.address.home_address
myjson.address.home_address.city
myjson.address.home_address.state
myjson.transportation
myjson.work_address
myjson.work_address.city
myjson.work_address.location
myjson.work_address.salary
myjson.work_address.state
For example I've started with
# Collects dotted key paths, but only for the first two levels of `myjson`.
mylist = []
for key, value in myjson.items():
    mylist.append(key)
    # Exactly one extra level is handled: the keys of a dict value are
    # appended as 'key.key2'; anything nested deeper is not visited.
    if type(value) is dict:
        for key2, value2 in myjson[key].items():
            mylist.append(key+'.'+key2)
print(mylist)
I guess this kinda works, but I don't know how to make this iterate indefinitely. For example, how would I build this up to being 3-10+ layers deep?
Great snippet !
Here is a version which also handles lists:
def get_keys(some_dictionary, parent=None, collected=None):
    """Record the dotted path of every key in nested dicts and lists.

    Args:
        some_dictionary: decoded JSON data. Dicts contribute their keys;
            list elements are searched under the path of the key holding the
            list; every other value (str, number, None, ...) is ignored.
        parent: dotted path prefix for the keys at this level. When omitted,
            top-level keys are recorded without a prefix.
        collected: list that receives the paths. Defaults to the module-level
            ``my_list``.

    Returns:
        The list the paths were appended to. Each distinct path appears once.
    """
    if collected is None:
        collected = my_list
    if isinstance(some_dictionary, list):
        for element in some_dictionary:
            get_keys(element, parent=parent, collected=collected)
        return collected
    if not isinstance(some_dictionary, dict):
        # Scalars carry no keys; this also stops numbers/None inside lists
        # from failing on a missing .items().
        return collected
    for key, value in some_dictionary.items():
        path = str(key) if parent is None else '{}.{}'.format(parent, key)
        if path not in collected:
            collected.append(path)
        if isinstance(value, (dict, list)):
            get_keys(value, parent=path, collected=collected)
    return collected
I think this should do what you're asking:
myjson = {
'transportation': 'car',
'address': {
'driveway': 'yes',
'home_address': {
'state': 'TX',
'city': 'Houston'}
},
'work_address': {
'state': 'TX',
'city': 'Sugarland',
'location': 'office-tower',
'salary': 30000}
}
def get_keys(some_dictionary, parent=None, collected=None):
    """Record the dotted path of every key in a nested dict.

    Args:
        some_dictionary: the dict to walk. Only dict values are descended
            into; lists and scalars are recorded by key but not searched.
        parent: dotted path prefix for the keys at this level. When omitted,
            top-level keys are recorded without a prefix.
        collected: list that receives the paths. Defaults to the module-level
            ``my_list``.

    Returns:
        The list the paths were appended to. Each distinct path appears once.
    """
    if collected is None:
        collected = my_list
    for key, value in some_dictionary.items():
        # Build the path once; without a parent, use the bare key rather than
        # the literal text 'None.<key>'.
        path = str(key) if parent is None else '{}.{}'.format(parent, key)
        if path not in collected:
            collected.append(path)
        if isinstance(value, dict):
            get_keys(value, parent=path, collected=collected)
    return collected
my_list = []
get_keys(myjson, parent='myjson')
print(my_list)
Outputs:
['myjson.transportation',
'myjson.work_address',
'myjson.work_address.city',
'myjson.work_address.state',
'myjson.work_address.location',
'myjson.work_address.salary',
'myjson.address',
'myjson.address.driveway',
'myjson.address.home_address',
'myjson.address.home_address.city',
'myjson.address.home_address.state']
The key is to just keep calling get_keys() recursively from within the function!
An implementation handling paths of lists in json also.
import json
def get_json_key_path(jsonStr, enable_index):
    """List the dotted paths of the keys in a JSON document.

    Args:
        jsonStr: JSON text.
        enable_index: when true, the position of each list element is
            inserted into the path (``c.0.d``, ``c.1.d``); when false,
            elements of a list share one path (``c.d``).

    Returns:
        A list of dotted key paths, each recorded once, in traversal order.
        A key whose value is a list is not recorded itself; only the keys of
        dicts found directly inside the list are. A document whose top level
        is not an object yields an empty list.
    """
    json_keys = []
    jsonObj = json.loads(jsonStr)

    def get_key_path(node, parent=None):
        # Only objects carry keys; scalars and nested lists end the descent.
        if not isinstance(node, dict):
            return
        for key, value in node.items():
            path = '{}.{}'.format(parent, key)
            if isinstance(value, list):
                for index, element in enumerate(value):
                    child_parent = '{}.{}'.format(path, index) if enable_index else path
                    get_key_path(element, parent=child_parent)
                continue
            if path not in json_keys:
                json_keys.append(path)
            if isinstance(value, dict):
                get_key_path(value, parent=path)

    # The empty root parent makes every path start with '.', stripped below.
    get_key_path(jsonObj, "")
    return [s[1:] for s in json_keys]
I am currently learning Python, and while trying to find a way to compile data from IP registrations, I stumbled into nested dictionaries/lists. I have found a way to pull the data I wanted, but it seems excessive, and all of my attempts to consolidate the iteration have failed. Any documentation on this subject or some pointers would be greatly appreciated.
from ipwhois import IPWhois
obj = IPWhois('8.8.8.8')
results = obj.lookup_rdap(depth=1)
for key, val in results.items():
if isinstance(val, dict):
if key == 'objects':
objects = (val)
for key, val in objects.items():
if isinstance(val, dict) and key.startswith("ABUSE"):
abuse =(val)
for key, val in abuse.items():
if isinstance(val, dict):
contact = (val)
for key,val in contact.items():
if isinstance(val, list):
if key == 'email':
email = (val)
value=(email[0])
print (value['value'])
I think this is much clearer; you should take a good look at the format of the results returned by obj:
from ipwhois import IPWhois
obj = IPWhois('8.8.8.8')
results = obj.lookup_rdap(depth=1)
# Walk only the RDAP objects whose handle starts with 'ABUSE' and print each
# e-mail address listed in their contact record.
objects = results.get('objects') or {}
for key, val in objects.items():
    if not key.startswith('ABUSE'):
        continue
    # NOTE(review): ipwhois appears to use None for a missing 'contact' or
    # 'email' entry -- confirm against its RDAP output docs. Missing data is
    # treated as "no addresses" instead of raising TypeError.
    contact = val.get('contact') or {}
    for ele in contact.get('email') or []:
        print(ele['value'])
for example, let's say i have a dict like this:
bulk_details={
"ad":'ad',
'ad':[
{'ad':'ad'},
{'ad':'ad'}
]
}
I want to encrypt the values in the dict. I'm stuck at parsing the inner dicts inside the list.
my code is this:
new_data = {key: {key_: encrypt(val_) for key_, val_ in (val.items() if type(val) is dict else val)} for key, val in (bulk_details.items() if type(bulk_details) is dict else bulk_details) }
This is not nearly as compact as your one-liner, but it solves your problem and you can perhaps make it more compact:
bulk_details = {
'ad':'ad',
'ad2':
[
{'ad':'ad'},
{'ad':'ad'}
]
}
def encrypt(to_encrypt):
    """Placeholder cipher: wrap the text in question marks."""
    return '?' + to_encrypt + '?'


def encrypt_nested(dt):
    """Encrypt, in place, every string value in nested dicts and lists.

    Args:
        dt: a dict or list, possibly nested. String values of dicts and
            string elements of lists are replaced by ``encrypt(value)``;
            dict keys are left untouched. Tuples cannot be modified, so only
            the dicts/lists inside them are processed. Any other value
            (numbers, None, a bare string) is returned unchanged.

    Returns:
        The same ``dt`` object.
    """
    if isinstance(dt, dict):
        # Re-assigning existing keys while iterating is safe: size is unchanged.
        slots = dt.items()
    elif isinstance(dt, list):
        # Index-based assignment; rebinding the loop variable would leave the
        # list itself unmodified.
        slots = enumerate(dt)
    elif isinstance(dt, tuple):
        for value in dt:
            if not isinstance(value, str):
                encrypt_nested(value)
        return dt
    else:
        return dt

    for slot, value in slots:
        if isinstance(value, str):
            dt[slot] = encrypt(value)
        else:
            encrypt_nested(value)
    return dt
print(encrypt_nested(bulk_details))
# {'ad': '?ad?', 'ad2': [{'ad': '?ad?'}, {'ad': '?ad?'}]}
It iterates through a nested dict including arrays for any amount of levels using a recursing function.