I am trying to get all keys from a json file in Python.
How can I get the nested second-level keys (x, y) and third-level keys (a, b)?
For example, Keys: results,x,y,a,b
Code:
#open data
import json
with open('list.json') as json_file:
    my_dict = json.load(json_file)

# Print each key found at the top level of the loaded document.
for top_level_key in my_dict.keys():
    print("Keys : {}".format(top_level_key))
Json:
{
"results":[
{
"x":5
},
{
"x":5,
"y":[
1,
2,
3
]
},
{
"x":5,
"y":{
"a":2,
"b":67
}
}
]
}
Output:
Keys : results
You need to get the keys which are a part of the value of the JSON.
You therefore need to iterate over the values of my_dict not the keys.
Use a recursive function to return all nested keys. Here is the reference Stack Overflow page.
import json
def recursive_items(dictionary):
    """Yield every distinct key found anywhere inside a decoded JSON value.

    The structure is walked depth-first.  Dictionaries contribute their keys
    and are searched through their values; lists contribute nothing
    themselves but each element is searched.  A key is yielded the first time
    it is encountered and skipped afterwards, so the result for the sample
    document is ``results, x, y, a, b``.

    Args:
        dictionary: The decoded JSON value to inspect (normally a dict, but a
            list of dicts is accepted as well).

    Yields:
        Each key, once, in order of first appearance.
    """
    seen = set()

    def walk(node):
        if isinstance(node, dict):
            for key, value in node.items():
                if key not in seen:
                    seen.add(key)
                    yield key
                # Always descend: the value may hold further dicts or lists.
                yield from walk(value)
        elif isinstance(node, list):
            for element in node:
                yield from walk(element)
        # Scalars (str, int, None, ...) carry no keys.

    yield from walk(dictionary)
with open('list.json') as json_file:
    my_dict = json.load(json_file)

# Report each key produced by walking the loaded document.
for nested_key in recursive_items(my_dict):
    print("Keys : {}".format(nested_key))
Related
I have a dictionary of dictionaries, like given below:
{
"dev": {
"project_id": "dev_project_id",
"secret_id": "dev_secret_id",
"secret_project_id": "dev_secret_project_id",
"service_account_email": "dev_service_account_email#gmail.com",
"email_list": ["dev_email#gmail.com"],
"core_func_path":"dev/core_func.py",
"secret_id_email": "dev_secret_id_email"
},
"prod": {
"project_id": "prod_project_id",
"secret_id": "prod_secret_id",
"secret_project_id": "prod_secret_project_id",
"service_account_email": "prod_service_account_email#gmail.com",
"email_list": ["prod_email_list#gmail.com"],
"core_func_path":"prod/core_func.py",
"secret_id_email": "prod_secret_id_email"
}
}
And I need to extract key when a specific project_id is provided.
Till now, I have this code, which can get values from a dictionary, however, it is failing for a dictionary of dictionaries.
# Question snippet: try to print the top-level key whose entry has the given project_id.
# NOTE(review): Path, os and json must already be imported; the imports are not shown here.
check_project_id='dev_project_id'
# Build the path to config.json inside the "config" directory that sits two
# levels above the directory containing this file.
curr_dir = Path(os.path.dirname(os.path.abspath(__file__)))
default_config_dir = os.fspath(Path(curr_dir.parent.parent, 'config').resolve())
constants_path = str(default_config_dir)+'/config.json'
with open(constants_path, 'r') as f:
std_config = json.load(f)
for val in std_config.values():
if(val['project_id']==check_project_id):
# This is the lookup that fails: with the config shown above,
# list(std_config.values()) holds the inner dictionaries, so
# .index(check_project_id) searches them for a plain string.
print(list(std_config.keys())[list(std_config.values()).index(check_project_id)])
Is there any way I can implement this?
So if I understand correctly, std_config is your dictionary of dictionaries. Just use the items call on the dictionary to be able to extract the key that matches your criteria.
# Visit each (name, settings) pair and print the name whenever the nested
# project_id equals the one being searched for.
for env_name, env_settings in std_config.items():
    if env_settings["project_id"] != check_project_id:
        continue
    print(env_name)
> dev
I'm trying to get Facebook's insights data. The code below was working before, but it has started breaking now.
Code:
# Question snippet that reproduces the ValueError listed under "Error:" below.
async_job = {
"id": "169817458353501",
"report_run_id": "169817458353501",
}
results = []
# Iterating a dict directly yields its keys, so each `item` is a str.
for item in async_job:
print(item, type(item), async_job)
# dict() is handed a bare key string here rather than key/value pairs.
data = dict(item)
results.append(data)
Error:
data = dict(item)
ValueError: dictionary update sequence element #0 has length 1; 2 is required
I have tried with the JSON module as well but seems still not working.
import json
# Second attempt from the question: the same loop, but each key is passed to json.loads.
async_job = {
"id": "169817458353501",
"report_run_id": "169817458353501",
}
results = []
# As before, `item` is only a dictionary key (a str), not a key/value pair.
for item in async_job:
print(item, type(item), async_job)
# json.loads receives just the key text, not a JSON document.
data = json.loads(item)
results.append(data)
any leads will be helpful.
You are iterating over the dict keys,
which yields only a single value (the key) on each iteration.
dict() needs an iterable of key-value pairs.
Maybe you wish to iterate over dict items (i.e key-value pairs):
async_job = {
    "id": "169817458353501",
    "report_run_id": "169817458353501",
}

# Build one single-entry dictionary per key/value pair of the job.
results = []
for field_name, field_value in async_job.items():
    results.append({field_name: field_value})
Alternatively, you can use a comprehension:
results = [{key: value} for key, value in async_job.items()]
I am new to pyspark . My requirement is to get/extract the attribute names from a nested json file . I tried using json_normalize imported from pandas package. It works for direct attributes but never fetches the attributes within json array attributes. My json doesn't have a static structure. It varies for each document that we receive. Could someone please help me with explanation for the small example provided below,
{
"id":"1",
"name":"a",
"salaries":[
{
"salary":"1000"
},
{
"salary":"5000"
}
],
"states":{
"state":"Karnataka",
"cities":[
{
"city":"Bangalore"
},
{
"city":"Mysore"
}
],
"state":"Tamil Nadu",
"cities":[
{
"city":"Chennai"
},
{
"city":"Coimbatore"
}
]
}
}
Especially for the json array elements..
Expected output :
id
name
salaries.salary
states.state
states.cities.city
Here is another solution for extracting all nested attributes from JSON:
import json
# Accumulates every attribute path discovered by parse_json().
result_set = set()


def _native_path(path):
    """Return *path* as the interpreter's native ``str`` type.

    On Python 2 the keys returned by ``json.load`` are ``unicode``; they are
    encoded to ASCII (characters that cannot be encoded are dropped) so the
    collected paths are plain strings.  On Python 3 the path is already a
    ``str`` and is returned unchanged rather than being turned into ``bytes``.
    """
    if isinstance(path, str):
        return path
    return path.encode('ascii', 'ignore')


def _child_path(parent_path, key):
    """Return the dotted prefix used for the children of *key*."""
    return str(key) if parent_path == "" else parent_path + "." + str(key)


def parse_json_array(json_obj, parent_path):
    """Collect attribute paths from every element of a JSON array.

    Args:
        json_obj: The decoded array (any iterable of decoded JSON values).
        parent_path: Dotted path of the key that holds this array.

    Returns:
        None.  Paths are added to the module-level ``result_set``.
    """
    for element in json_obj:
        if isinstance(element, dict):
            parse_json(element, parent_path)
        elif isinstance(element, list):
            # An array nested directly inside an array keeps the same prefix.
            parse_json_array(element, parent_path)
        # Scalars have no attributes, so there is nothing to record for them.
    return None


def parse_json(json_obj, parent_path):
    """Record the dotted path of every key of *json_obj*, recursively.

    Every key is recorded as ``parent_path + "." + key``, including keys whose
    value is itself an object or an array.  Because of that rule, keys of the
    top-level object (``parent_path == ""``) are stored with a leading dot,
    e.g. ``".id"``, while deeper keys look like ``"states.state"``.

    Args:
        json_obj: The decoded JSON object (a dict).
        parent_path: Dotted path of the enclosing key, or ``""`` at the root.

    Returns:
        None.  Paths are added to the module-level ``result_set``.
    """
    for key in json_obj.keys():
        key_value = json_obj.get(key)
        if isinstance(key_value, dict):
            parse_json(key_value, _child_path(parent_path, key))
        elif isinstance(key_value, list):
            parse_json_array(key_value, _child_path(parent_path, key))
        result_set.add(_native_path(parent_path + "." + key))
    return None
file_name = "C:/input/sample.json"
# The context manager closes the file handle as soon as the document is read.
with open(file_name, "r") as file_data:
    json_data = json.load(file_data)
# A parenthesised single-argument print works on Python 2.7 and Python 3 alike.
print(json_data)
parse_json(json_data, "")
print(list(result_set))
Output:
{u'states': {u'state': u'Tamil Nadu', u'cities': [{u'city': u'Chennai'}, {u'city': u'Coimbatore'}]}, u'id': u'1', u'salaries': [{u'salary': u'1000'}, {u'salary': u'5000'}], u'name': u'a'}
['states.cities.city', 'states.cities', '.id', 'states.state', 'salaries.salary', '.salaries', '.states', '.name']
Note:
My Python version: 2.7
you can do in this way also.
data = { "id":"1", "name":"a", "salaries":[ { "salary":"1000" }, { "salary":"5000" } ], "states":{ "state":"Karnataka", "cities":[ { "city":"Bangalore" }, { "city":"Mysore" } ], "state":"Tamil Nadu", "cities":[ { "city":"Chennai" }, { "city":"Coimbatore" } ] } }
def dict_ittr(lin, data):
    """Print the dotted path of every leaf value found inside *data*.

    Dictionaries extend the path with ``"." + key``; lists keep the current
    path and are searched element by element; anything else is a leaf and
    its path is printed.  One line is printed per leaf, so a key that occurs
    in several list elements is printed several times.  Every path starts
    with a dot when the initial ``lin`` is ``""``.

    Args:
        lin: Path accumulated so far (``""`` for the initial call).
        data: The decoded JSON value to walk.
    """
    if isinstance(data, dict):
        for k, v in data.items():
            dict_ittr(lin + "." + k, v)
    elif isinstance(data, list):
        for element in data:
            dict_ittr(lin, element)
    else:
        # A parenthesised single-argument print works on Python 2 and 3.
        print(lin)
dict_ittr("",data)
output
.states.state
.states.cities.city
.states.cities.city
.id
.salaries.salary
.salaries.salary
.name
If you treat the json like a python dictionary, this should work.
I just wrote a simple recursive program.
Script
import json
def js_r(filename):
    """Open *filename*, decode its contents as JSON and return the result."""
    with open(filename) as source:
        document = json.load(source)
    return document
g = js_r("city.json")
answer_d = {}
def base_line(g, answer_d):
    """Give *answer_d* an empty-dict entry for every key of *g*.

    *answer_d* is modified in place and also returned.
    """
    answer_d.update((top_key, {}) for top_key in g.keys())
    return answer_d
answer_d = base_line(g, answer_d)
def _merge_key_tree(target, source):
    """Copy every key of *source* into *target*, keeping keys already present."""
    for key, subtree in source.items():
        _merge_key_tree(target.setdefault(key, {}), subtree)


def recurser_func(g, answer_d):
    """Fill *answer_d* with the tree of keys found in *g*.

    The result maps each key of ``g`` to a dictionary describing the keys
    below it; a key with nothing below it maps to ``{}``.

    * dict value  -> the key tree of that dictionary (all of its keys);
    * list value  -> the merged key trees of every dict element, so keys
      that only appear in later elements are kept too; scalars, nested
      lists and empty lists contribute nothing;
    * other value -> ``{}`` (an entry already present is left untouched).

    Args:
        g: The decoded JSON object to inspect.
        answer_d: Dictionary to populate; modified in place.

    Returns:
        The same ``answer_d`` object.
    """
    for k, value in g.items():
        if isinstance(value, dict):
            answer_d[k] = recurser_func(value, {})
        elif isinstance(value, list):
            merged = {}
            for element in value:
                if isinstance(element, dict):
                    _merge_key_tree(merged, recurser_func(element, {}))
            answer_d[k] = merged
        else:
            answer_d.setdefault(k, {})
    return answer_d
recurser_func(g,answer_d)
def printer_func(answer_d, list_to_print, parent):
    """Flatten a key tree into dotted paths.

    Args:
        answer_d: Key tree mapping each key to the (possibly empty)
            dictionary of keys below it.
        list_to_print: List that receives the paths; modified in place.
        parent: Prefix for every path produced by this call, either ``""``
            or a dotted path ending in ``"."``.

    Returns:
        ``list_to_print``, with one entry per key that has nothing below it,
        e.g. ``"states.cities.city"``.
    """
    for k, children in answer_d.items():
        if children:
            # Carry the whole prefix down so deeper levels keep their ancestors.
            printer_func(children, list_to_print, parent + k + ".")
        else:
            list_to_print.append(parent + k)
    return list_to_print
l = printer_func(answer_d, [], "")
final = " ".join(l)
print(final)
Explanation
base_line makes a dictionary of all your base keys.
recurser_func checks whether each key's value is a list or a dict, then adds to the answer dictionary as necessary until answer_d looks like: {'id': {}, 'name': {}, 'salaries': {'salary': {}}, 'states': {'state': {}, 'cities': {'city': {}}}}
After these 2 functions are called you have a dictionary of keys in a sense. Then printer_func is a recursive function to print it as you desired.
NOTE:
Your question is similar to this one: Get all keys of a nested dictionary but since you have a nested list/dictionary instead of just a nested dictionary, their answers won't work for you, but there is more discussion on the topic on that question if you like more info
EDIT 1
my python version is 3.7.1
I have added a json file opener to the top. I assume that the json is named city.json and is in the same directory
EDIT 2: More thorough explanation
The main difficulty that I found in dealing with your data is the fact that you can have arbitrarily deeply nested lists and dictionaries. This makes it complicated. Since the nesting can be arbitrarily deep, I knew this was a recursion problem.
So, I build a dictionary of dictionaries representing the key structure that you are looking for. Firstly I start with the baseline.
base_line makes {'id': {}, 'name': {}, 'salaries': {}, 'states': {}} This is a dictionary of empty dictionaries. I know that when you print. Every key structure (like states.state) starts with one of these words.
recursion
Then I add all the child keys using recurser_func.
When given a dictionary g, this function loops through all the keys in that dictionary and (assuming answer_d has each key that g has) for each key adds that key's children to answer_d.
If the child is a dictionary. Then I recurse with the given dictionary g now being the sub-part of the dictionary that pertains to the children, and answer_d being the sub_part of answer_d that pertains to the child.
I have a large nested dictionary with an unknown depth and i would like to know how i can find the keys which led to the value. For example...
{'furniture':{'chair':{'sofa':{'cushion':{}}}}}
Ideally what i am looking for is a function to determine the path to the value that i have entered. I have tried researching online and this is what i tried...
def route(d, key):
    """Return the value stored under *key* anywhere inside nested dict *d*.

    The current level is checked first; otherwise each dict-valued entry is
    searched in turn and the first result that is not None is returned.
    None is returned when nothing is found.
    """
    if key in d:
        return d[key]
    nested_dicts = (child for child in d.values() if isinstance(child, dict))
    for child in nested_dicts:
        hit = route(child, key)
        if hit is not None:
            return hit
    return None
This returns the items inside the key. I am looking to be able to extract the path which leads to that item. For example, route(dictionary,'sofa') then i would be able to get an expected output as such or something similar...
{'sofa':{'chair':'furniture'}}
What are some of the ways that i can achieve this ? Thanks for your help
You can do this recursively and return a list of keys that lead you to your target key:
def route(d, key):
    """Return the list of keys leading from the top of *d* down to *key*.

    Args:
        d: A (possibly nested) dictionary.
        key: The key to locate.

    Returns:
        The path as a list ending in *key*, e.g.
        ``['furniture', 'chair', 'sofa', 'cushion']``, or ``[]`` when the key
        does not occur anywhere in *d*.
    """
    if key in d:
        return [key]
    for k, v in d.items():
        # isinstance also accepts dict subclasses such as OrderedDict.
        if isinstance(v, dict):
            found = route(v, key)
            if found:
                return [k] + found
    return []
If we run this on the following dictionary:
data = {
'furniture': {
'chair': {
'sofa': {
'cushion': {}
}
}
},
'electronics': {
'tv': {
'samsung43': 800,
'tcl54': 200
}
}
}
print(route(data, 'cushion'))
print(route(data, 'tcl54'))
print(route(data, 'hello'))
we get the following output:
['furniture', 'chair', 'sofa', 'cushion']
['electronics', 'tv', 'tcl54']
[]
I have a dictionary of unknown depth and structure. It might contain more dictionaries, lists of dictionaries, etc. It's created from deserializing some JSON input created by another system. There's a key, perhaps multiple keys with the same name, at various places in that dictionary. I'd like to get the values at each of those keys and ideally update them.
Given a dictionary structure like this:
{
"tags":{
"social-1":{
"email":True,
"twitter":True,
"facebook":True,
"linkedin":True,
"type":"social"
},
"primary":{
"type":"flexible",
"width":"auto",
"blocks":[
{
"type":"combo",
"data":{
"styles":{
"margin":"10",
"padding":"0",
"borderColor":"#000",
"borderWidth":"0",
"borderStyle":"solid",
"backgroundColor":"transparent",
"width":"auto"
},
"placeholder":True,
"headline":{
"visible":False
},
"subHeadline":{
"visible":False
},
"imageHolder":{
"visible":True,
"value":[
{
"url":None,
"caption":None,
"width":220,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"sizing":"original",
"source":"disk",
"displayWidth":200,
"displayHeight":140,
"displayPadding":{
"left":10,
"top":0,
"right":10,
"bottom":10
}
}
],
"smartSizing":True,
"captions":False,
"captionDefault":None
},
"content":{
"visible":True,
"value":"<p>Your text here.</p>"
},
"imagePosition":"left",
"textWrap":False,
"type":"combo"
}
},
{
"type":"image",
"data":{
"styles":{
"margin":"10",
"padding":"0",
"borderColor":"#000",
"borderWidth":"0",
"borderStyle":"solid",
"backgroundColor":"transparent",
"width":"auto"
},
"placeholder":False,
"imageHolder":[
{
"url":None,
"caption":None,
"width":0,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"sizing":"original",
"source":"disk",
"displayWidth":213,
"displayHeight":159,
"displayPadding":{
"left":10,
"top":10,
"right":5,
"bottom":10
}
},
{
"url":None,
"caption":None,
"width":0,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"displayWidth":213,
"displayHeight":159,
"source":"disk",
"sizing":"original",
"displayPadding":{
"left":5,
"top":10,
"right":5,
"bottom":10
}
},
{
"url":None,
"caption":None,
"width":0,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"displayWidth":213,
"displayHeight":159,
"source":"disk",
"sizing":"original",
"displayPadding":{
"left":5,
"top":10,
"right":10,
"bottom":10
}
}
],
"orientation":"horizontal",
"smartSizing":True,
"captions":False,
"captionDefault":None,
"type":"image"
}
}
]
}
}
}
How would I examine and update the values of the imageHolder keys?
You can use a recursive function descending into list and dictionary values:
def get_all(data, key):
    """Yield every value stored under *key* anywhere inside *data*.

    Dictionaries and lists are searched recursively.  A matching value is
    yielded first and then searched itself, so matches nested inside a match
    are found as well.

    Args:
        data: A decoded JSON value (dict, list or scalar).
        key: The dictionary key to look for.

    Yields:
        Each value found under *key*, outermost first.
    """
    sub_iter = []
    if isinstance(data, dict):
        if key in data:
            yield data[key]
        # values() exists on Python 2 and 3; itervalues() is Python 2 only.
        sub_iter = data.values()
    elif isinstance(data, list):
        sub_iter = data
    for x in sub_iter:
        for y in get_all(x, key):
            yield y
Recursively walk the tree structure to find the elements you care about. E.g.:
>>> def process(blob):
... for (k,v) in blob.items():
... if k == 'imageHolder':
... ...do-something...
... if isinstance(v, dict):
... process(v)
That if isinstance(v,dict): line might be replaced with various alternatives, depending on exactly what you expect your input to be -- e.g., you could do duck-typing with something like if hasattr(v, 'items').
You need recursion:
def search_key(mykey, mydict, new_value='foo'):
    """Find every occurrence of *mykey* in a nested structure and replace its value.

    Dictionaries, lists and tuples are searched recursively.  Children are
    visited before the dictionary's own entry is replaced, so occurrences
    nested inside a value are reported before that value is overwritten.
    Each replaced value is printed as ``found old_value=...``.

    Args:
        mykey: The dictionary key to look for.
        mydict: The structure to search; dictionaries inside it are
            modified in place.
        new_value: Replacement stored under every match (default ``'foo'``).
    """
    if isinstance(mydict, dict):
        for key, value in mydict.items():
            search_key(mykey, value, new_value)
        if mykey in mydict:
            # A parenthesised single-argument print works on Python 2 and 3.
            print('found old_value=%r' % mydict[mykey])
            mydict[mykey] = new_value  # update
    elif isinstance(mydict, (list, tuple)):
        for value in mydict:
            search_key(mykey, value, new_value)
# Assign the structure first, then search it; calling search_key before
# mydict exists would raise NameError.
mydict={...} # your long json structure
search_key('imageHolder', mydict)