Extracting the keys associated in previous levels nested dictionary - python

I have a large nested dictionary of unknown depth, and I would like to know how I can find the keys that lead to a given key. For example:
{'furniture':{'chair':{'sofa':{'cushion':{}}}}}
Ideally what i am looking for is a function to determine the path to the value that i have entered. I have tried researching online and this is what i tried...
def route(d, key):
    """Return the value stored under ``key`` somewhere inside nested dict ``d``.

    The current level is checked first; otherwise every dict-valued entry is
    searched depth-first in iteration order. Returns ``None`` when the key is
    not found. A nested match whose value is ``None`` is indistinguishable
    from "not found", so the search continues past it.
    """
    if key in d:
        return d[key]
    # Only the values are needed to descend; the keys on the way are ignored,
    # which is why this version cannot report the path to the match.
    for v in d.values():
        if isinstance(v, dict):
            item = route(v, key)
            if item is not None:
                return item
    return None
This returns the value stored under the key. What I want instead is the path that leads to that key. For example, calling route(dictionary, 'sofa') should give output like the following, or something similar:
{'sofa':{'chair':'furniture'}}
What are some of the ways that i can achieve this ? Thanks for your help

You can do this recursively and return a list of keys that lead you to your target key:
def route(d, key):
    """Return the list of keys leading from the top of ``d`` down to ``key``.

    The returned list ends with ``key`` itself. Nested dictionaries are
    searched depth-first in iteration order and the first match wins. An
    empty list means the key does not occur anywhere in ``d``.
    """
    if key in d:
        return [key]
    for k, v in d.items():
        # isinstance (rather than an exact type comparison) also descends
        # into dict subclasses such as OrderedDict or defaultdict.
        if isinstance(v, dict):
            found = route(v, key)
            if found:
                return [k] + found
    return []
If we run this on the following dictionary:
# Sample input: two top-level branches nested to different depths.
data = {
'furniture': {
'chair': {
'sofa': {
'cushion': {}
}
}
},
'electronics': {
'tv': {
'samsung43': 800,
'tcl54': 200
}
}
}
# Look up a deeply nested key, a key in the second branch, and a key that
# does not exist (route returns an empty list for the last one).
print(route(data, 'cushion'))
print(route(data, 'tcl54'))
print(route(data, 'hello'))
we get the following output:
['furniture', 'chair', 'sofa', 'cushion']
['electronics', 'tv', 'tcl54']
[]

Related

Python, get dictionary, nested dictionary, nested list keys

I am trying to get all keys from a json file in Python.
How to get nested second level(x,y) and third level keys(a,b).
For example, Keys: results,x,y,a,b
Code:
#open data
import json
with open('list.json') as f:
    my_dict = json.load(f)
#1
#find keys
# Iterating a dict yields its keys, so only the top-level keys are printed.
for key in my_dict:
    print("Keys : {}".format(key))
Json:
{
"results":[
{
"x":5
},
{
"x":5,
"y":[
1,
2,
3
]
},
{
"x":5,
"y":{
"a":2,
"b":67
}
}
]
}
Output:
Keys : results
You need to get the keys which are a part of the value of the JSON.
You therefore need to iterate over the values of my_dict not the keys.
Use a recursive function to return all nested keys. Here is the reference Stack Overflow page.
import json
def recursive_items(dictionary):
    """Yield every key found at any depth of a decoded JSON document.

    Keys are yielded in traversal order: a key is produced first, then the
    keys nested inside its value. Both dict values and lists (including lists
    nested in lists) are descended into, so keys of containers such as
    ``results`` or ``y`` are reported as well as leaf keys.

    A key that occurs several times is yielded once per occurrence; use
    ``list(dict.fromkeys(recursive_items(doc)))`` for unique keys in
    first-seen order.
    """
    def _walk(node):
        if isinstance(node, dict):
            for key, value in node.items():
                yield key
                yield from _walk(value)
        elif isinstance(node, list):
            for item in node:
                yield from _walk(item)
        # Scalars carry no keys, so there is nothing to yield for them.

    yield from _walk(dictionary)
with open('list.json') as f:
    my_dict = json.load(f)
#find keys
for key in recursive_items(my_dict):
    print("Keys : {}".format(key))

Is there any JSON tag filtering example?

I have one json file and i need to list all "selftext" elements of all data.
Any example of it ?
data example
{ "data": [
{
"selftext": "hello there",
"textex": true,
},
If you want to be able to find a key from an arbitrary json at an arbitrary level, you should use recursion:
def findkey(data, key, resul=None):
    """Collect every string value stored under ``key`` at any depth of ``data``.

    ``data`` is walked recursively through lists and dicts. Only values that
    are ``str`` instances are collected; a matching key whose value is a list
    or dict is descended into instead, and any other value is ignored.

    ``resul`` is the accumulator list. Callers normally omit it, in which case
    a fresh list is created for each top-level call. The accumulator is
    returned.
    """
    if resul is None:
        resul = []  # initialize an empty list for the results
    if isinstance(data, list):  # walk down into lists
        for d in data:
            findkey(d, key, resul)
    elif isinstance(data, dict):  # dict processing
        for k, v in data.items():
            if k == key and isinstance(v, str):  # the expected key and a string value?
                resul.append(v)
            elif isinstance(v, (list, dict)):
                findkey(v, key, resul)  # recurse if value is a list or a dict
    return resul
Example:
>>> data = { "data": [
{
"selftext": "hello there",
"textex": True,
},
]}
>>> findkey(data, 'selftext')
['hello there']

Pyspark - get attribute names from json file

I am new to PySpark. My requirement is to extract the attribute names from a nested JSON file. I tried using json_normalize from the pandas package. It works for direct attributes but never fetches the attributes inside JSON array attributes. My JSON doesn't have a static structure; it varies for each document that we receive. Could someone please help me, with an explanation, using the small example below:
{
"id":"1",
"name":"a",
"salaries":[
{
"salary":"1000"
},
{
"salary":"5000"
}
],
"states":{
"state":"Karnataka",
"cities":[
{
"city":"Bangalore"
},
{
"city":"Mysore"
}
],
"state":"Tamil Nadu",
"cities":[
{
"city":"Chennai"
},
{
"city":"Coimbatore"
}
]
}
}
Especially for the json array elements..
Expected output :
id
name
salaries.salary
states.state
states.cities.city
Here is another solution for extracting all nested attributes from JSON:
import json
# Accumulates every dotted attribute path discovered by parse_json().
result_set = set()


def parse_json_array(json_obj, parent_path):
    """Record the attribute paths found inside the JSON array ``json_obj``.

    Array elements do not add a path segment of their own: keys of dict
    elements are recorded directly under ``parent_path``. Nested arrays are
    descended into; scalar elements carry no attribute names and are skipped
    (previously they were passed to parse_json and raised AttributeError).
    """
    for item in json_obj:
        if isinstance(item, dict):
            parse_json(item, parent_path)
        elif isinstance(item, list):
            parse_json_array(item, parent_path)


def parse_json(json_obj, parent_path):
    """Add the dotted path of every key in ``json_obj`` to ``result_set``.

    ``parent_path`` is the dotted path of the enclosing object, or ``""`` at
    the top level. Paths of container keys (objects and arrays) are recorded
    as well as leaf keys. Top-level keys are recorded without a leading dot.
    """
    for key, key_value in json_obj.items():
        # Build the path once so the recorded name and the prefix handed to
        # the recursive calls can never disagree.
        path = str(key) if parent_path == "" else parent_path + "." + str(key)
        if isinstance(key_value, dict):
            parse_json(key_value, path)
        elif isinstance(key_value, list):
            parse_json_array(key_value, path)
        result_set.add(path)
file_name = "C:/input/sample.json"
# The with-block closes the file even if json.load raises.
with open(file_name, "r") as file_data:
    json_data = json.load(file_data)
# Single-argument print(...) behaves the same on Python 2.7 and Python 3.
print(json_data)
parse_json(json_data, "")
print(list(result_set))
Output:
{u'states': {u'state': u'Tamil Nadu', u'cities': [{u'city': u'Chennai'}, {u'city': u'Coimbatore'}]}, u'id': u'1', u'salaries': [{u'salary': u'1000'}, {u'salary': u'5000'}], u'name': u'a'}
['states.cities.city', 'states.cities', '.id', 'states.state', 'salaries.salary', '.salaries', '.states', '.name']
Note:
My Python version: 2.7
you can do in this way also.
data = { "id":"1", "name":"a", "salaries":[ { "salary":"1000" }, { "salary":"5000" } ], "states":{ "state":"Karnataka", "cities":[ { "city":"Bangalore" }, { "city":"Mysore" } ], "state":"Tamil Nadu", "cities":[ { "city":"Chennai" }, { "city":"Coimbatore" } ] } }
def dict_ittr(lin, data):
    """Print the dotted path of every leaf attribute in ``data``.

    ``lin`` is the path of the enclosing object (``""`` at the top level).
    Each printed path is ``lin + "." + key``, so top-level attributes are
    printed with a leading dot. Dicts inside a list are walked under the
    list's own path, one pass per element, so repeated elements print
    repeated paths. A list that contains no dicts (scalars only, or empty)
    is treated as a leaf and its path is printed once.
    """
    for k, v in data.items():
        path = lin + "." + k
        if isinstance(v, list):
            nested = [item for item in v if isinstance(item, dict)]
            for item in nested:
                dict_ittr(path, item)
            if not nested:
                print(path)
        elif isinstance(v, dict):
            dict_ittr(path, v)
        else:
            print(path)
dict_ittr("",data)
output
.states.state
.states.cities.city
.states.cities.city
.id
.salaries.salary
.salaries.salary
.name
If you treat the json like a python dictionary, this should work.
I just wrote a simple recursive program.
Script
import json
def js_r(filename):
    """Read ``filename`` and return its decoded JSON content.

    The file is opened as UTF-8 explicitly so the result does not depend on
    the platform's default text encoding.
    """
    with open(filename, encoding="utf-8") as f_in:
        return json.load(f_in)
# Decoded input document, read from city.json.
g = js_r("city.json")
# Dictionary of keys that the functions below populate.
answer_d = {}
def base_line(g, answer_d):
    """Seed ``answer_d`` with an empty dict for every top-level key of ``g``.

    ``answer_d`` is modified in place and also returned. Existing entries
    under the same keys are overwritten.
    """
    for key in g:
        answer_d[key] = {}
    return answer_d
# Start with one empty entry per top-level key of the document.
answer_d = base_line(g, answer_d)
def recurser_func(g, answer_d):
    """Mirror the key structure of ``g`` into ``answer_d`` and return it.

    Every key of ``g`` gets an entry in ``answer_d`` whose value is a dict of
    its child keys (empty for scalars). Dict values are mirrored recursively.
    For list values, the keys of all dict elements are merged under the
    list's key; scalar elements and empty lists contribute no children.

    ``answer_d`` is modified in place. Entries that already exist are
    extended rather than replaced, so it may be pre-seeded with empty dicts.
    """
    for k, value in g.items():
        # Reuse an existing entry so keys seen in earlier list elements (or a
        # pre-seeded skeleton) are merged with the ones found here.
        child = answer_d.setdefault(k, {})
        if isinstance(value, dict):
            recurser_func(value, child)
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    recurser_func(item, child)
    return answer_d
# Fill in the nested keys; answer_d is updated in place, so the return value is not used.
recurser_func(g,answer_d)
def printer_func(answer_d, list_to_print, parent):
    """Append the dotted path of every leaf in the key tree ``answer_d``.

    ``answer_d`` maps each key to a dict of its child keys; an empty dict
    marks a leaf. ``parent`` is the dotted prefix of the current level,
    including its trailing dot (``""`` at the top level). Paths are appended
    to ``list_to_print`` in iteration order and the same list is returned.
    """
    for k, children in answer_d.items():
        if children:
            # Carry the whole prefix down so paths stay complete at any depth.
            printer_func(children, list_to_print, parent + k + ".")
        else:
            list_to_print.append(parent + k)
    return list_to_print
# Collect the dotted paths, then print them on one line separated by spaces.
l = printer_func(answer_d, [], "")
final = " ".join(l)
print(final)
Explanation
base_line makes a dictionary of all your base keys.
recurser_func checks whether each key's value is a list or a dict and adds its child keys to the answer dictionary as necessary, until answer_d looks like: {'id': {}, 'name': {}, 'salaries': {'salary': {}}, 'states': {'state': {}, 'cities': {'city': {}}}}
After these 2 functions are called you have a dictionary of keys in a sense. Then printer_func is a recursive function to print it as you desired.
NOTE:
Your question is similar to this one: Get all keys of a nested dictionary but since you have a nested list/dictionary instead of just a nested dictionary, their answers won't work for you, but there is more discussion on the topic on that question if you like more info
EDIT 1
my python version is 3.7.1
I have added a json file opener to the top. I assume that the json is named city.json and is in the same directory
EDIT 2: More thorough explanation
The main difficulty I found in dealing with your data is that lists and dictionaries can be nested to any depth, which makes it complicated. Since the nesting could be arbitrarily deep, I knew this was a recursion problem.
So, I build a dictionary of dictionaries representing the key structure that you are looking for. Firstly I start with the baseline.
base_line makes {'id': {}, 'name': {}, 'salaries': {}, 'states': {}} This is a dictionary of empty dictionaries. I know that when you print. Every key structure (like states.state) starts with one of these words.
recursion
Then I add all the child keys using recurser_func.
When given a dictionary g, this function loops through all the keys in that dictionary and (assuming answer_d has each key that g has) adds each key's children to answer_d.
If the child is a dictionary. Then I recurse with the given dictionary g now being the sub-part of the dictionary that pertains to the children, and answer_d being the sub_part of answer_d that pertains to the child.

how to modify the key of a nested Json

I am trying to update the keys of a JSON object which looks like this:
results =
{
'Game':12345,
'stats':[
{
'detail':[
{
'goals':4,
'refs':{
'number':0
I am currently manually updating each key as follow
##update Game to newValue
results['newValue'] = results['Game']
del results['Game']
## update nested key "goals" to "goals_against"
results['stats'][0]['detail'][0]['goals_against'] = results['stats'][0]['detail'][0]['goals']
del results['stats'][0]['detail'][0]['goals']
There has to be a better way to do this, as I find myself having to update multiple keys in results. For example, I also want to rename the "number" key to "assis_ref".
I know how to update a key if is the json file is "simple": ie if i could do this:
result['stats']['details']['refs']
however, 'stats' and 'details' require [0] next to it which i assume is the index of the element i am trying to go next.
I wrote a recursive function to handle transforming keys in json objects. Especially useful for deeply nested json!
def walk_json(obj, key_transform):
    """
    Recurse over a json object modifying the keys according to the `key_transform` function.
    Returns a new json object with the modified keys.

    `obj` must be a dict and is left unmodified. Every dict key at any depth
    is passed through `key_transform`, including keys of dicts that sit
    inside lists nested in other lists. Lists are rebuilt element by element
    and all other values are carried over as-is. If two keys of the same
    dict transform to the same new key, the later one wins.
    """
    assert isinstance(obj, dict), "walk_json expects obj to be of type dict"

    def _walk_json(node):
        if isinstance(node, dict):
            return {key_transform(key): _walk_json(value) for key, value in node.items()}
        if isinstance(node, list):
            return [_walk_json(item) for item in node]
        return node  # take value as is

    return _walk_json(obj)
Here's how it's used for an overly simple json object:
def my_key_transform(x):
    """Example key transform: return the key ``x`` in upper case."""
    return x.upper()
my_obj = {"a": 1, "b": 2, "c": 3}
result = walk_json(my_obj, key_transform=my_key_transform)
result
{"A": 1, "B": 2, "C": 3}
It can be painful to navigate and modify deeply nested objects derived from JSON. In "Functions that help to understand json(dict) structure" I posted code that allows you to navigate such objects; please read the explanation in that answer. In this answer, I'll show how you can use that code to modify the dictionary keys in such objects.
Briefly, find_key is a recursive generator that will find all the keys with a given name. You can use the next function to get the first (or only) matching name. Or call find_key in a for loop if you need to work with multiple keys that have the same name.
Each value yielded by find_key is a pair: the list of dict keys and list indices needed to reach the desired key, and the value stored under that key.
from json import dumps
def find_key(obj, key):
    """Yield ``(path, value)`` for every occurrence of ``key`` inside ``obj``.

    ``obj`` may be a dict or a list; anything else yields nothing. ``path``
    is the list of dict keys and list indices that leads from ``obj`` to the
    match, ending with ``key`` itself. Matches are produced lazily in
    depth-first order.
    """
    if isinstance(obj, dict):
        yield from iter_dict(obj, key, [])
    elif isinstance(obj, list):
        yield from iter_list(obj, key, [])


def iter_dict(d, key, indices):
    """Search dict ``d``; ``indices`` is the path that leads to ``d``."""
    for k, v in d.items():
        if k == key:
            yield indices + [k], v
        # Keep descending even after a match so that occurrences nested
        # inside the matched value are found as well.
        if isinstance(v, dict):
            yield from iter_dict(v, key, indices + [k])
        elif isinstance(v, list):
            yield from iter_list(v, key, indices + [k])


def iter_list(seq, key, indices):
    """Search list ``seq``; ``indices`` is the path that leads to ``seq``."""
    for k, v in enumerate(seq):
        if isinstance(v, dict):
            yield from iter_dict(v, key, indices + [k])
        elif isinstance(v, list):
            yield from iter_list(v, key, indices + [k])
results = {
    "Game": 12345,
    "stats": [
        {
            "detail": [
                {
                    "goals": 4,
                    "refs": {
                        "number": 0
                    }
                }
            ]
        }
    ]
}
# Change oldkey to newkey
oldkey, newkey = 'goals', 'goals_against'
# Find the first occurrence of the oldkey
# (next() raises StopIteration if oldkey does not occur in results)
seq, val = next(find_key(results, oldkey))
print('seq:', seq, 'val:', val)
# Get the object that contains the oldkey
obj = results
for k in seq[:-1]:
    obj = obj[k]
# Change the key
obj[newkey] = obj.pop(oldkey)
print(dumps(results, indent=4))
output
seq: ['stats', 0, 'detail', 0, 'goals'] val: 4
{
"Game": 12345,
"stats": [
{
"detail": [
{
"refs": {
"number": 0
},
"goals_against": 4
}
]
}
]
}

Get value of key in arbitrary dictionary of unknown depth

I have a dictionary of unknown depth and structure. It might contain more dictionaries, lists of dictionaries, etc. It's created from deserializing some JSON input created by another system. There's a key, perhaps multiple keys with the same name, at various places in that dictionary. I'd like to get the values at each of those keys and ideally update them.
Given a dictionary structure like this:
{
"tags":{
"social-1":{
"email":True,
"twitter":True,
"facebook":True,
"linkedin":True,
"type":"social"
},
"primary":{
"type":"flexible",
"width":"auto",
"blocks":[
{
"type":"combo",
"data":{
"styles":{
"margin":"10",
"padding":"0",
"borderColor":"#000",
"borderWidth":"0",
"borderStyle":"solid",
"backgroundColor":"transparent",
"width":"auto"
},
"placeholder":True,
"headline":{
"visible":False
},
"subHeadline":{
"visible":False
},
"imageHolder":{
"visible":True,
"value":[
{
"url":None,
"caption":None,
"width":220,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"sizing":"original",
"source":"disk",
"displayWidth":200,
"displayHeight":140,
"displayPadding":{
"left":10,
"top":0,
"right":10,
"bottom":10
}
}
],
"smartSizing":True,
"captions":False,
"captionDefault":None
},
"content":{
"visible":True,
"value":"<p>Your text here.</p>"
},
"imagePosition":"left",
"textWrap":False,
"type":"combo"
}
},
{
"type":"image",
"data":{
"styles":{
"margin":"10",
"padding":"0",
"borderColor":"#000",
"borderWidth":"0",
"borderStyle":"solid",
"backgroundColor":"transparent",
"width":"auto"
},
"placeholder":False,
"imageHolder":[
{
"url":None,
"caption":None,
"width":0,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"sizing":"original",
"source":"disk",
"displayWidth":213,
"displayHeight":159,
"displayPadding":{
"left":10,
"top":10,
"right":5,
"bottom":10
}
},
{
"url":None,
"caption":None,
"width":0,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"displayWidth":213,
"displayHeight":159,
"source":"disk",
"sizing":"original",
"displayPadding":{
"left":5,
"top":10,
"right":5,
"bottom":10
}
},
{
"url":None,
"caption":None,
"width":0,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"displayWidth":213,
"displayHeight":159,
"source":"disk",
"sizing":"original",
"displayPadding":{
"left":5,
"top":10,
"right":10,
"bottom":10
}
}
],
"orientation":"horizontal",
"smartSizing":True,
"captions":False,
"captionDefault":None,
"type":"image"
}
}
]
}
}
}
How would I examine and update the values of the imageHolder keys?
You can use a recursive function descending into list and dictionary values:
def get_all(data, key):
    """Yield every value stored under ``key`` at any depth of ``data``.

    ``data`` may be a dict, a list, or a scalar (which yields nothing). A
    matching value is yielded first and then searched itself, so matches
    nested inside a matched value are reported too.
    """
    sub_iter = []
    if isinstance(data, dict):
        if key in data:
            yield data[key]
        # values() exists on both Python 2 and Python 3, whereas itervalues()
        # was removed in Python 3.
        sub_iter = data.values()
    if isinstance(data, list):
        sub_iter = data
    for x in sub_iter:
        for y in get_all(x, key):
            yield y
Recursively walk the tree structure to find the elements you care about. E.g.:
>>> def process(blob):
... for (k,v) in blob.items():
... if k == 'imageHolder':
... ...do-something...
... if isinstance(v, dict):
... process(v)
That if isinstance(v,dict): line might be replaced with various alternatives, depending on exactly what you expect your input to be -- e.g., you could do duck-typing with something like if hasattr(v, 'items').
You need recursion:
def search_key(mykey, mydict, new_value='foo'):
    """Report and overwrite every value stored under ``mykey`` in ``mydict``.

    ``mydict`` is walked recursively through dicts, lists and tuples. For
    each dict that contains ``mykey``, the old value is printed and then
    replaced in place with ``new_value`` (``'foo'`` by default). Children are
    visited before their parent is updated, so a match nested inside another
    match is reported before the outer one is overwritten.
    """
    if isinstance(mydict, dict):
        for value in mydict.values():
            search_key(mykey, value, new_value)
        if mykey in mydict:
            # Wrap the value in a 1-tuple: with a bare tuple value the %
            # operator would treat its items as separate format arguments.
            print('found old_value=%r' % (mydict[mykey],))
            mydict[mykey] = new_value  # update
    elif isinstance(mydict, (list, tuple)):
        for value in mydict:
            search_key(mykey, value, new_value)
# The structure must be defined before it is searched; calling search_key
# ahead of this assignment would raise NameError.
mydict = {...} # your long json structure
search_key('imageHolder', mydict)

Categories

Resources