JSON get key path in nested dictionary - python

json = '{
"app": {
"Garden": {
"Flowers": {
"Red flower": "Rose",
"White Flower": "Jasmine",
"Yellow Flower": "Marigold"
}
},
"Fruits": {
"Yellow fruit": "Mango",
"Green fruit": "Guava",
"White Flower": "groovy"
},
"Trees": {
"label": {
"Yellow fruit": "Pumpkin",
"White Flower": "Bogan"
}
}
}'
Here is my json string, which keeps on changing frquently so the keys position within the dictionary is not same everytime, i need to
search for a key and print it corresponding value, Since the json string changes everytime I have written an recursive function(See below) to search
for key in the new json string and print the value. However now the situation is we have same key multiple times with diff values, how can
i get the complete path of the key so it would be easier to understand which key value it is, for example the result should be like this:
app.Garden.Flowers.white Flower = Jasmine
app.Fruits.White Flower = groovy
app.Trees.label.White Flower = Bogan
My code so far:
import json
with open('data.json') as data_file:
j = json.load(data_file)
# j=json.loads(a)
def find(element, JSON):
if element in JSON:
print JSON[element].encode('utf-8')
for key in JSON:
if isinstance(JSON[key], dict):
find(element, JSON[key])
find(element to search,j)

You could add a string parameter that keeps track of the current JSON path. Something like the following could work:
def find(element, JSON, path, all_paths):
if element in JSON:
path = path + element + ' = ' + JSON[element].encode('utf-8')
print path
all_paths.append(path)
for key in JSON:
if isinstance(JSON[key], dict):
find(element, JSON[key],path + key + '.',all_paths)
You would call it like this:
all_paths = []
find(element_to_search,j,'',all_paths)

def getDictValueFromPath(listKeys, jsonData):
"""
>>> mydict = {
'a': {
'b': {
'c': '1'
}
}
}
>>> mykeys = ['a', 'b']
>>> getDictValueFromPath(mykeys, mydict)
{'c': '1'}
"""
localData = jsonData.copy()
for k in listKeys:
try:
localData = localData[k]
except:
return None
return localData
gist

The following code snippet will give a list of paths that are accessible in the JSON. It's following the convention of JSON paths where [] signifies that the path is a list.
def get_paths(source):
paths = []
if isinstance(source, collections.abc.MutableMapping):
for k, v in source.items():
if k not in paths:
paths.append(k)
for x in get_paths(v):
if k + '.' + x not in paths:
paths.append(k + '.' + x)
elif isinstance(source, collections.abc.Sequence) and not isinstance(source, str):
for x in source:
for y in get_paths(x):
if '[].' + y not in paths:
paths.append('[].' + y)
return paths

Here is a modified version of Brian's answer that supports lists and returns the result:
def find(element, JSON, path='', all_paths=None):
all_paths = [] if all_paths is None else all_paths
if isinstance(JSON, dict):
for key, value in JSON.items():
find(element, value, '{}["{}"]'.format(path, key), all_paths)
elif isinstance(JSON, list):
for index, value in enumerate(JSON):
find(element, value, '{}[{}]'.format(path, index), all_paths)
else:
if JSON == element:
all_paths.append(path)
return all_paths
Usage:
find(JSON, element)

Related

How create dict inside dict by string using dot

I have a dict list of dicts.
dict_list = [
{"nameClient": "client.name"},
{"emailClient": "client.email"},
{"Document.typeDocument": "client.type_document"},
{"Document.numberDocument": "client.number_document"},
{"Document.docOne.number": "client.docone_number"},
{"Document.docOne.type": "client.docone_type"},
{"Document.docTwo.number": "client.doctwo_number"},
{"Document.docOne.extra.number": "client.docone_extra_number"},
]
I want to create a dict based on key of this dicts and get value from my class based on value of this dicts.
Client Class and Values initialized:
class Client:
def __init__(self, data):
self.name = data['name']
self.email = data['email']
self.type_document = data['type_document']
self.number_document = data['number_document']
self.docone_number = data['docone_number']
self.docone_type = data['docone_type']
self.doctwo_number = data['doctwo_number']
self.docone_extra_number = data['docone_extra_number']
# this will be passed on build_final_dict(FINAL_DICT, i, k, v, client)
client1 = Client({
"name": "Stack",
"email": "xxxxx#xxxx.com",
"type_document": "TPS",
"number_document": "22222222",
"docone_number": "11111111",
"docone_type": "docone type",
"doctwo_number": "doc two number",
"docone_extra_number": "doc extra number",
})
So I started passing to a function the key/value
FINAL_DICT = {}
for i in dict_list:
for k, v in i.items():
build_final_dict(FINAL_DICT, i, k, v, client)
My build_final_dict()
def build_final_dict(FINAL_DICT, i, k, v, client):
if '.' not in k:
FINAL_DICT[k] = getattr(client, v.replace('client.', ''))
else:
subdict = k.split('.')[0] # ex documento
subdict_key = i.items()[0][0]
subdict_key = subdict_key.replace('%s.' % subdict, '')
subdict_value = i.items()[0][1]
subdict_value = subdict_value.replace('client.', '')
if subdict not in FINAL_DICT:
FINAL_DICT[subdict] = dict()
result_value = getattr(client, subdict_value)
if '.' not in subdict_key:
FINAL_DICT[subdict][subdict_key] = result_value
else:
new_subkey = subdict_key.split('.')[0]
new_subvalue = subdict_key.split('.')[1]
if new_subkey not in FINAL_DICT[subdict]:
FINAL_DICT[subdict][new_subkey] = dict()
build_final_dict(FINAL_DICT[subdict][new_subkey], i, new_subvalue, v, client)
Actual Result:
{
"emailClient":"xxxxx#xxxx.com",
"nameClient":"Stack",
"Document":{
"typeDocument":"TPS",
"numberDocument":"22222222",
"docTwo":{
"number":"doc two number"
},
"docOne":{
"type":"docone type",
"extra":"doc extra number", // should continue creating dict within dict...
"number":"11111111"
}
}
}
Most of the dictionary is right. But the "extra" dictionary that I put inside a dictionary (third sub level) did not create a new dictionary and put the final value.
There is the possibility of having infinite sub dictionaries, I need my script to be prepared for that.
Result I expected (based on Client class Data):
{
"emailClient":"xxxxx#xxxx.com",
"nameClient":"Stack",
"Document":{
"typeDocument":"TPS",
"numberDocument":"22222222",
"docTwo":{
"number":"doc two number"
},
"docOne":{
"type":"docone type",
"extra": { "number": "doc extra number" }, // dict, not string
"number":"11111111"
}
}
}
I think, this will help you in transforming the initial JSON to the structure you need. This is in accordance to your original requirement of
I want to create a dict based on key of this dicts and get value from
my class based on value of this dicts.
I would try this in a split approach, to create nested dicts for every dict in your list (if they have nesting in keys), and then merge them together as single unit.
See an example approach here:
from collections.abc import MutableMapping
from functools import reduce
def merge(d1, d2):
# Merge 2 dictionaries -deep.
for k, v in d1.items():
if k in d2:
if all(isinstance(e, MutableMapping) for e in (v, d2[k])):
d2[k] = merge(v, d2[k])
md = d1.copy()
md.update(d2)
return md
def explode_to_dict(key, value):
# Create nested dicts based on the key structure
if "." in key:
p, c = key.rsplit(".", 1)
return explode_to_dict(p, {c: value})
else:
return {key: value}
if __name__ == '__main__':
dict_list = [
{"nameClient": "client.name"},
{"emailClient": "client.email"},
{"Document.typeDocument": "client.type_document"},
{"Document.numberDocument": "client.number_document"},
{"Document.docOne.number": "client.docone_number"},
{"Document.docOne.type": "client.docone_type"},
{"Document.docTwo.number": "client.doctwo_number"},
{"Document.docOne.extra.number": "client.docone_extra_number"},
]
result_dict = {}
result_dict = reduce(merge, [explode_to_dict(*d.popitem()) for d in dict_list])
print(json.dumps(result_dict))
This provides a structure like below(expected):
{
"nameClient": "client.name",
"emailClient": "client.email",
"Document": {
"typeDocument": "client.type_document",
"numberDocument": "client.number_document",
"docOne": {
"number": "client.docone_number",
"type": "client.docone_type",
"extra": {
"number": "client.docone_extra_number"
}
},
"docTwo": {
"number": "client.doctwo_number"
}
}
}
I suppose, you can proceed with your class manipulation from here!

Convert dot delimited string to json Python

i've got a dot delimited string which I need to convert to Json. This is an example with different types of strings:
my.dictionary.value -> value
my.dictionary.list[0].value -> value
my.dictionary.list[1].value.list[0].value -> value
I have no problems converting the first type of string using a recursive approach:
def put(d, keys, item):
if "." in keys:
key, rest = keys.split(".", 1)
if key not in d:
d[key] = {}
put(d[key], rest, item)
else:
d[keys] = item
But i'm struggling to find a solution for the lists. Is there a library that provides out of the box string to json conversion? Thank you for your time.
AFAIK, there isn't any modules that would do this
Here is a sample code to converted a series of dotted strings into json format. You just have create a new list when you see the pattern [n] in the string that would be used as a key.
import re
import json
def parse_dotted_strlines(strlines):
res= {}
for line in strlines.splitlines():
parse_dotted_str(line, res)
return res
def parse_dotted_str(s, res):
if '.' in s:
key, rest = s.split('.', 1)
# Check if key represents a list
match = re.search(r'(.*)\[(\d)\]$', key)
if match:
# List
key, index = match.groups()
val = res.get(key, {}) or []
assert type(val) == list, f'Cannot set key {key} as of type list as i
t was earlier marked as {type(val)}'
while len(val) <= int(index):
val.append({})
val[index] = parse_dotted_str(rest, {})
res[key] = val
else:
# Dict
res[key] = parse_dotted_str(rest, res.get(key, {}))
elif '->' in s:
key, val = s.split('->')
res[key.strip()] = val.strip()
return res
Sample input and output
lines = """
my.dictionary.value -> value
my.dictionary.list[0].value -> value
my.dictionary.list[1].value.list[0].value -> value
"""
res = parse_dotted_strlines(lines)
print (json.dumps(res, indent=4))
{
"my": {
"dictionary": {
"value": "value",
"list": [
{
"value": "value"
},
{
"value": {
"list": [
{
"value": "value"
}
]
}
}
]
}
}
}
the json package is what you need
import json
mydict = """{
"str1": "str",
"list1": ["list1_str1", "list1_str2"],
"list2": ["list2_str1", "list2_str2", ["list2_str11", "list_str12"]]
}"""
json.loads(mydict)
>> {'str1': 'str',
'list1': ['list1_str1', 'list1_str2'],
'list2': ['list2_str1', 'list2_str2', ['list2_str11', 'list_str12']]}

How to create directory tree from a list of strings in Python?

I would like to write a fucntion that mimics a folder structure in Python. Given a list of strings I would like to create a tree of folders & subfolders. For example:
['beers', 'wines', 'beers/ipa/stone', 'wines/red/cabernet']
Would output a dictionary with the following:
{
'beers': {
'ipa': {
'stone': {}
}
},
'wines': {
'red': {
'cabernet': {}
}
}
}
x = ['beers', 'wines', 'beers/ipa/stone', 'wines/red/cabernet']
def add_items(d, items):
if len(items) == 1:
if items[0] in d:
return
else:
d[items[0]] = dict()
else:
if items[0] not in d:
d[items[0]] = dict()
add_items(d[items[0]], items[1:])
out = dict()
for item in x:
items = item.split("/")
add_items(out, items)
print(out)
{'wines': {'red': {'cabernet': {}}}, 'beers': {'ipa': {'stone': {}}}}
Just go through you list and add names of each strings that you split on the slash character. You can use setdefault() to ensure that the next level dictionary exist (i.e. auto create entries as you go)
strings = ['beers', 'wines', 'beers/ipa/stone', 'wines/red/cabernet']
directory = dict()
for path in strings:
d = directory
for name in path.split("/"):
d = d.setdefault(name,dict())
print(directory)
{'beers':
{ 'ipa': {'stone': {}} },
'wines':
{'red': {'cabernet': {}} }
}
With Python 3, the order of items in each dict will correspond to their original relative order in the list of strings
If you want the items in each dictionary to appear in alphanumerical order, you can change the loop like this:
directory = dict()
for path in sorted(s.split("/") for s in strings):
d = directory
for name in path:
d = d.setdefault(name,dict())
If you like recursive functions, here's a simple one that does the same thing (but less efficiently):
def makeTree(strings, separator="/", tree=None):
tree = tree or dict()
for name,*subs in (s.split(separator,1) for s in strings):
tree[name] = makeTree(subs, separator, tree.get(name))
return tree
d = makeTree(strings)
print(d)
{'beers':
{ 'ipa': {'stone': {}} },
'wines':
{'red': {'cabernet': {}} }
}

Is there any JSON tag filtering example?

I have one json file and i need to list all "selftext" elements of all data.
Any example of it ?
data example
{ "data": [
{
"selftext": "hello there",
"textex": true,
},
If you want to be able to find a key from an arbitrary json at an arbitrary level, you should use recursion:
def findkey(data, key, resul = None):
if resul is None: resul=[] # initialize an empty list for the results
if isinstance(data, list): # walk down into lists
for d in data:
findkey(d, key, resul)
elif isinstance(data, dict): # dict processing
for k,v in data.items():
if (k == key) and isinstance(v, str): # the expected key and a string value?
resul.append(v)
elif isinstance(v, list) or isinstance(v, dict):
findkey(v, key, resul) # recurse if value is a list or a dict
return resul
Example:
>>> data = { "data": [
{
"selftext": "hello there",
"textex": True,
},
]}
>>> findkey(data, 'selftext')
['hello there']

Pyspark - get attribute names from json file

I am new to pyspark . My requirement is to get/extract the attribute names from a nested json file . I tried using json_normalize imported from pandas package. It works for direct attributes but never fetches the attributes within json array attributes. My json doesn't have a static structure. It varies for each document that we receive. Could someone please help me with explanation for the small example provided below,
{
"id":"1",
"name":"a",
"salaries":[
{
"salary":"1000"
},
{
"salary":"5000"
}
],
"states":{
"state":"Karnataka",
"cities":[
{
"city":"Bangalore"
},
{
"city":"Mysore"
}
],
"state":"Tamil Nadu",
"cities":[
{
"city":"Chennai"
},
{
"city":"Coimbatore"
}
]
}
}
Especially for the json array elements..
Expected output :
id
name
salaries.salary
states.state
states.cities.city``
Here is the another solution for extracting all nested attributes from json
import json
result_set = set([])
def parse_json_array(json_obj, parent_path):
array_obj = list(json_obj)
for i in range(0, len(array_obj)):
json_ob = array_obj[i]
if type(json_obj) == type(json_obj):
parse_json(json_ob, parent_path)
return None
def parse_json(json_obj, parent_path):
for key in json_obj.keys():
key_value = json_obj.get(key)
# if isinstance(a, dict):
if type(key_value) == type(json_obj):
parse_json(key_value, str(key) if parent_path == "" else parent_path + "." + str(key))
elif type(key_value) == type(list(json_obj)):
parse_json_array(key_value, str(key) if parent_path == "" else parent_path + "." + str(key))
result_set.add((parent_path + "." + key).encode('ascii', 'ignore'))
return None
file_name = "C:/input/sample.json"
file_data = open(file_name, "r")
json_data = json.load(file_data)
print json_data
parse_json(json_data, "")
print list(result_set)
Output:
{u'states': {u'state': u'Tamil Nadu', u'cities': [{u'city': u'Chennai'}, {u'city': u'Coimbatore'}]}, u'id': u'1', u'salaries': [{u'salary': u'1000'}, {u'salary': u'5000'}], u'name': u'a'}
['states.cities.city', 'states.cities', '.id', 'states.state', 'salaries.salary', '.salaries', '.states', '.name']
Note:
My Python version: 2.7
you can do in this way also.
data = { "id":"1", "name":"a", "salaries":[ { "salary":"1000" }, { "salary":"5000" } ], "states":{ "state":"Karnataka", "cities":[ { "city":"Bangalore" }, { "city":"Mysore" } ], "state":"Tamil Nadu", "cities":[ { "city":"Chennai" }, { "city":"Coimbatore" } ] } }
def dict_ittr(lin,data):
for k, v in data.items():
if type(v)is list:
for l in v:
dict_ittr(lin+"."+k,l)
elif type(v)is dict:
dict_ittr(lin+"."+k,v)
pass
else:
print lin+"."+k
dict_ittr("",data)
output
.states.state
.states.cities.city
.states.cities.city
.id
.salaries.salary
.salaries.salary
.name
If you treat the json like a python dictionary, this should work.
I just wrote a simple recursive program.
Script
import json
def js_r(filename):
with open(filename) as f_in:
return(json.load(f_in))
g = js_r("city.json")
answer_d = {}
def base_line(g, answer_d):
for key in g.keys():
answer_d[key] = {}
return answer_d
answer_d = base_line(g, answer_d)
def recurser_func(g, answer_d):
for k in g.keys():
if type(g[k]) == type([]): #If the value is a list
answer_d[k] = {list(g[k][0].keys())[0]:{}}
if type(g[k]) == type({}): #If the value is a dictionary
answer_d[k] = {list(g[k].keys())[0]: {}} #set key equal to
answer_d[k] = recurser_func(g[k], answer_d[k])
return answer_d
recurser_func(g,answer_d)
def printer_func(answer_d, list_to_print, parent):
for k in answer_d.keys():
if len(answer_d[k].keys()) == 1:
list_to_print.append(parent)
list_to_print[-1] += k
list_to_print[-1] += "." + str(list(answer_d[k].keys())[0])
if len(answer_d[k].keys()) == 0:
list_to_print.append(parent)
list_to_print[-1] += k
if len(answer_d[k].keys()) > 1:
printer_func(answer_d[k], list_to_print, k + ".")
return list_to_print
l = printer_func(answer_d, [], "")
final = " ".join(l)
print(final)
Explanation
base_line makes a dictionary of all your base keys.
recursur_func checks if the key's value is a list or dict then adds to the answer dictionary as is necessary until answer_d looks like: {'id': {}, 'name': {}, 'salaries': {'salary': {}}, 'states': {'state': {}, 'cities': {'city': {}}}}
After these 2 functions are called you have a dictionary of keys in a sense. Then printer_func is a recursive function to print it as you desired.
NOTE:
Your question is similar to this one: Get all keys of a nested dictionary but since you have a nested list/dictionary instead of just a nested dictionary, their answers won't work for you, but there is more discussion on the topic on that question if you like more info
EDIT 1
my python version is 3.7.1
I have added a json file opener to the top. I assume that the json is named city.json and is in the same directory
EDIT 2: More thorough explanation
The main difficulty that I found with dealing with your data is the fact that you can have infinitely nested lists and dictionaries. This makes it complicated. Since it was infinite possible nesting, I new this was a recursion problem.
So, I build a dictionary of dictionaries representing the key structure that you are looking for. Firstly I start with the baseline.
base_line makes {'id': {}, 'name': {}, 'salaries': {}, 'states': {}} This is a dictionary of empty dictionaries. I know that when you print. Every key structure (like states.state) starts with one of these words.
recursion
Then I add all the child keys using recursur_func.
When given a dictionary g this function for loop through all the keys in that dictionary and (assuming answer_d has each key that g has) for each key will add that keys child to answer_d.
If the child is a dictionary. Then I recurse with the given dictionary g now being the sub-part of the dictionary that pertains to the children, and answer_d being the sub_part of answer_d that pertains to the child.

Categories

Resources