Convert python dictionary lists to a tree format [closed] - python

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 2 years ago.
Improve this question
Our crawling team has captured industry information on shopping sites, such as:
industry_list = [
{
"parent_ind" : "Clothing",
"name" : "shirt"
},
{
"name": "Clothing"
},
{
"parent_ind" : "Clothing",
"name" : "jeans"
},
{
"parent_ind" : "Clothing",
"name" : "Dress"
},
{
"name": "Digital Products"
},
{
"parent_ind" : "Digital Products",
"name": "computer parts"
},
{
"parent_ind" : "computer parts",
"name": "Hard Disk"
},
]
For ease of access, we would like to be able to convert this to a tree format, for example:
{
"Digital Products": {
"computer parts": {
"Hard Disk" : {}
}
},
"Clothing" : {
"shirt": {},
"jeans": {},
"Dress": {}
}
}

You can do it in O(n) time by initializing a links dictionary with all names plus a None entry for the root. Then add each child to its parent's sub-dictionary (no recursion needed). The root of the links dictionary will point to the final tree:
# One (initially empty) subtree per industry name, so any node can be
# reached by name with a single dictionary lookup.
links = {}
for entry in industry_list:
    links[entry["name"]] = {}

# The None key stands for the virtual root: entries without a
# "parent_ind" are attached there, so `tree` ends up holding the result.
tree = {}
links[None] = tree

# Hang every subtree under its parent's subtree. Only references are
# stored, so children added later still show up inside `tree`.
for entry in industry_list:
    child = entry["name"]
    links[entry.get("parent_ind")][child] = links[child]
output:
print(tree)
{ 'Clothing':
{ 'shirt': {}, 'jeans': {}, 'Dress': {} },
'Digital Products':
{ 'computer parts':
{'Hard Disk': {}}
}
}

Something like this could work:
# Group every entry that names a parent under that parent's name.
result = {}
for entry in industry_list:
    parent = entry.get('parent_ind')
    if not parent:
        # Entries without a (truthy) parent are not inserted by this pass.
        continue

    # Make sure a top-level bucket exists for the parent.
    result.setdefault(parent, {})
    del entry['parent_ind']  # NOTE: this mutates the input entry
    name = entry['name']

    for children in result.values():
        # If the parent already sits one level below some top-level key,
        # nest the entry there; otherwise use the parent's own bucket.
        holder = children[parent] if parent in children else result[parent]
        holder[name] = {}

    # Once the parent is reachable as somebody's child, its temporary
    # top-level bucket is a duplicate and gets removed.
    if any(parent in children for children in result.values()):
        del result[parent]
result:
{
'Clothing': {
'shirt': {},
'jeans': {},
'Dress': {}
},
'Digital Products': {
'computer parts': {
'Hard Disk': {}
}
}
}

This should work I believe... Obviously you'll need to modify it to suit your needs. Right now, you can use this script to print out the accessTree you're after. You'll want to adopt the internal logic and have a python script that dynamically reads your industry lists.
import pprint
pp = pprint.PrettyPrinter(indent=2)
industry_list = [
{
"parent_ind" : "Clothing",
"name" : "shirt"
},
{
"name": "Clothing"
},
{
"parent_ind" : "Clothing",
"name" : "jeans"
},
{
"parent_ind" : "Clothing",
"name" : "Dress"
},
{
"name": "Digital Products"
},
{
"parent_ind" : "Digital Products",
"name": "computer parts"
},
{
"parent_ind" : "computer parts",
"name": "Hard Disk"
}
]
# Initialize an access tree object
accessTree = {}
# Recursive object search function
def _findItem(obj, key):
if key in obj: return obj[key]
for k, v in obj.items():
if isinstance(v, dict):
item = _findItem(v, key)
if item is not None:
return obj[k][key]
# Place every entry that names a parent industry into the access tree.
for entry in industry_list:
    if "parent_ind" not in entry:
        # Entries without a parent are only added once a child refers to them.
        continue

    parent_name, child_name = entry["parent_ind"], entry["name"]
    parent_node = _findItem(accessTree, parent_name)

    if parent_node is None:
        # Parent not in the tree yet: create it at the top level together
        # with this first child.
        accessTree[parent_name] = {child_name: {}}
    else:
        # Parent already present somewhere in the tree: attach the child.
        parent_node[child_name] = {}

print("Final Access Tree as follows: \n")
pp.pprint(accessTree)

industry_list = [
    {
        "parent_ind": "Clothing",
        "name": "shirt"
    },
    {
        "name": "Clothing"
    },
    {
        "parent_ind": "Clothing",
        "name": "jeans"
    },
    {
        "parent_ind": "Clothing",
        "name": "Dress"
    },
    {
        "name": "Digital Products"
    },
    {
        "parent_ind": "Digital Products",
        "name": "computer parts"
    },
    {
        "parent_ind": "computer parts",
        "name": "Hard Disk"
    },
]

# The finished tree: root industries at the top level.
new_industry_list = {}
# name -> that industry's (mutable) subtree, for every industry placed so far.
where_map = {}

# Repeatedly pick the first entry that can be placed (a root, or one whose
# parent has already been placed), insert it and remove it from the work
# list. NOTE: industry_list is consumed and is empty afterwards.
while industry_list:
    for i, d in enumerate(industry_list):
        if 'parent_ind' not in d:  # no parent
            siblings = new_industry_list
        elif d['parent_ind'] in where_map:  # its parent is already in the tree
            siblings = where_map[d['parent_ind']]
        else:
            continue  # parent not placed yet; try the next entry
        the_dict = {}
        siblings[d['name']] = the_dict
        where_map[d['name']] = the_dict
        del industry_list[i]
        break  # the list changed under us, so restart the scan
    else:
        # A full pass placed nothing: the remaining entries refer to parents
        # that never appear. Without this guard the while loop spins forever.
        raise ValueError(
            "unknown parent industry for: %s"
            % ", ".join(repr(d['name']) for d in industry_list)
        )

print(new_industry_list)
Prints:
{'Clothing': {'shirt': {}, 'jeans': {}, 'Dress': {}}, 'Digital Products': {'computer parts': {'Hard Disk': {}}}}

Related

Resorting a nested json into a dictionary

I have a JSON string in the following format that i get from an API and need to reformat it, so that I can check the difference between two lookups (which settings are different in the different modules):
{ "modules": [
{
"name": "A1",
"bar" : "AA",
"settings" :[
{
"name" : "set1",
"value" : "1"
},
{
"name" : "set2",
"value" : "2"
}
]
},
{
"name": "A2",
"bar" : "DD",
"settings" :[
{
"name" : "set1",
"value" : "A21"
}
]
},
{
"name": "A1",
"settings" :[
{
"name" : "set3",
"value" : "1"
}
]
}
]
}
and need to get it into a dictionary of the format
'A1' : {
'bar' : 'AA',
'settings': {
'set1' : '1',
'set2' : '2',
'set3' : '1'
}....
is there any nicer, easier way to do this than, assuming I have read the string from above in a dictionary json_dict
# Group by module name. A name may occur several times in the input and
# 'bar' may be absent on some occurrences, so build the result incrementally
# instead of indexing module['bar'] unconditionally.
modules_a = {}
for module in json_dict['modules']:
    entry = modules_a.setdefault(module['name'], {'bar': None, 'settings': {}})
    if 'bar' in module:
        entry['bar'] = module['bar']
    # dict.update needs a mapping: the comprehension must be wrapped in {}.
    entry['settings'].update({s['name']: s['value'] for s in module['settings']})
You have some errors in the input: you missed a comma after bar. Here is a more readable version:
# First, merge together the modules with the same names.
# NOTE: in this snippet `json` is the parsed document (a dict), not the
# standard-library module of the same name.
concatenated_json = {'modules': []}
reference_dict = dict()  # module name -> merged module stored in concatenated_json
for module in json["modules"]:
    name = module["name"]
    if name not in reference_dict:
        # First occurrence of this name: keep a copy with its own settings
        # list so that later merging does not modify the input document.
        merged = dict(module, settings=list(module.get("settings", [])))
        concatenated_json['modules'].append(merged)
        reference_dict[name] = merged
    else:
        # Append to settings of a previously mentioned module
        merged = reference_dict[name]
        merged["settings"] += module.get("settings", [])
        if "bar" in module:
            # Only fill in 'bar' if the first occurrence did not carry one.
            merged.setdefault("bar", module["bar"])
json = concatenated_json

# Format dict in a required way
modules_a = {
    module["name"]: {
        # None when no occurrence of the module provided a 'bar'.
        "bar": module.get("bar"),
        "settings": {
            setting["name"]: setting["value"] for setting in module["settings"]
        },
    }
    for module in json["modules"]
}
Here's a way to do it, although I'm not sure what you meant about "resorting".
# Create one result entry per module name up front, each with an empty
# 'settings' list to accumulate into.
modules_a = {}
for module in json_dict['modules']:
    modules_a[module['name']] = {'settings': []}

for module in json_dict['modules']:
    obj = modules_a[module['name']]
    # Copy every top-level attribute except 'settings' onto the entry.
    plain_attrs = {}
    for k, v in module.items():
        if k != 'settings':
            plain_attrs[k] = v
    obj.update(plain_attrs)
    # Collect the settings as a list of single-pair {name: value} dicts.
    pairs = [{setting['name']: setting['value']} for setting in module['settings']]
    obj['settings'].extend(pairs)

import json
print(json.dumps(modules_a, indent=4))
Result:
{
"A1": {
"settings": [
{
"set1": "1"
},
{
"set2": "2"
},
{
"set3": "1"
}
],
"bar": "AA",
"name": "A1"
},
"A2": {
"settings": [
{
"set1": "A21"
}
],
"bar": "DD",
"name": "A2"
}
}

update only leaves of a json file from another file

I have a Json file(dataSchema) and I want to fill the leaves with the values of another Json file(data).
Below an example of the two input files and what I want as output.
dataSchema:
"data": {
"keyA": {},
"keyB": {
"keyB1" : {
"keyB11" : ""
}
},
"keyC": {},
"keyD": {},
"keyE": {
"keyE1" : ""
}
}
data:
"data": {
"keyA": {
"keyA1" : {
"keyA11" : ValueA11,
"keyA12" : ValueA12
},
},
"keyB": {
"keyB1" : {
"keyB11" : ValueB11
},
"keyB2" : {
"keyB21" : ValueB21
},
"keyB3" : {
"keyB31" : ValueB31,
"keyB32" : ValueB32
}
},
"keyC": {
"keyC1" : ValueC1
},
"keyD": {
"keyD1" : {
"keyD11" : ValueD11
},
"keyD2" : {
"keyD21" : ValueD21
}
},
"keyE": {
"keyE1" : {
"keyE11" : {
"keyE111" : ValueE111,
"keyE112" : ValueE112
},
"keyE12" : ValueE12
},
"keyE2" : ValueE2
}
}
What I want:
"data": {
"keyA": {
"keyA1" : {
"keyA11" : ValueA11,
"keyA12" : ValueA12
},
},
"keyB": {
"keyB1" : {
"keyB11" : ValueB11
}
},
"keyC": {
"keyC1" : ValueC1
},
"keyD": {
"keyD1" : {
"keyD11" : ValueD11
},
"keyD2" : {
"keyD21" : ValueD21
}
},
"keyE": {
"keyE1" : {
"keyE11" : {
"keyE111" : ValueE111,
"keyE112" : ValueE112
},
"keyE12" : ValueE12
}
}
}
I tried with update function but it includes all sub-keys.
Is there an elegant and short way to do this with Python?
You can use recursion for that:
def is_leaf(v):
    """Return True when schema value ``v`` is a leaf.

    A leaf is anything that is not a dict, or an empty dict.
    """
    return not (isinstance(v, dict) and v)


def fill_leaves(schema, data):
    """Return the part of ``data`` that is selected by ``schema``.

    Keys of ``data`` that do not occur in ``schema`` are dropped. Where the
    schema value is a leaf, the whole data value is taken over; otherwise
    the function recurses into both structures.
    """
    selected = ((k, v) for k, v in data.items() if k in schema)
    return {
        k: (v if is_leaf(schema[k]) else fill_leaves(schema[k], v))
        for k, v in selected
    }
Here is the same function with a loop instead of a dict comprehension (more readable):
def fill_leaves(schema, data):
    """Return the part of ``data`` that is selected by ``schema``.

    Keys of ``data`` missing from ``schema`` are skipped. For a leaf schema
    value (see ``is_leaf``) the data value is copied over as-is; otherwise
    the sub-schema and the sub-data are processed recursively.
    """
    d_out = {}
    for k, v in data.items():
        if k not in schema:
            continue
        sub_schema = schema[k]
        d_out[k] = v if is_leaf(sub_schema) else fill_leaves(sub_schema, v)
    return d_out
well, let's do a recursive walk on schema tree:
def normalize(schema, data, result):
    """Fill ``result`` with the schema's keys, taking values from ``data``.

    Walks ``schema`` recursively. A schema value that is empty or not a
    dict is a leaf: the matching ``data`` value is copied over whole. A
    non-empty dict is a branch and is descended into. Keys missing from
    ``data`` keep the schema's own value.

    Args:
        schema: Nested dict describing which keys to keep. Not modified.
        data: Nested dict providing the values.
        result: Dict that receives the output; it is filled in place.

    Returns:
        None. The output is written into ``result``.
    """
    import copy

    for k, value in schema.items():
        if k not in data:
            # Nothing to fill in: keep the schema's placeholder, but as a
            # copy so that result never shares structure with the schema.
            result[k] = copy.deepcopy(value)
        elif isinstance(value, dict) and value:
            # Branch: recurse into a fresh dict. Storing the schema's own
            # sub-dict here would make the recursion write data values
            # straight into the schema.
            result[k] = {}
            normalize(value, data[k], result[k])
        else:
            # leaf
            result[k] = data[k]
# Run the walk and pretty-print the outcome. print() with a single argument
# behaves the same on Python 2 and Python 3.
rs = {}
normalize(schema, data, rs)
print(json.dumps(rs, indent=2, sort_keys=True))

How to make dynamic nested updates to a dict?

I have a requirement where I have to update/merge a nested child of a dict. I've tried dict.update, but it strips the sibling (get_users in the example below).
I can update a dict like tree['endpoints']['get_tickets']['handlers']['after'] = 'new_after_handler', but those dict keys will be dynamic, coming from string, any idea how to achieve this?
So I basically want to get the test below passed, of course endpoints.get_tickets.handlers will be dynamic.
def test_partial_merge(self):
    """Merging into a dotted sub-path changes only that sub-dict.

    The "after" handler under endpoints.get_tickets.handlers must be
    replaced, while its sibling keys and the "get_users" endpoint stay
    as they are.
    """
    def tree_with(after_handler):
        # Source and expected differ only in the "after" handler.
        return {
            "name": "tucktock",
            "endpoints": {
                "get_tickets": {
                    "path": "tickets",
                    "handlers": {
                        "after": after_handler,
                        "after_each": "after_each_handler"
                    }
                },
                "get_users": {},
            },
        }

    source = tree_with("after_handler")
    merging = {"after": "new_after_handler"}
    expected = tree_with("new_after_handler")

    result = Merger().merge(source, merging, "endpoints.get_tickets.handlers")
    self.assertEqual(expected, result)
You can do something like this:
source = {
"name": "tucktock",
"endpoints": {
"get_tickets": {
"path": "tickets",
"handlers": {
"after": "after_handler",
"after_each": "after_each_handler"
}
},
"get_users": {},
},
}
merging = {
"after": "new_after_handler",
}
expected = {
"name": "tucktock",
"endpoints": {
"get_tickets": {
"path": "tickets",
"handlers": {
"after": "new_after_handler",
"after_each": "after_each_handler"
}
},
"get_users": {},
},
}
def merge(a, b, dict_path):  # modifies a in place
    """Update the dict found at ``dict_path`` inside ``a`` with ``b``.

    ``dict_path`` is an iterable of keys that is followed from ``a``
    downwards; the dict reached at the end is updated with ``b``. Nothing
    is returned, and a missing key raises KeyError.
    """
    target = a
    for step in dict_path:
        target = target[step]
    target.update(b)
merge(source, merging, "endpoints.get_tickets.handlers".split('.'))
print(source == expected)
>>> True
In your Merger.merge method you can convert the source to collections.defaultdict(dict). Then you can iterate over the third parameter ("endpoints.get_tickets.handlers".split('.')) and iteratively go to the level of depth you need, then update this part.
Example:
def merge(source, merging, path):
    """Return a copy of ``source`` with ``merging`` merged in at ``path``.

    Args:
        source: Nested dict to start from. It is left untouched.
        merging: Dict whose items are written into the targeted sub-dict.
        path: Dot-separated key path, e.g. "endpoints.get_tickets.handlers".

    Returns:
        A ``collections.defaultdict(dict)`` holding the merged tree; it
        compares equal to a plain dict with the same content.

    Raises:
        KeyError: If a key below the top level of ``path`` does not exist.
    """
    from collections import defaultdict
    import copy

    result = defaultdict(dict)
    # Deep copy: a shallow update() would share the nested dicts with
    # `source`, so the merge below would also modify the caller's data.
    result.update(copy.deepcopy(source))

    current_level = result
    for key in path.split('.'):
        current_level = current_level[key]
    current_level.update(merging)
    return result

How to store a particular portion of json in a variable based on certain condition in Python?

I have a json which looks like below
result_json = {
"status":"Gov info",
"user_input":[
{
"rule":"Location"
},
{
"des": "This is for location1",
"value": 1
},
{
"des": "This is for location2",
"value": 2
},
{
"rule":"District"
},
{
"des": "This is for district1",
"value": 1
},
{
"des": "This is for district2",
"value": 2
},
{
"des": "This is for district3",
"value": 3
},
{
"des": "This is for district4",
"value": 4
},
{
"rule":"Country"
},
{
"des": "This is for country1",
"value": 1
},
{
"rule":"Continent"
},
{
"des": "This is for continent1",
"value": 1
},
{
"des": "This is for continent2",
"value": 2
},
],
"source":"Gov",
"id":"5ass1"
}
I also have a list like so
lookup = [u'Location', u'District', u'Country', u'Continent']
Now what I want to do is that I look at each value of the list, check against the json for the same value (the value is stored against rule key) and get the sub json right after it until I hit the next rule. For example
The first value in the list lookup is Location. Now I loop through user_input key's value, check against the sub key rule and find out that the value Location matches and right after that store the subsequent dictionaries until I hit the next key rule. So for lookup value Location, after checking against the json and collecting the subsequent dictionary, this is how I will store
filtered_output = {
"Location":[
{
"des":"This is for location1",
"value":1
},
{
"des":"This is for location2",
"value":2
}
]
}
Now I look for next lookup value which is District and the subsequent part of json that will be stored is
filtered_output = {
"Location":[
{
"des":"This is for location1",
"value":1
},
{
"des":"This is for location2",
"value":2
}
],
"District":[
{
"des":"This is for district1",
"value":1
},
{
"des":"This is for district2",
"value":2
},
{
"des":"This is for district3",
"value":3
},
{
"des":"This is for district4",
"value":4
}
]
}
I tried doing something like below
filtered_output = {}
for i in lookup:  # the list defined above is named `lookup`
    temp_json = []
    for j in result_json["user_input"]:
        # Only the entry that carries the matching "rule" key is collected.
        if j.get("rule") == i:
            temp_json.append(j)
Here it only stores the dictionary that contains the key rule but doesn't continue further until it hits the next rule key. I am not sure how to make this work. Any help will be appreciated.
I would first transform your input to the format you want and then I would only filter the keys, something like this:
user_input = result_json["user_input"]

# Single pass: an entry with a "rule" key opens a new group, and every
# other entry is appended to the most recently opened group.
transformed_user_input = {}
for el in user_input:
    if "rule" not in el:
        transformed_user_input[current_rule].append(el)
        continue
    current_rule = el["rule"]
    transformed_user_input[current_rule] = []

# Keep only the groups that were asked for, in lookup order.
lookup = [u'Location', u'District', u'Country', u'Continent']
filtered_user_input = dict(
    (key, transformed_user_input[key]) for key in lookup
)
This way, you process your input only once (don't know how big it is).

manipulating json in python using recursion

All,
I am trying to change the way some json looks by going through and formatting it in the following way:
1. flatten all of the fields lists
2. Then remove the fields lists and replace them with the name : flatten list
Example:
{
"name": "",
"fields": [{
"name": "keys",
"fields": [{
"node-name": "0/0/CPU0"
},
{
"interface-name": "TenGigE0/0/0/47"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
}
]
},
{
"name": "content",
"fields": [{
"name": "lldp-neighbor",
"fields": [{
"receiving-interface-name": "TenGigE0/0/0/47"
},
{
"receiving-parent-interface-name": "Bundle-Ether403"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
},
{
"chassis-id": "78ba.f975.a64f"
},
{
"port-id-detail": "Te0/1/0/4/0"
},
{
"header-version": 0
},
{
"hold-time": 120
},
{
"enabled-capabilities": "R"
},
{
"platform": ""
}
]
}]
}
]
}
Would turn into:
{
"": [{
"keys": [{
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}]
},
{
"content": [{
"lldp-neighbor": [{
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"device-id": "ASR9K-H1902.corp.cisco.com",
"chassis-id": "78ba.f975.a64f",
"port-id-detail": "Te0/1/0/4/0",
"header-version": 0,
"hold-time": 120,
"enabled-capabilities": "R",
"platform": ""
}]
}]
}
]
}
I have tried the following to get the list flattened:
def _flatten_fields(self, fields_list):
c = {}
for b in [d for d in fields_list if bool(d)]:
c.update(b)
return c
This seems to work but I can't figure out a way to get into the sub levels using recursion, I am saving all flatten lists and names into a new dictionary, is there a way to do it by just manipulating the original dictionary?
This worked on the example you provided:
import json
def flatten(data):
    """Collapse a nested ``name``/``fields`` structure into plain dicts.

    Args:
        data: Either a node dict ``{"name": ..., "fields": [...]}``, a leaf
            dict of key/value pairs, or a list of such dicts.

    Returns:
        A dict. A node becomes ``{name: flatten(fields)}``, a leaf dict
        contributes its own pairs, and a list is merged entry by entry
        (later entries win on duplicate keys).
    """
    result = dict()
    if isinstance(data, dict):
        if 'name' in data and 'fields' in data:
            result[data['name']] = flatten(data['fields'])
        else:
            # Leaf: take the pairs as they are. update() works on Python 2
            # and 3 (dict views cannot be indexed on Python 3) and also
            # copes with an empty dict or a leaf key that is called "name".
            result.update(data)
    else:
        for entry in data:
            result.update(flatten(entry))
    return result
# print() with a single argument behaves the same on Python 2 and Python 3.
print(json.dumps(flatten(data), indent=4))
Output
{
"": {
"keys": {
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
},
"content": {
"lldp-neighbor": {
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0",
"chassis-id": "78ba.f975.a64f",
"platform": "",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"enabled-capabilities": "R"
}
}
}
}
It doesn't have the extra list layers shown in your expected output, but I don't think you want those.
This worked on the example you provided:
def flatten_fields(fields_list):
    """Flatten a list of field dicts into a one-element list holding a dict.

    For every dict in ``fields_list``: a "fields" entry is flattened
    recursively and stored under the dict's "name"; "name" itself is
    skipped; the first other key is copied over and ends the scan of
    that dict.
    """
    flat = {}
    for item in fields_list:
        for key, value in item.items():
            if key == "name":
                continue
            if key == "fields":
                flat[item["name"]] = flatten_fields(value)
                continue
            flat[key] = value
            break
    return [flat]
But it works on a list of dictionaries, so you should call it like flatten_fields([data])[0].
The output is:
{
"": [{
"keys": [{
"node-name": "0/0/CP0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}],
"content": [{
"lldp-neighbor": [{
"chassis-id": "78ba.f975.a64f",
"receiving-parent-interface-name": "Bndle-Ether403",
"enabled-capabilities": "R",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"receiving-interface-name": "TenGigE0/0/0/47",
"platform": "",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0"
}]
}]
}]
}

Categories

Resources