I have a list that I'd like to transform into a nested dictionary. The first element of the list is the parent, the second the child. Can I do this recursively without having to continue creating helper lists for each level? I feel so dumb not understanding this.
relations = [["basket", "money"],
["basket", "fruits"],
["fruits", "orange"],
["fruits", "apple"],
["basket", "vegetables"],
["vegetables", "bean"],
["vegetables", "tomato"],
["tomato", "red tomato"],
["tomato", "green tomato"],
["vegetables", "pepper"],
["sweets", "candy"]]
# First attempt (Python 2 print statement): it only looks at the direct
# children of "basket" and at one further level below them.
result = {}
running_list = []
# Pass 1: every direct child of "basket" becomes a key of result and is
# remembered in running_list.
for parent, child in relations:
if parent == "basket":
result[child] = {}
running_list.append(child)
# Pass 2: for each parent remembered above, result[parent] is replaced by a
# fresh one-entry dict, so only the last child seen for that parent is kept.
for parent, child in relations:
if parent in running_list:
result[parent] = {child : {}}
print result
Just create a dictionary that maps a name to the corresponding dictionary:
# Index every name to the dict that holds its children. The very same dict
# object is stored both in the index and under the parent's entry, so the
# nesting assembles itself whatever order the relations arrive in.
items = {}
for parent, child in relations:
    parent_node = items.setdefault(parent, {})
    child_node = items.setdefault(child, {})
    # setdefault leaves a child that is already linked untouched.
    parent_node.setdefault(child, child_node)
result = items['basket'] # basket is the top-level item
This produces:
>>> items = {}
>>> for parent, child in relations:
... parent_dict = items.setdefault(parent, {})
... child_dict = items.setdefault(child, {})
... if child not in parent_dict:
... parent_dict[child] = child_dict
...
>>> items['basket']
{'money': {}, 'vegetables': {'tomato': {'green tomato': {}, 'red tomato': {}}, 'bean': {}, 'pepper': {}}, 'fruits': {'orange': {}, 'apple': {}}}
>>> from pprint import pprint
>>> pprint(items['basket'])
{'fruits': {'apple': {}, 'orange': {}},
'money': {},
'vegetables': {'bean': {},
'pepper': {},
'tomato': {'green tomato': {}, 'red tomato': {}}}}
Related
I am having trouble creating a tree hierarchy in Python 3. I'd like to be able to do this without using classes.
The data I need to start with is not in order and in the format ['ID','Parent']:
data=[['E1', 'C1'],['C1', 'P1'],['P1', 'R1'],['E2', 'C2'],['C2', 'P2'],['P2', 'R1'],['C3', 'P2'],['E3', 'C4'],['C4', 'P3'],
['P3', 'R2'],['C5', 'P3'],['E4', 'C6'],['C6', 'P4'], ['P4', 'R2'],['E5', 'C7'],['C7', 'P5'],['P5', 'R3'],['E6', 'C9'],['C9', 'P6'],['P6', 'R3'],
['C8', 'P6'],['E7', 'C10'],['C10', 'P7'],['P7', 'R4'],['C11', 'P7'],['E8', 'C12'],['C12', 'P8'],['P8', 'R4']]
I want to create the (Tree) dictionary variable without the use of classes and end up with something like:
Tree={'R1':{'P1':{},'P2':{}},'R2':{}} etc
OR
Tree={'R1':[{'P1':[],'P2':[]}],'R2':[]} etc
Obviously R1 and R2 have more children than that but perhaps that's what the Tree structure would look like?
You can simply iterate over every (child, parent) pair and build a dictionary that maps each id, whether it appears as a child or as a parent, to a dictionary containing the children of that element. We keep doing this until every pair has been processed.
# roots: ids that have been seen as a parent but never as a child.
# mapping: id -> dict of that id's children (child id -> child's own dict).
roots = set()
mapping = {}
for child, parent in data:
    if child in mapping:
        # Already known, possibly as a provisional root: it has a parent now.
        roots.discard(child)
    else:
        mapping[child] = {}
    subtree = mapping[child]
    if parent in mapping:
        mapping[parent][child] = subtree
    else:
        # First time this parent shows up at all, so it is a root for now.
        mapping[parent] = {child: subtree}
        roots.add(parent)
Now that we have done that, roots is a set of the ids of the tree roots: so for each such element we know that there is no id that is a parent. For each id in the roots, we can simply fetch from the mapping and that is a dictionary of the structure {'childid':child} where childid is the id (here a string) and child is again a dictionary of that form.
So you can print them like:
for root in roots:
print(mapping[root])
So in your case, the tree is:
tree = { id : mapping[id] for id in roots }
For your sample data, it generates:
>>> tree
{'R1': {'P1': {'C1': {'E1': {}}}, 'P2': {'C2': {'E2': {}}, 'C3': {}}}, 'R2': {'P4': {'C6': {'E4': {}}}, 'P3': {'C5': {}, 'C4': {'E3': {}}}}, 'R3': {'P6': {'C8': {}, 'C9': {'E6': {}}}, 'P5': {'C7': {'E5': {}}}}, 'R4': {'P8': {'C12': {'E8': {}}}, 'P7': {'C11': {}, 'C10': {'E7': {}}}}}
I have method:
@staticmethod
def get_blocks():
    """Return the block names in hierarchy order.

    Public method that can be extended to add new blocks.
    First item is the most parent. Last item is the most child.

    Returns:
        blocks (list)
    """
    return ['header', 'body', 'footer']
As the docstring describes, this method can be extended to return any kind of blocks in a particular order.
So I want to make a mapping that would indicate which block is parent/child to each other (only caring about "nearest" parent/child).
def _get_blocks_mapping(blocks):
mp = {'parent': {}, 'child': {}}
if not blocks:
return mp
mp['parent'][blocks[0]] = None
mp['child'][blocks[-1]] = None
blocks_len = len(blocks)
if blocks_len > 1:
mp['parent'][blocks[-1]] = blocks[-2]
for i in range(1, len(blocks)-1):
mp['parent'][blocks[i]] = blocks[i-1]
mp['child'][blocks[i]] = blocks[i+1]
return mp
So result if we have three blocks like in get_blocks method is this:
{
'parent': {
'header': None,
'body': 'header',
'footer': 'body',
},
'child': {
'header': 'body',
'body': 'footer',
'footer': None
}
}
Well it works, but it is kind of hacky to me. So maybe someone could suggest a better way to create such mapping? (or maybe there is some used way of creating parent/child mapping? Using different structure than I intend to use?)
You want to loop over the list in pairs, giving you the natural parent-child relationships:
# Seed both maps with the two ends of the chain, then link each block to the
# one that follows it.
mp = {'parent': {}, 'child': {}}
if blocks:
    mp['parent'][blocks[0]] = None
    mp['child'][blocks[-1]] = None
    for position, child in enumerate(blocks[1:]):
        # blocks[position] is the element immediately before `child`.
        parent = blocks[position]
        mp['parent'][child] = parent
        mp['child'][parent] = child
zip() here pairs up each block with the next one in the list.
Demo:
>>> blocks = ['header', 'body', 'footer']
>>> mp = {'parent': {}, 'child': {}}
>>> if blocks:
... mp['parent'][blocks[0]] = mp['child'][blocks[-1]] = None
... for parent, child in zip(blocks, blocks[1:]):
... mp['parent'][child] = parent
... mp['child'][parent] = child
...
>>> from pprint import pprint
>>> pprint(mp)
{'child': {'body': 'footer', 'footer': None, 'header': 'body'},
'parent': {'body': 'header', 'footer': 'body', 'header': None}}
For some post-processing, I need to flatten a structure like this
{'foo': {
'cat': {'name': 'Hodor', 'age': 7},
'dog': {'name': 'Mordor', 'age': 5}},
'bar': { 'rat': {'name': 'Izidor', 'age': 3}}
}
into this dataset:
[{'foobar': 'foo', 'animal': 'dog', 'name': 'Mordor', 'age': 5},
{'foobar': 'foo', 'animal': 'cat', 'name': 'Hodor', 'age': 7},
{'foobar': 'bar', 'animal': 'rat', 'name': 'Izidor', 'age': 3}]
So I wrote this function:
def flatten(data, primary_keys):
    """Flatten a nested dict into a list of flat row dicts.

    Args:
        data: nested mapping whose first ``len(primary_keys)`` levels are
            keyed by the values to record; the leaf dicts sit below them.
        primary_keys: column names for those levels, outermost level first.

    Returns:
        list: one new dict per leaf, holding the leaf's items plus one entry
        per primary key. Neither ``data`` nor ``primary_keys`` is modified.
    """
    out = []

    def visit(node, primary_values, depth):
        if depth < len(primary_keys):
            column = primary_keys[depth]
            # .items() works on both Python 2 and 3 (iteritems is 2-only).
            for key, child in node.items():
                # The entry is overwritten for every sibling; leaves copy the
                # current values, so sharing one dict is safe.
                primary_values[column] = key
                visit(child, primary_values, depth + 1)
        else:
            row = dict(node)
            row.update(primary_values)
            out.append(row)

    visit(data, {}, 0)
    return out
out = flatten(a, ['foo', 'bar'])
I was not really satisfied because I have to use copy.copy to protect my inputs. Obviously, when using flatten one does not want the inputs be altered.
Then I thought about one alternative that uses more global variables (at least global to flatten) and uses an index instead of directly passing primary_keys to visit. However, this does not really help me to get rid of the ugly initial copy:
keys = copy.copy(primary_keys)
keys.reverse()
So here is my final version:
def flatten(data, keys):
    """Flatten a nested dict into a list of flat row dicts.

    Args:
        data: nested mapping with ``len(keys)`` levels above the leaf dicts.
        keys: column names for those levels, outermost level first.

    Returns:
        list: one new dict per leaf containing the leaf's items plus one
        entry per name in ``keys``. The caller's ``data`` (including its
        nested leaf dicts) and ``keys`` are left unmodified.
    """
    out = []
    values = {}
    depth_count = len(keys)

    def visit(node, depth):
        if depth < depth_count:
            column = keys[depth]
            for key, child in node.items():
                values[column] = key
                visit(child, depth + 1)
        else:
            # Build a new row rather than updating the leaf in place, so the
            # caller's nested dicts stay untouched (a shallow copy of the
            # top-level dict would not protect them).
            row = dict(node)
            row.update(values)
            out.append(row)

    visit(data, 0)
    return out
Is there a better implementation (that can avoid the use of copy.copy)?
Edit: modified to account for variable dictionary depth.
By using the merge function from my previous answer (below), you can avoid calling update which modifies the caller. There is then no need to copy the dictionary first.
def flatten(data, keys):
    """Flatten a nested dict into a list of flat row dicts.

    Args:
        data: nested mapping with ``len(keys)`` levels above the leaf dicts.
        keys: column names for those levels, outermost level first.

    Returns:
        list: one new dict per leaf containing the leaf's items plus one
        entry per name in ``keys``. The inputs are not modified.
    """
    out = []
    values = {}
    total = len(keys)

    def visit(node, remaining):
        if remaining:
            # keys is ordered outermost-first, so the level being visited is
            # counted from the front of the list, not from its end.
            column = keys[total - remaining]
            for key, child in node.items():
                values[column] = key
                visit(child, remaining - 1)
        else:
            # Same result as merge(node, values): a fresh dict in which the
            # collected values overlay the leaf, leaving the leaf untouched.
            row = dict(node)
            row.update(values)
            out.append(row)

    visit(data, total)
    return out
One thing I don't understand is why you need to protect the keys input. I don't see them being modified anywhere.
Previous answer
How about list comprehension?
def merge(d1, d2):
    """Return a new dict with the items of ``d1`` overlaid by those of ``d2``.

    Neither argument is modified; when a key is present in both, the value
    from ``d2`` wins.
    """
    # Copy-then-update avoids building intermediate item lists.
    merged = dict(d1)
    merged.update(d2)
    return merged
[[merge({'foobar': key, 'animal': sub_key}, sub_sub_dict)
for sub_key, sub_sub_dict in sub_dict.items()]
for key, sub_dict in a.items()]
The tricky part was merging the dictionaries without using update (which returns None).
I have a data set which follows the structure of the following example:
exampleset = {
'body' : {
'abdomen' : [{
'arms' : {
'value' : 2,
}
},{
'legs': {
'value' : 2,
}
}],
'hands' : {
'fingers' : {
'value' : 5,
}
},
}
}
I am trying to reverse this so I get something like:
{'value': {'value1': {5: {'fingers': {'hands': {'body': {}}}}},
'value2': {2: {'legs': {'abdomen': {'body': {}}}}},
'value3': {2: {'arms': {'abdomen': {'body': {}}}}}},
}
(I hope I got the bracket matching right, but you get the idea.)
I am using a couple of recursion functions to do this, like so:
# Walk `data` looking for keys listed in conf['value_names'].
# For each matching key, values[<name><count>] is set to {data[key]: {}} and
# count is incremented. Values that are dicts, or lists containing dicts, are
# searched recursively, and add_new_level() is applied with the current key.
# Returns the `values` dict.
def recurse_find(data, values, count):
global conf
for key in data:
for v in conf['value_names']:
if key == v:
values[v+str(count)] = {}
values[v+str(count)][data[key]] = {}
# NOTE(review): count is a plain int parameter, so this increment is not
# seen by the caller or by sibling recursive calls.
count += 1
# originally just using this line:
# values[data[key]] = {}
if type(data[key]) is list:
for i in data[key]:
if type(i) is dict:
values = recurse_find(i, values, count)
# add_new_level() receives the whole `values` dict, not only the entries
# found beneath `key`.
values = add_new_level(values, key)
elif type(data[key]) is dict:
values = recurse_find(data[key], values, count)
values = add_new_level(values, key)
return values
def add_new_level(data, new_key):
    """Hang ``new_key: {}`` under every empty dict reachable from ``data``.

    ``data`` is modified in place and also returned.
    """
    for name, subtree in data.items():
        if subtree == {}:
            # An empty dict is the end of a chain: extend it by one level.
            subtree[new_key] = {}
        else:
            data[name] = add_new_level(subtree, new_key)
    return data
# Names of the keys whose values recurse_find() collects.
conf = { "value_names": ["value"] }
# NOTE(review): `values` is not assigned anywhere in this snippet before it is
# indexed below; presumably a dict created earlier. Confirm.
for value in conf['value_names']:
values[value] = recurse_find(exampleset, {}, 1)
print(values)
At the moment I only get one value returned correctly, obviously I would like them all. Originally I didn't label the values (value1, value2 etc), but when doing this example set I realised that of course if the values are the same I'll only get one! If I remove the value name keys it finds all the values (unless duplicate) but still doesn't return the correct levels as it includes some of the others while it loops round. I don't care about the order of the values, just that they are labelled differently so I don't miss out any.
Current result:
{'value': {'value1': {5: {'fingers': {'hands': {'body': {}}}}}}}
I think that the solution is the inclusion of a pretty simple step, but I can't see it at the moment and I've already spent too long looking at this.
Any help appreciated.
EDIT:
I've gotten a little further by changing my recursive function to make count a global variable and having count=1 outside the function which has sorted out the getting all the values problem.
I have narrowed down the addition of extra keys to the add_new_level function, but haven't yet figured out how to change it.
Output:
{'value': {'value1': {2: {'arms': {'abdomen': {'legs': {'abdomen': {'fingers': {'hands': {'body': {}}}}}}}}},
'value2': {2: {'legs': {'abdomen': {'fingers': {'hands': {'body': {}}}}}}},
'value3': {5: {'fingers': {'hands': {'body': {}}}}}}}
I have adjusted your output type slightly, turning the dictionary that contains 'value1', 'value2', etc. into an array (a Python list). I believe this is better because the order of these entries will be lost anyway unless an OrderedDict (from the collections package) is used, and in any case an array translates quite easily from indices 0, 1, 2, 3, ... to val1, val2, val3, etc.
# Accumulator filled by revnest(): one {value: reversed-path} dict per leaf.
res = {'value': []}


def revnest(inp, keys=None):
    """Append the reversed key path of every 'value' entry to ``res['value']``.

    Args:
        inp: nested dict, or a list of dicts (merged into a single dict
            before being walked). Anything else is ignored.
        keys: keys followed so far, outermost first; leave unset on the
            initial call.

    Each ``'value'`` entry found appends ``{value: {k_n: {... {k_1: {}}}}}``
    to ``res['value']``, i.e. the path that led to it, innermost key first.
    """
    # None instead of a mutable [] default, so no list is shared across calls.
    keys = [] if keys is None else list(keys)
    if isinstance(inp, list):
        # List items that are not dicts cannot hold a 'value' key; skip them.
        inp = {k: item[k] for item in inp if isinstance(item, dict) for k in item}
    if not isinstance(inp, dict):
        # Scalar stored under a key other than 'value': nothing to record.
        return
    for x in inp:
        if x == 'value':
            entry = {}
            res['value'].append({inp[x]: entry})
            # Rebuild the path from the innermost key outwards.
            for y in reversed(keys):
                entry[y] = {}
                entry = entry[y]
        else:
            revnest(inp[x], keys + [x])
revnest(exampleset)
print res
which given your exampleset, prints:
{'value': [{2: {'legs': {'abdomen': {'body': {}}}}}, {2: {'arms': {'abdomen': {'body': {}}}}}, {5: {'fingers': {'hands': {'body': {}}}}}]}
I'm trying to build a json hierarchy from a simple table in python.
The data comes in looking like the following:
id parent name
1 10 test-name-1
2 10 test-name-2
3 5 test-name-3
4 none test-name-4
5 10 test-name-5
6 none test-name-6
7 1 test-name-7
8 1 test-name-8
9 8 test-name-9
10 4 test-name-10
and I'm looking for an output like this:
{"$4":{"name":"test-name-4","children":{
"$10":{"name":"test-name-10","children":{
"$1":{"name":"test-name-1","children":{
"$7":{"name":"test-name-7","children":{}},
"$8":{"name":"test-name-8","children":{
"$9":{"name":"test-name-9","children":{}}}}}},
"$2":{"name":"test-name-2","children":{}},
"$5":{"name":"test-name-5","children":{
"$3":{"name":"test-name-3","children":{}}}}}}}},
"$6":{"name":"test-name-6","children":"test-name-6"}}
I have no idea how many "leaves" there will be or "roots", or what order the rows from the csv will come in. My question is, is there a way that I can recursively build a dictionary/list from a child node up to the parent? How can I produce a hierarchical tree from the "leaf" pieces of the tree in python?
Thanks for the help!
I have a solution based on 2 loops too (1 to cache, 1 to build), without JSON encoder, and that gives exactly the output you required:
>>> import re
>>> from collections import defaultdict
>>> parents = defaultdict(list)
>>> for i, line in enumerate(file_.split('\n')):
if i != 0 and line.strip():
id_, parent, name = re.findall(r'[\d\w-]+', line)
parents[parent].append((id_, name))
>>> parents
defaultdict(<type 'list'>, {'10': [('1', 'test-name-1'), ('2', 'test-name-2'), ('5', 'test-name-5')], 'none': [('4', 'test-name-4'), ('6', 'test-name-6')], '1': [('7', 'test-name-7'), ('8', 'test-name-8')], '5': [('3', 'test-name-3')], '4': [('10', 'test-name-10')], '8': [('9', 'test-name-9')]})
OK, now we have our cache, the recursive function easily builds the output we'd like:
>>> def build_tree(d, val):
return {'$' + id_: {'name': name, 'children': build_tree(d, id_)} for id_, name in d[val]}
We just have to call it on the dict built previously, with value 'none' which is the tree root:
>>> from pprint import pprint
>>> pprint(build_tree(parents, 'none'))
{'$4': {'children': {'$10': {'children': {'$1': {'children': {'$7': {'children': {},
'name': 'test-name-7'},
'$8': {'children': {'$9': {'children': {},
'name': 'test-name-9'}},
'name': 'test-name-8'}},
'name': 'test-name-1'},
'$2': {'children': {},
'name': 'test-name-2'},
'$5': {'children': {'$3': {'children': {},
'name': 'test-name-3'}},
'name': 'test-name-5'}},
'name': 'test-name-10'}},
'name': 'test-name-4'},
'$6': {'children': {}, 'name': 'test-name-6'}}
>>>
To assign all child nodes to its parent, you can do two passes over the list of nodes. The first pass adds each node to a UserDict. In the second pass the parent of each node is guaranteed to be in the UserDict so the node can be added to the children of its parent.
To serialize to JSON a JSONEncoder can be used.
#!/usr/bin/env python
import sys
import json
import UserDict
class Node(object):
    """One row of the table: its id, its parent's id, its name, its children."""

    def __init__(self, nid, parent, name):
        self.nid = nid
        self.parent = parent  # id of the parent node; "none" marks a root
        self.children = []    # child Node objects, filled by NodeDict.addNodes
        self.name = name


class NodeDict(UserDict.UserDict):
    """Mapping of node id -> Node that also wires up parent/child links."""

    def addNodes(self, nodes):
        """ Add every node as a child to its parent by doing two passes.

        The first pass registers each node and links those whose parent is
        already registered; the second pass links the remaining nodes, whose
        parent appeared later in ``nodes``.
        """
        for _ in (1, 2):
            for node in nodes:
                self.data[node.nid] = node
                if node.parent == "none" or node.parent not in self.data:
                    continue
                siblings = self.data[node.parent].children
                # The second pass revisits every node; do not link twice.
                if node not in siblings:
                    siblings.append(node)


class NodeJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes Node objects as nested dicts."""

    def default(self, node):
        """Return the dict form of a Node; raise TypeError for anything else."""
        if isinstance(node, Node):
            return {"nid": node.nid, "name": node.name, "children": node.children}
        raise TypeError("{} is not an instance of Node".format(node))
# Script entry point (Python 2): read the whitespace-separated table named by
# the first command-line argument and print one JSON document per root node.
if __name__ == "__main__":
nodes = []
with open(sys.argv[1]) as f:
# [1:] skips the first line of the file (the header in the sample table).
for row in f.readlines()[1:]:
nid, parent, name = row.split()
nodes.append(Node(nid, parent, name))
nodeDict = NodeDict()
nodeDict.addNodes(nodes)
# Root nodes are those whose parent column is the literal string "none".
rootNodes = [node for nid, node in nodeDict.items()
if node.parent == "none"]
for rootNode in rootNodes:
print NodeJSONEncoder().encode(rootNode)
Result:
{"name": "test-name-4", "nid": "4", "children":[
{"name": "test-name-10", "nid": "10", "children":[
{"name": "test-name-1", "nid": "1", "children":[
{"name": "test-name-7", "nid": "7", "children": []},
{"name": "test-name-8", "nid": "8", "children":[
{"name": "test-name-9", "nid": "9", "children": []}]}]},
{"name": "test-name-2", "nid": "2", "children": []},
{"name": "test-name-5", "nid": "5", "children":[
{"name": "test-name-3", "nid": "3", "children": []}]}]}]}
{"name": "test-name-6", "nid": "6", "children": []}
The answer given did not work for me in Python 3.6 because the UserDict module (UserDict.UserDict) no longer exists in Python 3; the class now lives in collections. So I made some changes to make it work and generalized it a little by letting the user specify the columns for child_id, parent_id and child name via the command line. Please see below (I am just learning and am sure this could be improved, but it works for my purposes).
""" Converts a CSV file with Parent/Child Hierarchy to a hierarchical JSON file for front-end processing (javascript/DS)
USAGE: csv2json.py <somefile.csv> a b c (column nrs of a=child_id, b=parent-id, c=name(of child))
ROOT of hierarchy should contain child_id and parent_id = 'none' or blank. name must exist """
import sys
import json
import csv
#import UserDict
from collections import UserDict
class Node(object):
    """One CSV row: its own id, its parent's id, its name and its children."""

    def __init__(self, child_id, parent_id, name):
        self.child_id = child_id
        self.parent_id = parent_id  # "none" or "" marks a root node
        self.children = []          # child Node objects, filled by addNodes
        self.name = name


class NodeDict(UserDict):
    """Mapping of child_id -> Node that also wires up parent/child links."""

    def addNodes(self, nodes):
        """ Add every node as a child to its parent_id by doing two passes.

        The first pass registers each node and links those whose parent is
        already registered; the second pass links the remaining nodes, whose
        parent appeared later in ``nodes``.
        """
        for _ in (1, 2):
            for node in nodes:
                self.data[node.child_id] = node
                # "none" and "" mark a root; never treat them as a parent id,
                # even if some row happens to use one of them as its own id.
                if node.parent_id in ("none", ""):
                    continue
                parent = self.data.get(node.parent_id)
                # The second pass revisits every node; do not link twice.
                if parent is not None and node not in parent.children:
                    parent.children.append(node)


class NodeJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes Node objects as nested dicts."""

    def default(self, node):
        """Return the dict form of a Node; raise TypeError for anything else."""
        if isinstance(node, Node):
            return {"name": node.name, "children": node.children}
        raise TypeError("{} is not an instance of Node".format(node))
if __name__ == "__main__":
    # Column positions of child id, parent id and name, parsed once up front
    # instead of once per row.
    child_col = int(sys.argv[2])
    parent_col = int(sys.argv[3])
    name_col = int(sys.argv[4])

    nodes = []
    # newline='' is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(sys.argv[1], 'r', newline='') as f:
        for row in csv.reader(f):
            if not row[name_col]:  # skip if no name/label exists
                continue
            nodes.append(Node(row[child_col], row[parent_col], row[name_col]))

    nodeDict = NodeDict()
    nodeDict.addNodes(nodes)
    # Roots are the rows whose parent column is "none" or blank.
    rootNodes = [node for node in nodeDict.values()
                 if node.parent_id in ("none", "")]
    for rootNode in rootNodes:
        print(NodeJSONEncoder().encode(rootNode))