I have a nested list that I need to convert into a hierarchical dictionary. However I am a bit confused how to achieve it in a clean pythonic way. Here's a somewhat ugly sample code that I have come up with. How to improve it?
from itertools import tee,izip
import json
L=[(1,2,3,4,5),(1,2,7),(2,3,5),(3,4,5,6)]
def pairs(iterable):
a,b = tee(iterable)
b.next()
return izip(a,b)
def innerfunc(pairs,d):
try:
pair = pairs.next()
item, nextitem = pair
except StopIteration:
return
if item in d:
innerfunc(pairs,d[item])
else:
d[item]= {}
{nextitem : innerfunc(pairs,d[item])}
def outerfunc(matrix):
result_dict={}
for row in matrix:
iter_pairs = pairs(row+(0,))
innerfunc(iter_pairs,result_dict)
return result_dict
print json.dumps(outerfunc(L), sort_keys=True, indent=4)
Output:
{
"1": {
"2": {
"3": {
"4": {
"5": {}
}
},
"7": {}
}
},
"2": {
"3": {
"5": {}
}
},
"3": {
"4": {
"5": {
"6": {}
}
}
}
}
You can do that quite succinctly using recursion:
def append_path(root, paths):
if paths:
child = root.setdefault(paths[0], {})
append_path(child, paths[1:])
# Example usage
root = {}
for p in [(1,2,3,4,5),(1,2,7),(2,3,5),(3,4,5,6)]:
append_path(root, p)
# Print results
import json
print json.dumps(root, indent=4)
Output:
{
"1": {
"2": {
"3": {
"4": {
"5": {}
}
},
"7": {}
}
},
"2": {
"3": {
"5": {}
}
},
"3": {
"4": {
"5": {
"6": {}
}
}
}
}
Related
Working on a freshwater fish conservation project. I scraped a JSON file that looks like this:
{
"fish": [
{
"id": 0,
"n": "NO INFORMATION",
"a": "NONE",
"i": "none.png"
},
{
"id": 1,
"n": "Hampala barb",
"a": "Hampala macrolepidota",
"i": "hampala.png"
},
{
"id": 2,
"n": "Giant snakehead",
"a": "Channa micropeltes",
"i": "toman.png"
},
{
"id": 3,
"n": "Clown featherback",
"a": "Chitala ornata",
"i": "belida.png"
}
]
}
And I'm trying to extract the keys "id" and "a" into a python dictionary like this:
fish_id = {
0 : "NONE",
1 : "Hampala macrolepidota",
2 : "Channa micropeltes",
3 : "Chitala ornata"
}
import json
data = """{
"fish": [
{
"id": 0,
"n": "NO INFORMATION",
"a": "NONE",
"i": "none.png"
},
{
"id": 1,
"n": "Hampala barb",
"a": "Hampala macrolepidota",
"i": "hampala.png"
},
{
"id": 2,
"n": "Giant snakehead",
"a": "Channa micropeltes",
"i": "toman.png"
},
{
"id": 3,
"n": "Clown featherback",
"a": "Chitala ornata",
"i": "belida.png"
}
]
}"""
data_dict = json.loads(data)
fish_id = {}
for item in data_dict["fish"]:
fish_id[item["id"]] = item["a"]
print(fish_id)
First create a fish.json file and get your JSON file;
with open('fish.json') as json_file:
data = json.load(json_file)
Then, take your fishes;
fish1 = data['fish'][0]
fish2 = data['fish'][1]
fish3 = data['fish'][2]
fish4 = data['fish'][3]
After that take only values for each, because you want to create a dictionary only from values;
value_list1=list(fish1.values())
value_list2=list(fish2.values())
value_list3=list(fish3.values())
value_list4=list(fish4.values())
Finally, create fish_id dictionary;
fish_id = {
f"{value_list1[0]}" : f"{value_list1[2]}",
f"{value_list2[0]}" : f"{value_list2[2]}",
f"{value_list3[0]}" : f"{value_list3[2]}",
f"{value_list4[0]}" : f"{value_list4[2]}",
}
if you run;
print(fish_id)
Result will be like below, but if you can use for loops, it can be more effective.
{'0': 'NONE', '1': 'Hampala macrolepidota', '2': 'Channa micropeltes', '3': 'Chitala ornata'}
I'm struggling with this problem. I have a JSON file and needs ti put it out to CSV, its fine if the structure is kind of flat with no deep nested items.
But in this case the nested RACES is messing me up.
How would I go about getting the data in a format like this:
VENUE, COUNTRY, ITW, RACES__NO, RACES__TIME
for each object and each race in the object?
{
"1": {
"VENUE": "JOEBURG",
"COUNTRY": "HAE",
"ITW": "XAD",
"RACES": {
"1": {
"NO": 1,
"TIME": "12:35"
},
"2": {
"NO": 2,
"TIME": "13:10"
},
"3": {
"NO": 3,
"TIME": "13:40"
},
"4": {
"NO": 4,
"TIME": "14:10"
},
"5": {
"NO": 5,
"TIME": "14:55"
},
"6": {
"NO": 6,
"TIME": "15:30"
},
"7": {
"NO": 7,
"TIME": "16:05"
},
"8": {
"NO": 8,
"TIME": "16:40"
}
}
},
"2": {
"VENUE": "FOOBURG",
"COUNTRY": "ABA",
"ITW": "XAD",
"RACES": {
"1": {
"NO": 1,
"TIME": "12:35"
},
"2": {
"NO": 2,
"TIME": "13:10"
},
"3": {
"NO": 3,
"TIME": "13:40"
},
"4": {
"NO": 4,
"TIME": "14:10"
},
"5": {
"NO": 5,
"TIME": "14:55"
},
"6": {
"NO": 6,
"TIME": "15:30"
},
"7": {
"NO": 7,
"TIME": "16:05"
},
"8": {
"NO": 8,
"TIME": "16:40"
}
}
}, ...
}
I would like to output this to CSV like this:
VENUE, COUNTRY, ITW, RACES__NO, RACES__TIME
JOEBERG, HAE, XAD, 1, 12:35
JOEBERG, HAE, XAD, 2, 13:10
JOEBERG, HAE, XAD, 3, 13:40
...
...
FOOBURG, ABA, XAD, 1, 12:35
FOOBURG, ABA, XAD, 2, 13:10
So first I get the correct keys:
self.keys = self.data.keys()
keys = ["DATA_KEY"]
for key in self.keys:
if type(self.data[key]) == dict:
for k in self.data[key].keys():
if k not in keys:
if type(self.data[key][k]) == unicode:
keys.append(k)
elif type(self.data[key][k]) == dict:
self.subkey = k
for sk in self.data[key][k].values():
for subkey in sk.keys():
subkey = "%s__%s" % (self.subkey, subkey)
if subkey not in keys:
keys.append(subkey)
Then add the data:
But how?
This should be a fun one for you skilled forloopers. ;-)
I'd collect keys only for the first object, then assume that the rest of the format is consistent.
The following code also limits the nested object to just one; you did not specify what should happen when there is more than one. Having two or more nested structures of equal length could work (you'd 'zip' those together), but if you have structures of differing length you need to make an explicit choice how to handle those; zip with empty columns to pad, or to write out the product of those entries (A x B rows, repeating information from A each time you find a B entry).
import csv
from operator import itemgetter
with open(outputfile, 'wb') as outf:
writer = None # will be set to a csv.DictWriter later
for key, item in sorted(data.items(), key=itemgetter(0)):
row = {}
nested_name, nested_items = '', {}
for k, v in item.items():
if not isinstance(v, dict):
row[k] = v
else:
assert not nested_items, 'Only one nested structure is supported'
nested_name, nested_items = k, v
if writer is None:
# build fields for each first key of each nested item first
fields = sorted(row)
# sorted keys of first item in key sorted order
nested_keys = sorted(sorted(nested_items.items(), key=itemgetter(0))[0][1])
fields.extend('__'.join((nested_name, k)) for k in nested_keys)
writer = csv.DictWriter(outf, fields)
writer.writeheader()
for nkey, nitem in sorted(nested_items.items(), key=itemgetter(0)):
row.update(('__'.join((nested_name, k)), v) for k, v in nitem.items())
writer.writerow(row)
For your sample input, this produces:
COUNTRY,ITW,VENUE,RACES__NO,RACES__TIME
HAE,XAD,JOEBURG,1,12:35
HAE,XAD,JOEBURG,2,13:10
HAE,XAD,JOEBURG,3,13:40
HAE,XAD,JOEBURG,4,14:10
HAE,XAD,JOEBURG,5,14:55
HAE,XAD,JOEBURG,6,15:30
HAE,XAD,JOEBURG,7,16:05
HAE,XAD,JOEBURG,8,16:40
ABA,XAD,FOOBURG,1,12:35
ABA,XAD,FOOBURG,2,13:10
ABA,XAD,FOOBURG,3,13:40
ABA,XAD,FOOBURG,4,14:10
ABA,XAD,FOOBURG,5,14:55
ABA,XAD,FOOBURG,6,15:30
ABA,XAD,FOOBURG,7,16:05
ABA,XAD,FOOBURG,8,16:40
I am trying to sort a nested dictionary using its second key where my dictionary looks like:
my_dictionary = {
"char": {
"3": {
"genman": [
"motion"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
}
}
And my expected output will be:
my_dictionary = {
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"char": {
"3": {
"genman": [
"motion"
]
}
}
}
Tried using the following code:
new_dict = {}
for k, v in my_dictionary.items():
for s in sorted(my_dictionary.itervalues()):
if not s.keys()[0]:
new_val = my_dictionary[k].get(s.keys()[0])
my_dictionary[s.keys()[0]] = new_val
my_dictionary.update(new_dict)
It fails badly, and I am getting the same result as my initial dictionary.
This works:
sorted(my_dictionary.items(), key=lambda x: list(x[1].keys())[0])
Returns:
[('EMPT', {'0': {}}),
('veh', {'1': {'tankers': ['varA', 'varB']}}),
('fast', {'2': {'empty': []}}),
('char', {'3': {'genman': ['motion']}})]
Sorted receives a list of key-value pairs, we sort using the result of lambda x: list(x[1].keys())[0] which takes a list of the keys in the inner dict, then grabs the first key (need to do this because dict_keys directly is not indexable).
Edit: the result is a list of key, value pairs but it can be fed into an OrderedDict to use it as a dict.
actually there is no order for a dict, however you can use OrderedDIct instead.
from collections import OrderedDict
my_dictionary = {
"char": {
"3": {
"genman": [
"motion"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
}
}
s = sorted((list(v.keys())[0], k) for k, v in my_dictionary.items())
new_dic = OrderedDict([(k,my_dictionary[k]) for _, k in s])
I'm struggling with this problem. I have a JSON file and needs ti put it out to CSV, its fine if the structure is kind of flat with no deep nested items.
But in this case the nested RACES is messing me up.
How would I go about getting the data in a format like this:
VENUE, COUNTRY, ITW, RACES__NO, RACES__TIME
for each object and each race in the object?
{
"1": {
"VENUE": "JOEBURG",
"COUNTRY": "HAE",
"ITW": "XAD",
"RACES": {
"1": {
"NO": 1,
"TIME": "12:35"
},
"2": {
"NO": 2,
"TIME": "13:10"
},
"3": {
"NO": 3,
"TIME": "13:40"
},
"4": {
"NO": 4,
"TIME": "14:10"
},
"5": {
"NO": 5,
"TIME": "14:55"
},
"6": {
"NO": 6,
"TIME": "15:30"
},
"7": {
"NO": 7,
"TIME": "16:05"
},
"8": {
"NO": 8,
"TIME": "16:40"
}
}
},
"2": {
"VENUE": "FOOBURG",
"COUNTRY": "ABA",
"ITW": "XAD",
"RACES": {
"1": {
"NO": 1,
"TIME": "12:35"
},
"2": {
"NO": 2,
"TIME": "13:10"
},
"3": {
"NO": 3,
"TIME": "13:40"
},
"4": {
"NO": 4,
"TIME": "14:10"
},
"5": {
"NO": 5,
"TIME": "14:55"
},
"6": {
"NO": 6,
"TIME": "15:30"
},
"7": {
"NO": 7,
"TIME": "16:05"
},
"8": {
"NO": 8,
"TIME": "16:40"
}
}
}, ...
}
I would like to output this to CSV like this:
VENUE, COUNTRY, ITW, RACES__NO, RACES__TIME
JOEBERG, HAE, XAD, 1, 12:35
JOEBERG, HAE, XAD, 2, 13:10
JOEBERG, HAE, XAD, 3, 13:40
...
...
FOOBURG, ABA, XAD, 1, 12:35
FOOBURG, ABA, XAD, 2, 13:10
So first I get the correct keys:
self.keys = self.data.keys()
keys = ["DATA_KEY"]
for key in self.keys:
if type(self.data[key]) == dict:
for k in self.data[key].keys():
if k not in keys:
if type(self.data[key][k]) == unicode:
keys.append(k)
elif type(self.data[key][k]) == dict:
self.subkey = k
for sk in self.data[key][k].values():
for subkey in sk.keys():
subkey = "%s__%s" % (self.subkey, subkey)
if subkey not in keys:
keys.append(subkey)
Then add the data:
But how?
This should be a fun one for you skilled forloopers. ;-)
I'd collect keys only for the first object, then assume that the rest of the format is consistent.
The following code also limits the nested object to just one; you did not specify what should happen when there is more than one. Having two or more nested structures of equal length could work (you'd 'zip' those together), but if you have structures of differing length you need to make an explicit choice how to handle those; zip with empty columns to pad, or to write out the product of those entries (A x B rows, repeating information from A each time you find a B entry).
import csv
from operator import itemgetter
with open(outputfile, 'wb') as outf:
writer = None # will be set to a csv.DictWriter later
for key, item in sorted(data.items(), key=itemgetter(0)):
row = {}
nested_name, nested_items = '', {}
for k, v in item.items():
if not isinstance(v, dict):
row[k] = v
else:
assert not nested_items, 'Only one nested structure is supported'
nested_name, nested_items = k, v
if writer is None:
# build fields for each first key of each nested item first
fields = sorted(row)
# sorted keys of first item in key sorted order
nested_keys = sorted(sorted(nested_items.items(), key=itemgetter(0))[0][1])
fields.extend('__'.join((nested_name, k)) for k in nested_keys)
writer = csv.DictWriter(outf, fields)
writer.writeheader()
for nkey, nitem in sorted(nested_items.items(), key=itemgetter(0)):
row.update(('__'.join((nested_name, k)), v) for k, v in nitem.items())
writer.writerow(row)
For your sample input, this produces:
COUNTRY,ITW,VENUE,RACES__NO,RACES__TIME
HAE,XAD,JOEBURG,1,12:35
HAE,XAD,JOEBURG,2,13:10
HAE,XAD,JOEBURG,3,13:40
HAE,XAD,JOEBURG,4,14:10
HAE,XAD,JOEBURG,5,14:55
HAE,XAD,JOEBURG,6,15:30
HAE,XAD,JOEBURG,7,16:05
HAE,XAD,JOEBURG,8,16:40
ABA,XAD,FOOBURG,1,12:35
ABA,XAD,FOOBURG,2,13:10
ABA,XAD,FOOBURG,3,13:40
ABA,XAD,FOOBURG,4,14:10
ABA,XAD,FOOBURG,5,14:55
ABA,XAD,FOOBURG,6,15:30
ABA,XAD,FOOBURG,7,16:05
ABA,XAD,FOOBURG,8,16:40
Here I got a list of string:
['2-3-1-*-*','2-3-*-*-*','2-1-*-*-*','1-4-3-*-*','2-3-2-*-*','2-1-3-*-*','1-1-*-*-*','2-3-1-1-*'];
I am trying to group this string into a structure like this:
--'2-3-*-*-*'
--'2-3-1-*-*'
--'2-3-1-1-*'
--'2-3-2-*-*'
--'2-1-*-*-*'
--'2-1-3-*-*'
--'1-4-3-*-*'
--'1-1-*-*-*'
This is like tree structure. I am beginner in programming, so can someone give me a hint on how can I construct the tree-like structure and any suitable structure that I could use?
s = ['2-3-1-*-*','2-3-*-*-*','2-1-*-*-*','1-4-3-*-*','2-3-2-*-*','2-1-3-*-*','1-1-*-*-*','2-3-1-1-*']
def isSubElement(subelement, element):
return (subelement != element) and all([(e1 == e2) or (e1 == "*" and e2 != "*") for e1, e2 in zip(element.split('-'), subelement.split('-'))])
def parseTree(elementList):
if len(elementList) == 0:
return {}
elements = elementList[:]
d = {}
for element1 in elements:
parent = True
for element2 in elements:
if isSubElement(element1, element2):
parent = False
break
if parent:
d[element1] = {}
for element1 in d.keys():
d[element1] = parseTree([element for element in elements if isSubElement(element, element1)])
return d
print parseTree(s)
OUTPUT:
{'2-1-*-*-*':
{'2-1-3-*-*':
{}},
'1-4-3-*-*':
{},
'1-1-*-*-*':
{},
'2-3-*-*-*':
{'2-3-1-*-*':
{'2-3-1-1-*':
{}},
'2-3-2-*-*':
{}}}
if you're using some js library to render it, save it as a nested dict so that it can be exported to a JSON without hassle.
def parser(items):
nested_dicts = {}
for item in items:
nodes = item.split('-')
current_dict = nested_dicts
for n in nodes[:-1]:
current_dict = current_dict.setdefault(n, {})
last = nodes[-1]
current_dict[last] = current_dict.get(last, 0) + 1
return nested_dicts
sample output with provided data:
{
"1": {
"1": {
"*": {
"*": {
"*": 1
}
}
},
"4": {
"3": {
"*": {
"*": 1
}
}
}
},
"2": {
"1": {
"3": {
"*": {
"*": 1
}
},
"*": {
"*": {
"*": 1
}
}
},
"3": {
"1": {
"1": {
"*": 1
},
"*": {
"*": 1
}
},
"2": {
"*": {
"*": 1
}
},
"*": {
"*": {
"*": 1
}
}
}
}
}