How to flatten some levels of nested dictionary? - python

I have nested dictionary d as below
d = {'id': {"a,b": {'id': {"x": {'id': None},
"y": {'id': {"a": {'id': None},
"b": {'id': None}}}}},
"c,d": {'id': {"c": {'id': None},
"d": {'id': {"x": {'id': None},
"y": {'id': None}}}}}}}
and I would like to unnest some levels and compress it to the following output:
{"a,b": {"x": None,
"y": {"a": None,
"b": None}},
"c,d": {"c": None,
"d": {"x": None,
"y": None}}}
I would like to unnest every nested dictionary stored under the key id and replace it with its inner dictionary.
My starting point is:
def unnest_dictionary(d):
for k,v in d.items():
if isinstance(v, dict):
unnest_dictionary(v)
if k=='id':
......
Not sure how to unnest it from there

Here is how I ended up solving it:
I flattened the dictionary, removed the levels named id, then nested it back again.
import re
d = {'id': {"a,b": {'id': {"x": {'id': None},
"y": {'id': {"a": {'id': None},
"b": {'id': None}}}}},
"c,d": {'id': {"c": {'id': None},
"d": {'id': {"x": {'id': None},
"y": {'id': None}}}}}}}
def flatten_dict(dd, separator ='_', prefix =''):
    """Flatten a nested dictionary into a single-level dictionary.

    Keys along each path are joined with ``separator``. A non-dict ``dd``
    is treated as a leaf and returned as ``{prefix: dd}``. Empty nested
    dictionaries contribute no entries to the result.
    """
    if not isinstance(dd, dict):
        return {prefix: dd}

    flat = {}
    for outer_key, outer_val in dd.items():
        # Recurse with the child's own key as prefix; the current prefix
        # (if any) is prepended on the way back up.
        for inner_key, leaf in flatten_dict(outer_val, separator, outer_key).items():
            if prefix:
                flat[prefix + separator + inner_key] = leaf
            else:
                flat[inner_key] = leaf
    return flat
def nest_dict(dict1):
    """Rebuild a nested dictionary from a flat one.

    Each flat key is handed to ``split_rec``, which splits it into path
    segments and stores the value at that path in the result.
    """
    nested = {}
    for flat_key, value in dict1.items():
        split_rec(flat_key, value, nested)
    return nested
def split_rec(k, v, out, separator='_'):
    """Store ``v`` in ``out`` at the nested path encoded by the flat key ``k``.

    ``k`` is split on ``separator``; every segment but the last becomes a
    nested dictionary level (created on demand) and the last segment
    receives ``v``. ``out`` is modified in place.

    ``separator`` defaults to ``'_'`` so existing callers are unaffected;
    pass the same separator that was used for flattening to reverse it.
    """
    head, *rest = k.split(separator, 1)
    if rest:
        # More segments remain: descend into (or create) the child dict.
        split_rec(rest[0], v, out.setdefault(head, {}), separator)
    else:
        out[head] = v
flat_d = flatten_dict(d)
# Drop every path segment that is exactly 'id'. Filtering whole segments
# (instead of regex-substituting '_id' / 'id_') leaves keys that merely
# contain the letters "id" (e.g. 'idea' or 'grid') untouched.
flat_d = {
    '_'.join(part for part in k.split('_') if part != 'id'): v
    for k, v in flat_d.items()
}
nested_d = nest_dict(flat_d)
print(nested_d)
# {'a,b': {'x': None, 'y': {'a': None, 'b': None}}, 'c,d': {'c': None, 'd': {'x': None, 'y': None}}}

Related

Fill Multiple empty values in python dictionary for particular key all over dictionary

I have a dictionary as below.
The key id is present multiple times inside the dictionary. I need to fill the id value everywhere in dicts in a single line of code.
Currently I am writing multiple lines of code to fill the empty values.
dicts = {
"abc": {
"a":{"id": "", "id1":""},
"b":{"id": "","hey":"1223"},
"c":{"id": "","hello":"4564"}
},
"xyz": {
"d":{"id": "","id1":"", "ijk":"water"}
},
"f":{"id": ""},
"g":{"id1": ""}
}
id = 123
dicts['abc']['a']['id'] = id
dicts['abc']['b']['id'] = id
dicts['abc']['c']['id'] = id
dicts['xyz']['d']['id'] = id
dicts['f']['id'] = id
dicts
Output:
{'abc': {'a': {'id': 123,"id1":""},
'b': {'id': 123, 'hey': '1223'},
'c': {'id': 123, 'hello': '4564'}},
'xyz': {'d': {'id': 123, 'id1': "", 'ijk': 'water'}},
'f': {'id': 123}, "g":{"id1": ""}}
You can solve it in place with a simple recursive function, for example:
id = 123
dicts = {
"abc": {
"a": {"id": "", "id1": ""},
"b": {"id": "", "hey": "1223"},
"c": {"id": "", "hello": "4564"}
},
"xyz": {
"d": {"id": "", "id1": "", "ijk": "water"}
},
"f": {"id": ""},
"g": {"id1": ""}
}
def process(dicts):
    """Fill every empty ``'id'`` entry of a nested dictionary, in place.

    Walks ``dicts`` recursively. Wherever the key ``'id'`` holds a falsy
    value it is overwritten with whatever the name ``id`` refers to in the
    module scope at call time (the surrounding script assigns it the
    replacement value). Returns nothing.
    """
    for key, value in dicts.items():
        needs_fill = key == 'id' and not value
        if needs_fill:
            dicts[key] = id
        # Recurse on the value as it was before any replacement.
        if isinstance(value, dict):
            process(value)
process(dicts)
print(dicts)
Output:
{
'abc': {'a': {'id': 123, 'id1': ''},
'b': {'id': 123, 'hey': '1223'},
'c': {'id': 123, 'hello': '4564'}},
'xyz': {'d': {'id': 123, 'id1': '', 'ijk': 'water'}},
'f': {'id': 123}, 'g': {'id1': ''}
}

Reduce levels of nested dictionaries when they have one element

I need to reduce the nested levels of a dictionary when a node has 1 element, by appending the inner key to the upper key.
Example:
Given this dictionary:
{'A': {'a': {'1': {}}},
'B': {'b': {'2': {}},
'c': {'3': {'x': {}}},
'd': {},
'e': {'0': {},
'1': {},
},
},
}
I need to return:
{'A a 1': {},
'B': {'b 2': {},
'c 3 x': {},
'd': {},
'e': {'0': {},
'1': {},
},
},
}
It should be generic for any number of levels, and the last element is always an empty dict.
You can first flatten the structure to retrieve all paths and then rebuild it using collections.defaultdict:
import collections
data = {'A': {'a': {'1': {}}}, 'B': {'b': {'2': {}}, 'c': {'3': {'x': {}}}, 'd': {}, 'e': {'0': {}, '1': {}}}}
def flatten(d, c=None):
    """Yield ``(path, value)`` for every empty (falsy) leaf of a nested dict.

    ``path`` is the list of keys leading to the leaf, prefixed by ``c``
    when given. ``c`` defaults to ``None`` rather than a shared mutable
    list; a fresh list is built for every yielded path.
    """
    prefix = [] if c is None else c
    for a, b in d.items():
        if not b:
            yield (prefix + [a], b)
        else:
            yield from flatten(b, prefix + [a])
def compress(d):
    """Rebuild a dictionary from ``(path, value)`` pairs, merging one-child chains.

    ``d`` is an iterable of ``(path, value)`` pairs where ``path`` is a
    non-empty list of keys. Pairs are grouped by their first key. A group
    with a single pair collapses into one key made of the whole path joined
    by single spaces; a group with several pairs keeps its key and is
    compressed recursively.

    The key is built by joining the head and the remaining path together,
    so a path with nothing left to collapse gives ``'d'`` rather than
    ``'d '`` with a trailing space.
    """
    grouped = collections.defaultdict(list)
    for [head, *tail], leaf in d:
        grouped[head].append((tail, leaf))

    result = {}
    for head, entries in grouped.items():
        if len(entries) > 1:
            if all(tail for tail, _ in entries):
                result[head] = compress(entries)
            else:
                result[head] = entries[0][-1]
        else:
            tail, leaf = entries[0]
            result[' '.join([head, *tail])] = leaf
    return result
print(compress(list(flatten(data))))
Output:
{'A a 1': {},
'B': {'b 2': {},
'c 3 x': {},
'd ': {},
'e': {'0 ': {},
'1 ': {}}
}
}
I believe this recursive function works for your example:
def flatten_keys(key_so_far='', d=None):
    """Collapse chains of single-child dictionaries into space-joined keys.

    ``key_so_far`` is the key accumulated for the current chain and ``d``
    the dictionary beneath it. A dictionary with several entries keeps its
    own level; a dictionary with exactly one entry has that entry's key
    appended to ``key_so_far``; an empty dictionary ends the chain.

    ``d`` defaults to ``None`` (treated as an empty dictionary) instead of
    a shared mutable ``{}``.
    """
    if d is None:
        d = {}
    if len(d) > 1:
        sub_dict = {}
        for k, v in d.items():
            sub_dict.update(flatten_keys(k, v))
        # At the top level (no key yet) return the children directly.
        return {key_so_far: sub_dict} if key_so_far else sub_dict
    if not d:
        return {key_so_far: {}}
    # Exactly one entry: fold its key into the running key and continue.
    (k, v), = d.items()
    key_so_far += (' ' if key_so_far else '') + k
    return flatten_keys(key_so_far, v)
input_d = {'A': {'a': {'1': {}}},
           'B': {'b': {'2': {}},
                 'c': {'3': {'x': {}}},
                 'd': {},
                 'e': {'0': {},
                       '1': {},
                       },
                 },
           }
# Pass the dictionary by keyword: the first positional parameter of
# flatten_keys is key_so_far, so a positional call would use the dict as a key.
print(flatten_keys(d=input_d))
# {'A a 1': {}, 'B': {'b 2': {}, 'c 3 x': {}, 'd': {}, 'e': {'0': {}, '1': {}}}}

Best approach to convert flat nested data to hierarchical data in python

I am representing category hierarchy in flat manner.
Category Hierarchy is
category1
category4
category6
category5
category7
category2
category3
I am storing this as a list of dictionaries:
# Flat category table: one row per category, linked to its parent by id.
# Every id is unique (category6 is id 6).
d = [{'id': 1, 'name': 'category1', 'parent_category_id': None, 'level': 1},
     {'id': 2, 'name': 'category2', 'parent_category_id': None, 'level': 1},
     {'id': 3, 'name': 'category3', 'parent_category_id': None, 'level': 1},
     {'id': 4, 'name': 'category4', 'parent_category_id': 1, 'level': 2},
     {'id': 5, 'name': 'category5', 'parent_category_id': 1, 'level': 2},
     {'id': 6, 'name': 'category6', 'parent_category_id': 4, 'level': 3},
     {'id': 7, 'name': 'category7', 'parent_category_id': 5, 'level': 3}]
What would be the best approach to convert this category list to a hierarchical list like
[{'name': 'category1',
'subcategory': [{'name': 'category4',
'subcategory': [{'name': 'category6', 'subcategory': []}]},
{'name': 'category5',
'subcategory': [{'name': 'category7', 'subcategory': []}]}]},
{'name': 'category2', 'subcategory': []},
{'name': 'category3', 'subcategory': []}]
Your problem is very similar to one I answered at: Calculating the Path from Parent Child Relationships
I note that you seem to have a lot of superfluous fields in your data structures. Essentially you could represent the information in the post by:
d = {1: {4: {6: None}, 5: {7: None}}, 2: None, 3: None}
Reworking the code for you.
ds = [{'id': 1, 'name': 'category1', 'parent_category_id': None, 'level': 1},
      {'id': 2, 'name': 'category2', 'parent_category_id': None, 'level': 1},
      {'id': 3, 'name': 'category3', 'parent_category_id': None, 'level': 1},
      {'id': 4, 'name': 'category4', 'parent_category_id': 1, 'level': 2},
      {'id': 5, 'name': 'category5', 'parent_category_id': 1, 'level': 2},
      {'id': 6, 'name': 'category6', 'parent_category_id': 4, 'level': 3},
      {'id': 7, 'name': 'category7', 'parent_category_id': 5, 'level': 3}]
e = {1: {4: {6: None}, 5: {7: None}}, 2: None, 3: None}
parents = set()   # ids (as strings) that are the parent of at least one row
children = {}     # child id (string) -> parent id (string)
for d in ds:
    c = str(d['id'])
    parent_id = d['parent_category_id']
    # Test the raw value: str(None) is the string 'None', which is never None,
    # so checking after the conversion would always succeed.
    if parent_id is not None:
        parents.add(str(parent_id))
    # Root rows are stored with the string 'None' as their parent.
    children[c] = str(parent_id)
# recursively determine parents until child has no parent
def ancestors(p):
    """Return the chain of ids from the topmost ancestor down to ``p``.

    Follows the module-level ``children`` mapping (child -> parent) until
    it reaches an id that has no entry; ``p`` itself is the last element.
    """
    if p not in children:
        return [p]
    return ancestors(children[p]) + [p]
# for each child that has no children print the genealogy
for k in (set(children.keys()) - parents):
    # [1:] drops the leading 'None' placeholder that root rows carry as their parent
    print(' '.join(ancestors(k)[1:]))
outputs:
3
2
1 5 7
1 4 6
To turn this into a nested dictionary I refer you to What is the best way to implement nested dictionaries?
def flat_to_hierarchical(d, category_id=None):
    """Build the nested category list rooted at ``category_id``.

    ``d`` is the flat list of category rows. Every row whose
    ``parent_category_id`` equals ``category_id`` becomes a
    ``{'name', 'subcategories'}`` dictionary, with ``subcategories`` built
    recursively from that row's ``id``. With the default ``None`` the
    top-level categories are returned.
    """
    return [
        {
            'name': row['name'],
            'subcategories': flat_to_hierarchical(d, row['id']),
        }
        for row in d
        if row['parent_category_id'] == category_id
    ]
print(flat_to_hierarchical(d))
We start with a way to make_tree given an index and a root node identity
def make_tree (index, root):
    """Return the output nodes for every child listed under ``root``.

    ``index`` maps a parent id to the list of its child rows. A ``root``
    with no entry in ``index`` yields an empty list.
    """
    return [ make_node (index, child) for child in index.get (root, ()) ]
Now we need a way to make_node - this is where we convert to an element in your input data to an element of our output tree
def make_node (index, child):
    """Convert one input row into an output node.

    The node keeps the row's ``name`` and gets a ``children`` list built by
    ``make_tree`` from the row's ``id``.
    """
    node = {}
    node['name'] = child['name']
    node['children'] = make_tree (index, child['id'])
    return node
Now of course we need a way to make_index based on your input data. We use itertools groupby so that we can perform efficient lookup of all child nodes
from itertools import groupby
def make_index (nodes):
    """Group rows by ``parent_category_id``.

    Returns a dictionary mapping each parent id to the list of its child
    rows, in input order. Rows are collected with ``dict.setdefault`` so
    siblings are grouped even when they are not adjacent in ``nodes``;
    ``itertools.groupby`` only groups consecutive rows, so on unsorted
    input a later run with the same parent would replace the earlier one.
    """
    index = {}
    for node in nodes:
        index.setdefault (node['parent_category_id'], []).append (node)
    return index
Lastly we write main to tie it all together. Note the data is not re-indexed or filtered for each iteration
def main (nodes, root = None):
    """Build the tree below ``root`` from the flat list ``nodes``.

    The rows are indexed once with ``make_index`` and the index is then
    passed to ``make_tree``.
    """
    index = make_index (nodes)
    return make_tree (index, root)
Full program demonstration
from itertools import groupby
def make_tree (index, root):
    """Return the output nodes for every child listed under ``root``.

    ``index`` maps a parent id to the list of its child rows. A ``root``
    with no entry in ``index`` yields an empty list.
    """
    return [ make_node (index, child) for child in index.get (root, ()) ]
def make_node (index, child):
    """Convert one input row into an output node.

    The node keeps the row's ``name`` and gets a ``children`` list built by
    ``make_tree`` from the row's ``id``.
    """
    node = {}
    node['name'] = child['name']
    node['children'] = make_tree (index, child['id'])
    return node
def make_index (nodes):
    """Group rows by ``parent_category_id``.

    Returns a dictionary mapping each parent id to the list of its child
    rows, in input order. Rows are collected with ``dict.setdefault`` so
    siblings are grouped even when they are not adjacent in ``nodes``;
    ``itertools.groupby`` only groups consecutive rows, so on unsorted
    input a later run with the same parent would replace the earlier one.
    """
    index = {}
    for node in nodes:
        index.setdefault (node['parent_category_id'], []).append (node)
    return index
def main (nodes, root = None):
    """Build the tree below ``root`` from the flat list ``nodes``.

    The rows are indexed once with ``make_index`` and the index is then
    passed to ``make_tree``.
    """
    index = make_index (nodes)
    return make_tree (index, root)
# Flat category table used by the demonstration; every id is unique
# (category6 is id 6).
d = \
  [ {'id': 1, 'name': 'category1', 'parent_category_id': None, 'level': 1}
  , {'id': 2, 'name': 'category2', 'parent_category_id': None, 'level': 1}
  , {'id': 3, 'name': 'category3', 'parent_category_id': None, 'level': 1}
  , {'id': 4, 'name': 'category4', 'parent_category_id': 1, 'level': 2}
  , {'id': 5, 'name': 'category5', 'parent_category_id': 1, 'level': 2}
  , {'id': 6, 'name': 'category6', 'parent_category_id': 4, 'level': 3}
  , {'id': 7, 'name': 'category7', 'parent_category_id': 5, 'level': 3}
  ]
# get sub-tree of [None] from dataset [d]
print (main (d, None))
Program output
[ { 'name': 'category1'
, 'children': [ { 'name': 'category4'
, 'children': [ { 'name': 'category6'
, 'children': []
}
]
}
, { 'name': 'category5'
, 'children': [ { 'name': 'category7'
, 'children': []
}
]
}
]
}
, { 'name': 'category2', 'children': [] }
, { 'name': 'category3', 'children': [] }
]

Merge two list of dictionaries based on specific key

Ok, so I have this code.
data1 = [
{'Id': 1},
{'Id': 2}
]
data2 = [
{'Id': 1, 'score': 100, 'testdata': 333},
{'Id': 2, 'score': 200, 'testdata': 555},
{'Id': 3, 'score': 300, 'testdata': 444}
]
expectedData = [
{'Id': 1, 'Score': 100},
{'Id': 2, 'Score': 200}
]
def merge_lists(data1, data2, key):
    """Merge two lists of dictionaries on the value stored under ``key``.

    The first dictionary seen for each key value is kept (the same object,
    not a copy) and every later dictionary with that key value is merged
    into it with ``dict.update``, so all of its fields are copied across.
    Returns the merged dictionaries in first-seen order.
    """
    by_key = {}
    for record in data1 + data2:
        ident = record[key]
        if ident not in by_key:
            by_key[ident] = record
        else:
            by_key[ident].update(record)
    return list(by_key.values())
merged = merge_lists(data1, data2, 'Id')
# print() call form runs on Python 3 and on Python 2 with a single argument.
print(merged)
The problem is that this will merge every value (other than 'Id') from data2 into data1. I only want it to merge the key 'score', but I'm really not sure how to specify only that key. I've tried multiple other conditional statements in order to specify the 'score' key, but I can't seem to get anything working.
Thanks for any help
You could use a helper function like this:
data1 = [
{'Id': 1},
{'Id': 2}
]
data2 = [
{'Id': 1, 'score': 100, 'testdata': 333},
{'Id': 2, 'score': 200, 'testdata': 555},
{'Id': 3, 'score': 300, 'testdata': 444}
]
def get_score(list_of_dict, id_value):
    """Return ``{"score": ...}`` from the first dict whose ``"Id"`` matches.

    Scans ``list_of_dict`` in order and returns a one-entry dictionary
    holding the ``"score"`` of the first dict whose ``"Id"`` equals
    ``id_value``. When nothing matches, an empty dictionary is returned so
    the result can always be passed to ``dict.update``.
    """
    for dict_ in list_of_dict:
        if dict_["Id"] == id_value:
            return {"score": dict_["score"]}
    return {}
# Copy each dict as well as the list: list.copy() is shallow, so updating
# the dicts of a shallow copy would also change the dicts held by data1.
res = [dict(item) for item in data1]
for dict_ in res:
    score = get_score(data2, dict_["Id"])
    # Skip ids that have no entry in data2 instead of failing on update().
    if score:
        dict_.update(score)
print(res)
# [{'score': 100, 'Id': 1}, {'score': 200, 'Id': 2}]
Here's code that will merge the way you want. Note that if there are multiple dicts in data2 with the matching key, only the first one will be used.
data1 = [
{'Id': 1},
{'Id': 2},
]
data2 = [
{'Id': 1, 'score': 100, 'testdata': 333},
{'Id': 2, 'score': 200, 'testdata': 555},
{'Id': 3, 'score': 300, 'testdata': 444},
]
def merge_lists(data1, data2, key):
    """Attach the matching ``'score'`` from ``data2`` to each entry of ``data1``.

    For every dict in ``data1`` a new dict is built holding its ``key``
    value plus the ``'score'`` of the first dict in ``data2`` with the same
    ``key`` value. The inputs are not modified.

    Raises:
        KeyError: if an entry of ``data1`` has no match in ``data2``.
    """
    result = []
    for d1 in data1:
        wanted = d1[key]
        match = next((d2 for d2 in data2 if d2[key] == wanted), None)
        if match is None:
            raise KeyError('No match for %r: %s' % (key, wanted))
        result.append({key: wanted, 'score': match['score']})
    return result
merged = merge_lists(data1, data2, 'Id')
# print() call form runs on Python 3 and on Python 2 with a single argument.
print(merged)
output
[{'score': 100, 'Id': 1}, {'score': 200, 'Id': 2}]
If we change data1 to
data1 = [
{'Id': 1},
{'Id': 5},
{'Id': 2},
]
Then we get this output:
Traceback (most recent call last):
File "./qtest.py", line 45, in <module>
merged = merge_lists(data1, data2, 'Id')
File "./qtest.py", line 41, in merge_lists
raise KeyError('No match for %r: %s' % (key, val))
KeyError: "No match for 'Id': 5"

Count the number of items from a dictionary inside another dictionary in Python

What is the best (most elegant) way to count the number of {'id': n} items?
'childs': {
1: [{'id': 1}, {'id': 2}, {'id': 3}],
2: [{'id': 4}],
3: [{'id': 5}, {'id': 6},]
}
>>> 6
You should reply to the comments so that you can get a proper answer.
In the meantime, with d being the dict, I'd go with:
# .values() exists on Python 2 and 3; itervalues() is Python 2 only.
sum(len(x) for x in d['childs'].values())
If you don't want to use for, you can do:
# .values() exists on Python 2 and 3; itervalues() is Python 2 only.
sum(map(len, d['childs'].values()))
Or a twisted:
# reduce is a builtin only on Python 2; functools.reduce is available on both.
from functools import reduce
reduce(lambda x, y: x + len(y), d['childs'].values(), 0)
But really the first version is how you would do it. It's classic Python.
As far as I understood your problem, the main task is to count the total occurrence of key "id" whenever it has value of "n". Here the dictionary name is "Childs".
count = 0
# Each value of Childs is a list of dicts, so the inner items have to be
# visited one by one; indexing the list itself with "id" raises TypeError.
for key, value in Childs.items():
    for item in value:
        if "id" in item:
            count += 1
print(count)
Hope it helps!!
cat = {1: [{'id': 1}, {'id': 2}, {'id': 3}], 2: [{'id': 4}], 3: [{'id': 5}, {'id': 6},]}
sum(len(x) for x in cat.values())
myDict = {'childs': {
1: [{'id': 1}, {'id': 2}, {'id': 3}],
2: [{'id': 4}],
3: [{'id': 5}, {'id': 6},]
}}
count = 0
# Walk the lists directly rather than looking each one up again by key.
for items in myDict["childs"].values():
    for item in items:
        # checkIfItemIsValid is expected to be supplied by the reader.
        if checkIfItemIsValid(item):
            count += 1
print(count)
d = {
    1: [{'id': 1}, {'id': 2}, {'id': 3}],
    2: [{'id': 4}],
    3: [{'id': 5}, {'id': 6}],
}
# .values() exists on Python 2 and 3; itervalues() is Python 2 only.
sum(len(i) for i in d.values())
# 6
But this assumes that every member of that child value list is actually an item you want to count.
Here is one other way without using for:
# .values() exists on Python 2 and 3; itervalues() is Python 2 only.
sum(map(len, d.values()))
In case some items may not have the 'id' key and you are only trying to count those which have that key:
# The outer loop must come first: `i` has to be bound before `obj in i` is evaluated.
sum(1 for i in d.values() for obj in i if 'id' in obj)
or
# The outer loop must come first: `i` has to be bound before `obj in i` is evaluated.
len([obj for i in d.values() for obj in i if 'id' in obj])

Categories

Resources