I am representing a category hierarchy in a flat manner.
The category hierarchy is:
category1
    category4
        category6
    category5
        category7
category2
category3
I am storing this as a list of dictionaries:
d = [{'id': 1, 'name': 'category1', 'parent_category_id': None, 'level': 1},
{'id': 2, 'name': 'category2', 'parent_category_id': None, 'level': 1},
{'id': 3, 'name': 'category3', 'parent_category_id': None, 'level': 1},
{'id': 4, 'name': 'category4', 'parent_category_id': 1, 'level': 2},
{'id': 5, 'name': 'category5', 'parent_category_id': 1, 'level': 2},
{'id': 6, 'name': 'category6', 'parent_category_id': 4, 'level': 3},
{'id': 7, 'name': 'category7', 'parent_category_id': 5, 'level': 3}]
What would be the best approach to convert this category list to a hierarchical list like this?
[{'name': 'category1',
  'subcategory': [{'name': 'category4',
                   'subcategory': [{'name': 'category6', 'subcategory': []}]},
                  {'name': 'category5',
                   'subcategory': [{'name': 'category7', 'subcategory': []}]}]},
 {'name': 'category2', 'subcategory': []},
 {'name': 'category3', 'subcategory': []}]
Your problem is very similar to the one I answered at: Calculating the Path from Parent Child Relationships
I note that you seem to have a lot of superfluous fields in your data structures. Essentially you could represent the information in the post by:
d = {1: {4: {6: None}, 5: {7: None}}, 2: None, 3: None}
Reworking the code for your data:
ds = [{'id': 1, 'name': 'category1', 'parent_category_id': None, 'level': 1},
{'id': 2, 'name': 'category2', 'parent_category_id': None, 'level': 1},
{'id': 3, 'name': 'category3', 'parent_category_id': None, 'level': 1},
{'id': 4, 'name': 'category4', 'parent_category_id': 1, 'level': 2},
{'id': 5, 'name': 'category5', 'parent_category_id': 1, 'level': 2},
{'id': 6, 'name': 'category6', 'parent_category_id': 4, 'level': 3},
{'id': 7, 'name': 'category7', 'parent_category_id': 5, 'level': 3}]
e = {1: {4: {6: None}, 5: {7: None}}, 2: None, 3: None}
parents = set()
children = {}
for d in ds:
    c = str(d['id'])
    p = str(d['parent_category_id'])  # root nodes get the string 'None' as their parent
    parents.add(p)
    children[c] = p

# recursively determine parents until the child has no parent
def ancestors(p):
    return (ancestors(children[p]) if p in children else []) + [p]

# for each child that has no children, print the genealogy
for k in (set(children.keys()) - parents):
    print(' '.join(ancestors(k)[1:]))
outputs:
3
2
1 5 7
1 4 6
To turn this into a nested dictionary I refer you to What is the best way to implement nested dictionaries?
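Sketching that out here (my addition, not the linked answer's code, and using empty dicts instead of None for the leaves): a single pass over ds can build the nested-dict form shown above, e.g.

nodes = {item['id']: {} for item in ds}   # one (initially empty) dict per category
tree = {}
for item in ds:
    pid = item['parent_category_id']
    (tree if pid is None else nodes[pid])[item['id']] = nodes[item['id']]
print(tree)  # {1: {4: {6: {}}, 5: {7: {}}}, 2: {}, 3: {}}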
def flat_to_hierarchical(d, category_id=None):
    out = list()
    for item in filter(lambda item: item['parent_category_id'] == category_id, d):
        out.append(dict(
            name=item['name'],
            subcategory=flat_to_hierarchical(d, item['id'])
        ))
    return out

print(flat_to_hierarchical(d))
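A side note of mine (not part of the answer above): the filter rescans the whole list once per node, which is O(n²). If the lists get large, a parent-to-children index built in one pass keeps it linear; a minimal sketch with a hypothetical flat_to_hierarchical_indexed helper:

from collections import defaultdict

def flat_to_hierarchical_indexed(d, category_id=None):
    by_parent = defaultdict(list)  # parent_category_id -> list of rows
    for item in d:
        by_parent[item['parent_category_id']].append(item)

    def build(parent_id):
        return [{'name': item['name'], 'subcategory': build(item['id'])}
                for item in by_parent[parent_id]]

    return build(category_id)

print(flat_to_hierarchical_indexed(d))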
We start with a way to make_tree given an index and a root node identity
def make_tree (index, root):
    if root not in index:
        return []
    else:
        return [ make_node (index, child) for child in index[root] ]
Now we need a way to make_node - this is where we convert an element of your input data into an element of our output tree
def make_node (index, child):
    return \
        { 'name': child['name']
        , 'children': make_tree (index, child['id'])
        }
Now of course we need a way to make_index based on your input data. We use itertools groupby so that we can perform efficient lookup of all child nodes
from itertools import groupby

def make_index (nodes):
    return \
        { k: list (v)
          for (k, v) in
          groupby (nodes, lambda n: n['parent_category_id']) }
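One caveat of my own, not from the answer: itertools.groupby only groups consecutive runs, so make_index relies on the input already being ordered by parent_category_id (as the question's data happens to be). If that ordering were not guaranteed, sorting first keeps the index correct; a sketch with a hypothetical make_index_sorted:

def make_index_sorted (nodes):
    # sort so rows sharing a parent form one consecutive run; the tuple key
    # keeps the None roots comparable with the integer ids
    ordered = sorted (nodes, key = lambda n: (n['parent_category_id'] is not None,
                                              n['parent_category_id'] or 0))
    return { k: list (v)
             for (k, v) in
             groupby (ordered, lambda n: n['parent_category_id']) }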
Lastly we write main to tie it all together. Note the data is not re-indexed or filtered for each iteration
def main (nodes, root = None):
    return make_tree (make_index (nodes), root)
Full program demonstration
from itertools import groupby

def make_tree (index, root):
    if root not in index:
        return []
    else:
        return [ make_node (index, child) for child in index[root] ]

def make_node (index, child):
    return \
        { 'name': child['name']
        , 'children': make_tree (index, child['id'])
        }

def make_index (nodes):
    return \
        { k: list (v)
          for (k, v) in
          groupby (nodes, lambda n: n['parent_category_id']) }

def main (nodes, root = None):
    return make_tree (make_index (nodes), root)

d = \
    [ {'id': 1, 'name': 'category1', 'parent_category_id': None, 'level': 1}
    , {'id': 2, 'name': 'category2', 'parent_category_id': None, 'level': 1}
    , {'id': 3, 'name': 'category3', 'parent_category_id': None, 'level': 1}
    , {'id': 4, 'name': 'category4', 'parent_category_id': 1, 'level': 2}
    , {'id': 5, 'name': 'category5', 'parent_category_id': 1, 'level': 2}
    , {'id': 6, 'name': 'category6', 'parent_category_id': 4, 'level': 3}
    , {'id': 7, 'name': 'category7', 'parent_category_id': 5, 'level': 3}
    ]
# get sub-tree of [None] from dataset [d]
print (main (d, None))
Program output
[ { 'name': 'category1'
  , 'children': [ { 'name': 'category4'
                  , 'children': [ { 'name': 'category6'
                                  , 'children': []
                                  }
                                ]
                  }
                , { 'name': 'category5'
                  , 'children': [ { 'name': 'category7'
                                  , 'children': []
                                  }
                                ]
                  }
                ]
  }
, { 'name': 'category2', 'children': [] }
, { 'name': 'category3', 'children': [] }
]
Related
I have the dataframe as follows.
dictdata={'id':[991,992,989,5,4],'parentid':[4,4,4,0,0],'pcid': ['4_2','4_3','4_1','5','4']}
df = pd.DataFrame(dictdata)
id parentid pcid
0 991 4 4_2
1 992 4 4_3
2 989 4 4_1
3 5 0 5
4 4 0 4
The id column is the parent column, and parentid represents the relationship within the id rows.
I want to generate the following dictionary from the above dataframe:
{'data': [{'id': '5', 'level': 0, 'parentid': 0, 'pcid': '5', 'children': []},
          {'id': '4',
           'level': 0,
           'parentid': 0,
           'pcid': '4',
           'children': [{'id': '991',
                         'level': 2,
                         'parentid': 4,
                         'pcid': '4_2'},
                        {'id': '992',
                         'level': 3,
                         'parentid': 4,
                         'pcid': '4_3'},
                        {'id': '989',
                         'level': 1,
                         'parentid': 4,
                         'pcid': '4_1'}]}
         ]}
from collections import defaultdict
d = defaultdict(dict)
for _, (i, parentid, pcid) in df.iterrows():
    d[i]['parentid'] = parentid
    d[i]['pcid'] = pcid
    level = 0
    if '_' in pcid:
        _, level = pcid.split('_')
    d[i]['level'] = level

result = {}
for k, v in d.items():
    if v['parentid'] not in d:
        result[k] = {'id': k, **v, 'children': []}
for k, v in d.items():
    if v['parentid'] in d:
        result[v['parentid']]['children'].append({'id': k, **v})
result
output:
{5: {'id': 5, 'parentid': 0, 'pcid': '5', 'level': 0, 'children': []},
4: {'id': 4,
'parentid': 0,
'pcid': '4',
'level': 0,
'children': [{'id': 991, 'parentid': 4, 'pcid': '4_2', 'level': '2'},
{'id': 992, 'parentid': 4, 'pcid': '4_3', 'level': '3'},
{'id': 989, 'parentid': 4, 'pcid': '4_1', 'level': '1'}]}}
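If you need the exact {'data': [...]} shape from the question, the top-level entries can simply be wrapped afterwards; a small addition of my own, not part of the answer above:

output = {'data': list(result.values())}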
Try this, it might help you:
cols = df.columns
d = dict(df.groupby('id')[cols].apply(lambda x: x.to_dict('records')))
>>> d
{4: [{'id': 4, 'parentid': 0, 'pcid': 4}],
5: [{'id': 5, 'parentid': 0, 'pcid': 5}],
989: [{'id': 989, 'parentid': 4, 'pcid': '4_1'}],
991: [{'id': 991, 'parentid': 4, 'pcid': '4_2'}],
992: [{'id': 992, 'parentid': 4, 'pcid': '4_3'}]}
Create parent and child dataframes from the actual dataframe:
dfchild = df.loc[~(df['id']==df['pcid'])]
dfparent = df.loc[(df['id']==df['pcid'])]
Convert these dataframes into dicts:
parentdict = dfparent.to_dict('records')
childdict = dfchild.to_dict('records')
Iterate over parentdict and, inside, collect the respective child elements from childdict based on parentid:
for parentdic in parentdict:
    eleid = int(parentdic['id'])
    childdictva = []
    for child in childdict:
        childele = int(float(child['parentid']))
        if childele == eleid:
            childdictva.append(child)
    parentdic['children'] = childdictva
Final code:
df['id']=df['id'].apply(str)
dfchild = df.loc[~(df['id']==df['pcid'])]
dfparent = df.loc[(df['id']==df['pcid'])]
data = {}
parentdict = dfparent.to_dict('records')
childdict = dfchild.to_dict('records')
for parentdic in parentdict:
    eleid = int(parentdic['id'])
    childdictva = []
    for child in childdict:
        childele = int(float(child['parentid']))
        if childele == eleid:
            childdictva.append(child)
    parentdic['children'] = childdictva
data["data"] = parentdict
parentdict
{'data': [{'id': '5', 'level': 0, 'parentid': 0, 'pcid': '5', 'children': []},
{'id': '4',
'level': 0,
'parentid': 0,
'pcid': '4',
'children': [{'id': '991', 'level': 2, 'parentid': 4, 'pcid': '4_2'},
{'id': '992', 'level': 3, 'parentid': 4, 'pcid': '4_3'},
{'id': '989', 'level': 1, 'parentid': 4, 'pcid': '4_1'}]}]}
This question already has answers here:
Getting the difference (delta) between two lists of dictionaries
(4 answers)
Closed 3 years ago.
I have two lists of dictionaries like below:
prev = [
{ 'id': 0, 'name': 'a' },
{ 'id': 1, 'name': 'b' },
{ 'id': 2, 'name': 'c' }
]
current = [
{ 'id': 1, 'name': 'b' },
{ 'id': 2, 'name': 'c' },
{ 'id': 3, 'name': 'e' },
{ 'id': 4, 'name': 'f' }
]
I want to get the difference of them, the result should be like below:
result = [
{ 'id': 3, 'name': 'e' },
{ 'id': 4, 'name': 'f' }
]
Only the difference of those two should appear in the result list. My solution is like below:
common = []
for c in current:
    for p in prev:
        if c['name'] == p['name']:
            common.append(c)
print(common)
I'm trying to find the common items between the two and then subtract them from the current list, but I don't know how to handle it. If I am using the wrong procedure to resolve the issue, is there another way I can find the diff of these two?
I tried to search a lot, but all the results I found were just comparing two lists of integers, whereas in my case I have lists of dictionaries.
Also note that the id key is just for distinguishing the items; let's compare by name. I want to remove the common items from current and keep just the rest in current. By that I mean I don't need name: a and name: b from the prev list.
Simple
From the data you posted, you can compare the whole dicts, so just find dicts in current that are not in prev:
new = [d for d in current if d not in prev]
print(new) # -> [{'id': 3, 'name': 'e'}, {'id': 4, 'name': 'f'}]
Complex
If your real-world data might have differing ids, the solution needs to get more complex.
Since only the names are important, make a set of common names. Then you can loop over the dicts and check whether the name is in the common set.
prev = [{'id': 0, 'name': 'a'}, {'id': 1, 'name': 'b'}, {'id': 2, 'name': 'c'}]
current = [{'id': 1, 'name': 'b'}, {'id': 2, 'name': 'c'}, {'id': 3, 'name': 'e'}, {'id': 4, 'name': 'f'}]
prev_names, current_names = [{d['name'] for d in x} for x in (prev, current)] # [{'c', 'b', 'a'}, {'c', 'b', 'f', 'e'}]
common_names = prev_names & current_names # {'b', 'c'}
new = [d for d in current if d['name'] not in common_names]
print(new) # -> [{'id': 3, 'name': 'e'}, {'id': 4, 'name': 'f'}]
This is also easy to adapt to getting names in prev that are not common:
old = [d for d in prev if d['name'] not in common_names]
print(old) # -> [{'id': 0, 'name': 'a'}]
This will do the job
prev = [ { 'id': 0, 'name': 'a' }, { 'id': 1, 'name': 'b' }, { 'id': 2, 'name': 'c' } ]
current = [ { 'id': 1, 'name': 'b' }, { 'id': 2, 'name': 'c' }, { 'id': 3, 'name': 'e' }, { 'id': 4, 'name': 'f' } ]
common = []
for c in current:
    if not any(c['id'] == p['id'] and c['name'] == p['name'] for p in prev):
        common.append(c)
print(common)
any() returns True if any element of the iterable is true; if the iterable is empty, it returns False.
Also, as @wjandrea noted in the comments, this
new = [c for c in current if c not in prev]
is also a fair and nice answer. But note that it only works when comparing whole dicts.
If I understood correctly, you want only the items that appear in current and do not appear in prev.
Something like this should work
prev_names = set(map(lambda x: x['name'], prev))
new_items = [item for item in current if item['name'] not in prev_names]
new_items # [{'id': 3, 'name': 'e'}, {'id': 4, 'name': 'f'}]
Code
import itertools
list(itertools.filterfalse(lambda x: x in prev, current))
Output:
[{'id': 3, 'name': 'e'}, {'id': 4, 'name': 'f'}]
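A side note of my own (not from any of the answers above): both `x in prev` and the nested loops rescan prev for every element of current. For larger lists, hashing each dict once gives O(1) membership tests, assuming all values are hashable:

prev_set = {frozenset(p.items()) for p in prev}
new = [c for c in current if frozenset(c.items()) not in prev_set]
print(new)  # [{'id': 3, 'name': 'e'}, {'id': 4, 'name': 'f'}]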
Based on all of the answers here is a little benchmark
import timeit
import itertools
prev = [{'id': 0, 'name': 'a'}, {'id': 1, 'name': 'b'}, {'id': 2, 'name': 'c'}]
current = [{'id': 1, 'name': 'b'}, {'id': 2, 'name': 'c'}, {'id': 3, 'name': 'e'}, {'id': 4, 'name': 'f'}]
def f1():
    prev_names = set(map(lambda x: x['name'], prev))
    new_items = [item for item in current if item['name'] not in prev_names]
    return new_items

def f2():
    common = []
    for c in current:
        if not any(c['id'] == p['id'] and c['name'] == p['name'] for p in prev):
            common.append(c)
    return common

def f3():
    return list(itertools.filterfalse(lambda x: x in prev, current))
print(f1())
print(timeit.timeit("f1()", setup="from __main__ import f1"))
print(f2())
print(timeit.timeit("f2()", setup="from __main__ import f2"))
print(f3())
print(timeit.timeit("f3()", setup="from __main__ import f3"))
[{'id': 3, 'name': 'e'}, {'id': 4, 'name': 'f'}]
0.8235890520736575
[{'id': 3, 'name': 'e'}, {'id': 4, 'name': 'f'}]
2.0767332719406113
[{'id': 3, 'name': 'e'}, {'id': 4, 'name': 'f'}]
0.864271447993815
I have seen Python: remove dictionary from list and Splitting a list of dictionaries into several lists of dictionaries - but this question is slightly different.
Consider this working example (same in Python 2 or 3):
#!/usr/bin/env python
from __future__ import print_function
origarr = [
{ 'name': 'test01', 'type': 0, 'value': 42 },
{ 'name': 'test02', 'type': 0, 'value': 142 },
{ 'name': 'test03', 'type': 2, 'value': 242 },
{ 'name': 'test04', 'type': 2, 'value': 342 },
{ 'name': 'test05', 'type': 3, 'value': 42 },
]
print("origarr: {}".format(origarr))
lastdictelem = origarr.pop()
print("\nlastdictelem: {}".format(lastdictelem))
print("after pop, origarr: {}".format(origarr))
namestofilter = [ 'test01', 'test02' ]
newarr = []
for iname in namestofilter:
    # find the object having the name iname
    foundidx = -1
    for ix, idict in enumerate(origarr):
        if idict.get('name') == iname:
            foundidx = ix
            break
    if foundidx > -1:
        # remove dict object via pop at index, save removed object
        remdict = origarr.pop(foundidx)
        # add removed object to newarr:
        newarr.append(remdict)
print("\nafter namestofilter:")
print("newarr: {}".format(newarr))
print("origarr: {}".format(origarr))
Basically, mylist.pop() removes the last element from mylist as an object (here a dict), and returns it - then I can trivially insert it in a new array/list; this is illustrated by the first printout of this script:
$ python2 test.py
origarr: [{'name': 'test01', 'type': 0, 'value': 42}, {'name': 'test02', 'type': 0, 'value': 142}, {'name': 'test03', 'type': 2, 'value': 242}, {'name': 'test04', 'type': 2, 'value': 342}, {'name': 'test05', 'type': 3, 'value': 42}]
lastdictelem: {'name': 'test05', 'type': 3, 'value': 42}
after pop, origarr: [{'name': 'test01', 'type': 0, 'value': 42}, {'name': 'test02', 'type': 0, 'value': 142}, {'name': 'test03', 'type': 2, 'value': 242}, {'name': 'test04', 'type': 2, 'value': 342}]
Now, what I would like to do is define an array with values for the name key in a dict (say, namestofilter = [ 'test01', 'test02' ]), and have those dicts removed from the original array/list and put into a new array/list (as .pop() would do with a single element and an object reference).
Since pop removes the item at a specific index and returns it, the above code does exactly that - and works:
...
after namestofilter:
newarr: [{'name': 'test01', 'type': 0, 'value': 42}, {'name': 'test02', 'type': 0, 'value': 142}]
origarr: [{'name': 'test03', 'type': 2, 'value': 242}, {'name': 'test04', 'type': 2, 'value': 342}]
... but I was wondering - is there a more compact way of doing that, other than "manually" for-looping through the two arrays, and calling .pop()/.append() individually (as done in the example)?
I'm not sure there is a way to make it more compact - probably not.
But you can simplify the code a little bit and also avoid spending O(n) on each .pop:
origarr = [
{ 'name': 'test01', 'type': 0, 'value': 42 },
{ 'name': 'test02', 'type': 0, 'value': 142 },
{ 'name': 'test03', 'type': 2, 'value': 242 },
{ 'name': 'test04', 'type': 2, 'value': 342 },
{ 'name': 'test05', 'type': 3, 'value': 42 },
]
namestofilter = set([ 'test01', 'test02' ])  # could be a list as in the question
print("origarr: {}".format(origarr))
lastdictelem = origarr.pop()
print("\nlastdictelem: {}".format(lastdictelem))
print("after pop, origarr: {}".format(origarr))
shift = 0
newarr = []
for ix, idict in enumerate(origarr):
    if idict['name'] in namestofilter:
        shift += 1
        newarr.append(idict)
        continue
    origarr[ix - shift] = origarr[ix]
if shift:
    origarr = origarr[:-shift]  # the slicing is an O(n) copy overhead
print("\nafter namestofilter:")
print("newarr: {}".format(newarr))
print("origarr: {}".format(origarr))
Output:
origarr: [{'name': 'test01', 'type': 0, 'value': 42}, {'name': 'test02', 'type': 0, 'value': 142}, {'name': 'test03', 'type': 2, 'value': 242}, {'name': 'test04', 'type': 2, 'value': 342}, {'name': 'test05', 'type': 3, 'value': 42}]
lastdictelem: {'name': 'test05', 'type': 3, 'value': 42}
after pop, origarr: [{'name': 'test01', 'type': 0, 'value': 42}, {'name': 'test02', 'type': 0, 'value': 142}, {'name': 'test03', 'type': 2, 'value': 242}, {'name': 'test04', 'type': 2, 'value': 342}]
after namestofilter:
newarr: [{'name': 'test01', 'type': 0, 'value': 42}, {'name': 'test02', 'type': 0, 'value': 142}]
origarr: [{'name': 'test03', 'type': 2, 'value': 242}, {'name': 'test04', 'type': 2, 'value': 342}]
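For what it's worth, a more compact alternative (my own sketch, not part of the answer above) is two list comprehensions over the original origarr; note it rebinds origarr to a new list instead of mutating it in place:

newarr = [d for d in origarr if d['name'] in namestofilter]
origarr = [d for d in origarr if d['name'] not in namestofilter]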
I'm looking for a Pythonic way to convert a list of dicts which looks like this:
res = [{'type': 1, 'name': 'Nick'}, {'type': 2, 'name': 'Helma'}, ...]
To a dict like this:
{1: [{'type': 1, 'name': 'Nick'}, ...], 2: [{'type': 2, 'name': 'Helma'}, ...]}
Now I do this with code like this (based on this question):
from collections import defaultdict

d = defaultdict(list)
for v in res:
    d[v["type"]].append(v)
Is this a Pythonic way to build a dict of lists of objects by attribute?
I agree with the commentators that here, list comprehension will lack, well, comprehension.
Having said that, here's how it can go:
import itertools
a = [{'type': 1, 'name': 'Nick'}, {'type': 2, 'name': 'Helma'}, {'type': 1, 'name': 'Moshe'}]
by_type = lambda a: a['type']
>>> dict([(k, list(g)) for (k, g) in itertools.groupby(sorted(a, key=by_type), key=by_type)])
{1: [{'name': 'Nick', 'type': 1}, {'name': 'Moshe', 'type': 1}], ...}
The code first sorts by 'type', then uses itertools.groupby to group by the exact same critera.
I stopped understanding this code 15 seconds after I finished writing it :-)
You could do it with a dictionary comprehension, which wouldn't be as illegible or incomprehensible as the comments suggest (IMHO):
# A collection of name and type dictionaries
res = [{'type': 1, 'name': 'Nick'},
{'type': 2, 'name': 'Helma'},
{'type': 3, 'name': 'Steve'},
{'type': 1, 'name': 'Billy'},
{'type': 3, 'name': 'George'},
{'type': 4, 'name': 'Sylvie'},
{'type': 2, 'name': 'Wilfred'},
{'type': 1, 'name': 'Jim'}]
# Creating a dictionary by type
res_new = {
    item['type']: [each for each in res
                   if each['type'] == item['type']]
    for item in res
}
>>> res_new
{1: [{'name': 'Nick', 'type': 1},
{'name': 'Billy', 'type': 1},
{'name': 'Jim', 'type': 1}],
2: [{'name': 'Helma', 'type': 2},
{'name': 'Wilfred', 'type': 2}],
3: [{'name': 'Steve', 'type': 3},
{'name': 'George', 'type': 3}],
4: [{'name': 'Sylvie', 'type': 4}]}
Unless I missed something, this should give you the result you're looking for.
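One trade-off worth noting (my comment, not the answer's): the comprehension above rescans res for every item, so it is O(n²), whereas the defaultdict loop in the question is a single pass. The same single pass with a plain dict, if you prefer to avoid defaultdict, could look like this:

res_new = {}
for item in res:
    res_new.setdefault(item['type'], []).append(item)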
I need to write a function that generates a rank number (integer) based on the N parameters received by that function. Each number needs to be of the same order of magnitude as its parameters; numbers are compared by the same field, and when the numbers of the same field tie, the second field is used as a tiebreaker, e.g.:
func({'field1': 100, 'field2': 3500225})
func({'field1': 50, 'field2': 5465481362135})
The number generated by the first call must be higher than the second because 100 is greater than 50.
func({'field1': 100, 'field2': 3500225})
func({'field1': 100, 'field2': 5465481362135})
The number generated by the second call must be higher than the first because field1 ties, so to break the tie we need to use the second field, and 5465481362135 is greater than 3500225.
func({'field1': 100, 'field2': 3500225})
func({'field1': 100, 'field2': 3500225, 'field3': 5465481362135, ...N})
In the sample above, the second call needs to rank higher than the first because the first call doesn't have field3, so we can treat its value as zero. Note that we can have N fields.
I have tried the code below, but the value after the dot isn't right because a simple sum doesn't take the priority of the fields into account:
Config = [
{'id': 1, 'parent_id': None, 'Description': 'root', 'level': 1},
{'id': 2, 'parent_id': 1, 'Description': 'PF', 'level': 1},
{'id': 3, 'parent_id': 1, 'Description': 'FP', 'level': 2},
{'id': 4, 'parent_id': 2, 'Description': 'Bank', 'level': 1},
{'id': 5, 'parent_id': 2, 'Description': 'Input', 'level': 2},
{'id': 6, 'parent_id': 4, 'Description': 'ST', 'level': 1},
{'id': 7, 'parent_id': 4, 'Description': 'CF', 'level': 2},
{'id': 8, 'parent_id': 4, 'Description': 'BB', 'level': 3},
{'id': 9, 'parent_id': 5, 'Description': 'DDS', 'level': 1},
{'id': 10, 'parent_id': 5, 'Description': 'Col', 'level': 2},
{'id': 11, 'parent_id': 3, 'Description': 'Qtd.Event', 'level': 1},
{'id': 12, 'parent_id': 3, 'Description': 'Unix_Date', 'level': 2},
]
def hierarchy_field(field):
    if field[0]['parent_id'] is None:
        return field
    else:
        [field.append(x) for x in hierarchy_field([dic for dic in Config if dic['id'] == field[0]['parent_id']])]
        return field

def calc_priority(fields):
    for field in [dic for dic in Config if dic['Description'] in [x for x in fields]]:
        score_level = sum(x['level'] for x in hierarchy_field([field])) * -1
        score_level += score_level
    score_value = sum(fields.values())
    score = str(score_level) + '.' + str(score_value)
    return score
    # dic[field['Description']] = score_level
    # return sorted(dic, reverse=True)
value = calc_priority({'Qtd.Event': 871, 'Unix_Date': 564645})
print(value)
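A different direction worth considering (my own sketch, not the question's code): Python tuples compare lexicographically, so a key built from the fields in priority order, with missing fields treated as zero, already ranks the dicts without packing everything into one number. The field names and ordering below are assumptions taken from the examples above:

def rank_key(d, fields=('field1', 'field2', 'field3')):
    # missing fields count as zero, as described in the question
    return tuple(d.get(f, 0) for f in fields)

items = [{'field1': 100, 'field2': 3500225},
         {'field1': 50, 'field2': 5465481362135},
         {'field1': 100, 'field2': 3500225, 'field3': 5465481362135}]

# highest-priority dict first; enumerate supplies an integer rank
for rank, item in enumerate(sorted(items, key=rank_key, reverse=True), 1):
    print(rank, item)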