How to remove 'u' efficiently in django querySet? - python

I'm making a simple Django app.
I made a database and I want to query it from JavaScript code.
Everything was fine, but I ran into a problem with the 'u' string prefix.
models.py
class FileDB(models.Model):
    """A stored file name together with an integer state value."""

    # File name, at most 200 characters.
    fileName = models.CharField(max_length=200)
    # Integer state; defaults to 0 for new rows.
    fileState = models.IntegerField(default=0)
views.py
from django.http import HttpResponse

from .models import FileDB


def GetFileList(request, state):
    """Return every FileDB row whose ``fileState`` equals ``state``.

    The ``values()`` queryset is passed to the response as-is, so the body
    is the text form of each row dict written one after another.
    """
    # Named ``rows`` rather than ``list`` so the builtin is not shadowed.
    rows = FileDB.objects.filter(fileState=state).values()
    return HttpResponse(rows)
urls.py
urlpatterns = [
...
url(r^getfilelist/(?P<state>[0-2]+)/$', view.GetFileList),
]
In Chrome browser : x.x.x.x:8000/getfilelist/0/
{'fileState': 0, u'id': 1, 'fileName': u'image.jpg'}{'fileState': 0, u'id': 2, 'fileName': u'image2.jpg'}{'fileState': 0, u'id': 3, 'fileName': u'picture1.jpg'}{'fileState': 0, u'id': 4, 'fileName': u'video1.avi'}
How can I remove the annoying 'u' prefix efficiently?

You can use the json library.
import json

from django.http import HttpResponse

from .models import FileDB


def get_file_list(request, state):
    """Return the FileDB rows whose ``fileState`` equals ``state`` as JSON.

    The response body is a JSON array with one object per row.
    """
    # Turn the queryset into a plain list of dicts before serialising it.
    # The local must not be called ``list``: assigning that name inside the
    # function makes the ``list(...)`` call itself raise UnboundLocalError.
    rows = list(FileDB.objects.filter(fileState=state).values())
    return HttpResponse(json.dumps(rows), content_type='application/json')
This should give you something like this
[{"fileState": 0, "id": 1, "fileName": "image.jpg"}, {"fileState": 0, "id": 2, "fileName": "image2.jpg"}, {"fileState": 0, "id": 3, "fileName": "picture1.jpg"}, {"fileState": 0, "id": 4, "fileName": "video1.avi"}]

You can remove the u characters from the keys and values of a dictionary by using a dictionary comprehension and casting them to str. Note that this also turns non-string values, such as the integers below, into strings.
>>> x = {'fileState': 0, u'id': 1, 'fileName': u'image.jpg'}
>>> {str(k): str(v) for k, v in x.iteritems()}
{'fileState': '0', 'id': '1', 'fileName': 'image.jpg'}
If you have a list of dictionaries, you can simply do the same but for each element in the list.
>>> y = [{'fileState': 0, u'id': 1, 'fileName': u'image.jpg'}, {'fileState': 0, u'id': 2, 'fileName': u'image2.jpg'}, {'fileState': 0, u'id': 3, 'fileName': u'picture1.jpg'}, {'fileState': 0, u'id': 4, 'fileName': u'video1.avi'}]
>>> newList = []
>>> for dct in y:
... newList.append({str(k): str(v) for k, v in dct.iteritems()})
...
>>> newList
[{'fileState': '0', 'id': '1', 'fileName': 'image.jpg'}, {'fileState': '0', 'id': '2', 'fileName': 'image2.jpg'}, {'fileState': '0', 'id': '3', 'fileName': 'picture1.jpg'}, {'fileState': '0', 'id': '4', 'fileName': 'video1.avi'}]

Related

Type Error for Path Data. Must be list or null

I am getting the following error when attempting to convert json data to a dataframe. I have successfully used this same method to convert json to a dataframe with similar data in the same script.
The full error:
TypeError: {'success': True, 'data': {'data1': 1, 'data2': 1, 'data3': 1, 'data4': True, 'data5': 0, 'data6': 0, 'data7': False, 'data8': 'ABC', 'start_date': '2000-04-14', 'end_date': '2000-09-23', 'data9': None, 'add_time': '2000-07-12 23:00:11', 'update_time': '2000-06-1420:18:55', 'data10': 1, 'data11': 'custom', 'data12': None}}
has non list value
{'data1': 1, 'data2': 1, 'data3': 1, 'data4': True, 'data5': 0, 'data6': 0, 'data7': False, 'data8': 'ABC', 'start_date': '2000-04-14', 'end_date': '2000-09-23', 'data9': None, 'add_time': '2000-07-12 23:00:11', 'update_time': '2000-06-1420:18:55', 'data10': 1, 'data11': 'custom', 'data12': None}
for path data. Must be list or null.
the function:
def get_subscriptions(id, df):
    """Fetch the subscription payload for ``id`` and normalise its ``data`` entry.

    ``record_path=['data']`` makes ``json_normalize`` treat the value under
    ``data`` as a list of records. When the response carries a single dict
    there instead, the call raises the "Must be list or null" TypeError.
    """
    subscriptions_params = {'api_token': 'abc'}
    subscriptions_headers = {'Content-Type': 'application/json'}
    subscriptions_response = requests.get(
        'https://url/{}'.format(id),
        params=subscriptions_params,
        headers=subscriptions_headers,
    )
    subscriptions_data = subscriptions_response.json()
    subscriptions_temp_df = pd.json_normalize(subscriptions_data, record_path=['data'])
I do the exact same thing with a similar (but actually more complex) piece of data with no problems. An example of the response that works:
{'success': True, 'data': [{'data1': 1, 'data2': {'data3': 1, 'name': 'name', 'email': 'email#email.com', 'data4': 0, 'data5': None, 'data6': False, 'data7': 1}, 'data8': {'data9': 1, 'name': 'name', 'email': 'email#email.com', 'data10': 0, 'data11': None, 'data12': True, 'data13': 1}, 'data14': {'data15': True, 'name': 'name' .... etc.
this one is actually massive, where as for the one with issues the error includes the full length of the data.
removed the actual data, but did not change the type of data. strings inside single quotes are just other strings. 1s are just other numbers, etc.
any ideas why one succeeds and another fails?
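The difference is that in the response that works, 'data' holds a list of records, while in the failing response 'data' holds a single dict. record_path must point at a list (or null), which is exactly what the error message says. Selecting 'data' first and normalizing it directly works: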
def get_subscriptions(id, df):
    """Fetch the subscription payload for ``id`` and normalise its ``data`` entry.

    The ``data`` value is selected first and handed to ``json_normalize``
    without ``record_path``, so a single dict is accepted as well as a list
    of dicts.
    """
    subscriptions_params = {'api_token': 'abc'}
    subscriptions_headers = {'Content-Type': 'application/json'}
    subscriptions_response = requests.get(
        'https://url/{}'.format(id),
        params=subscriptions_params,
        headers=subscriptions_headers,
    )
    subscriptions_data = subscriptions_response.json()
    # Drop the outer envelope and keep only the payload.
    subscriptions_data = subscriptions_data['data']
    subscriptions_temp_df = pd.json_normalize(subscriptions_data)

Successfully insert multiple document into MongoDB [Python]

I have the following piece of code in python:
def pushHashtagPosts(hashtagPosts):
    """Parse ``hashtagPosts`` and insert the documents into ``myDB.hashtags``."""
    from bson.json_util import loads

    myclient = pymongo.MongoClient(mongoUri)
    mydb = myclient["myDB"]
    mycol = mydb["hashtags"]
    # ``loads`` parses JSON text (str, bytes or bytearray); handing it an
    # already-built list raises TypeError.
    data = loads(hashtagPosts)
    posts = mycol.insert_many(data)
Whereas, the content of hashtagPosts looks something like this:
hashtagPosts = [{'hashtag': '###!', 'PostHashHex': '13fc9904028fb62490a3b5dc2111689376e52a06dc636c3322cfa16e33a41398', 'post': {'_id': {'$oid': '608f8eb73718c7977f9c0a43'}, 'PostHashHex': '13fc9904028fb62490a3b5dc2111689376e52a06dc636c3322cfa16e33a41398', 'PosterPublicKeyBase58Check': 'BC1YLhKJZZcPB2WbZSSekFF19UshsmmPoEjtEqrYakzusLmL25xxAJv', 'ParentStakeID': '', 'Body': 'Need hashtags ####! Or else it’s a bit difficult to create personal brand and niche on this platform. \n\nDevs are u listening?', 'ImageURLs': [], 'RecloutedPostEntryResponse': None, 'CreatorBasisPoints': 0, 'StakeMultipleBasisPoints': 12500, 'TimestampNanos': 1.6177643730879583e+18, 'IsHidden': False, 'ConfirmationBlockHeight': 13248, 'InMempool': False, 'StakeEntry': {'TotalPostStake': 0, 'StakeList': []}, 'StakeEntryStats': {'TotalStakeNanos': 0, 'TotalStakeOwedNanos': 0, 'TotalCreatorEarningsNanos': 0, 'TotalFeesBurnedNanos': 0, 'TotalPostStakeNanos': 0}, 'ProfileEntryResponse': None, 'Comments': None, 'LikeCount': 5, 'PostEntryReaderState': None, 'InGlobalFeed': False, 'IsPinned': False, 'PostExtraData': {}, 'CommentCount': 2, 'RecloutCount': 0, 'ParentPosts': None, 'PublicKeyBase58Check': 'BC1YLhKJZZcPB2WbZSSekFF19UshsmmPoEjtEqrYakzusLmL25xxAJv', 'Username': ''}},
{'hashtag': 'investementstrategy', 'PostHashHex': '92f2d08ac8f2b47fe5868b748c7f472e13ad12c284bb0e327cf317b4c2514f83', 'post': {'_id': {'$oid': '608f8eb73718c7977f9c0a3f'}, 'PostHashHex': '92f2d08ac8f2b47fe5868b748c7f472e13ad12c284bb0e327cf317b4c2514f83', 'PosterPublicKeyBase58Check': 'BC1YLhKJZZcPB2WbZSSekFF19UshsmmPoEjtEqrYakzusLmL25xxAJv', 'ParentStakeID': '', 'Body': 'Don’t say that you are going to buy ur own coin to have a steady growth of ur coin \U0001fa99. That doesn’t show the strength of ur investment nor the coin.📉📈 Strength lies in others believing in ur talent, creativity and passion enough to invest in U. 🚀🚀🚀\n#InvestementStrategy', 'ImageURLs': [], 'RecloutedPostEntryResponse': None, 'CreatorBasisPoints': 0, 'StakeMultipleBasisPoints': 12500, 'TimestampNanos': 1.6178065064906166e+18, 'IsHidden': False, 'ConfirmationBlockHeight': 13397, 'InMempool': False, 'StakeEntry': {'TotalPostStake': 0, 'StakeList': []}, 'StakeEntryStats': {'TotalStakeNanos': 0, 'TotalStakeOwedNanos': 0, 'TotalCreatorEarningsNanos': 0, 'TotalFeesBurnedNanos': 0, 'TotalPostStakeNanos': 0}, 'ProfileEntryResponse': None, 'Comments': None, 'LikeCount': 2, 'PostEntryReaderState': None, 'InGlobalFeed': False, 'IsPinned': False, 'PostExtraData': {}, 'CommentCount': 1, 'RecloutCount': 0, 'ParentPosts': None, 'PublicKeyBase58Check': 'BC1YLhKJZZcPB2WbZSSekFF19UshsmmPoEjtEqrYakzusLmL25xxAJv', 'Username': ''}},
{'hashtag': 'productivity', 'PostHashHex': 'c8fabd96f5d624d06ec8d23e90de19cf07ad4b6696dac321fda815c3000fbf1b', 'post': {'_id': {'$oid': '608f8eb73718c7977f9c0a3d'}, 'PostHashHex': 'c8fabd96f5d624d06ec8d23e90de19cf07ad4b6696dac321fda815c3000fbf1b', 'PosterPublicKeyBase58Check': 'BC1YLhKJZZcPB2WbZSSekFF19UshsmmPoEjtEqrYakzusLmL25xxAJv', 'ParentStakeID': '', 'Body': 'What is the most productive thing u have done in last 24 hours apart from Bitclout???\n\n\U0001f9d0😏🙌🏼 #productivity', 'ImageURLs': [], 'RecloutedPostEntryResponse': None, 'CreatorBasisPoints': 0, 'StakeMultipleBasisPoints': 12500, 'TimestampNanos': 1.6178362054980055e+18, 'IsHidden': False, 'ConfirmationBlockHeight': 13487, 'InMempool': False, 'StakeEntry': {'TotalPostStake': 0, 'StakeList': []}, 'StakeEntryStats': {'TotalStakeNanos': 0, 'TotalStakeOwedNanos': 0, 'TotalCreatorEarningsNanos': 0, 'TotalFeesBurnedNanos': 0, 'TotalPostStakeNanos': 0}, 'ProfileEntryResponse': None, 'Comments': None, 'LikeCount': 30, 'PostEntryReaderState': None, 'InGlobalFeed': True, 'IsPinned': False, 'PostExtraData': {}, 'CommentCount': 59, 'RecloutCount': 0, 'ParentPosts': None, 'PublicKeyBase58Check': 'BC1YLhKJZZcPB2WbZSSekFF19UshsmmPoEjtEqrYakzusLmL25xxAJv', 'Username': ''}}]
When I try to insert this data as insert_many() into mongodb I get the following error:
File "test.py", line X, in pushHashtagPosts
data = loads(hashtagPosts) TypeError: the JSON object must be str, bytes or bytearray, not 'list'
However, I have inserted the line 'data = loads(hashtagPosts)' based on the solution at bson.errors.InvalidDocument: key '$oid' must not start with '$' trying to insert document with pymongo because without the 'data = loads(hashtagPosts)' I was getting the following error:
bson.errors.InvalidDocument: key '$oid' must not start with '$'
How to resolve this and successfully insert many documents in the collection?
Your issue is that hashtagPosts is a list but loads expects to work on a string.
So working backwards, the question becomes: how did you construct hashtagPosts in the first place? As it contains $oid values, it looks like an output from dumps; but the output of dumps is a string, not a list. So how did it become a list?
If you are creating it manually, then just set it using ObjectId, e.g.
from bson import ObjectId
item = {'_id': ObjectId('608f8eb73718c7977f9c0a43')}
and then you won't need to use loads.

what is the best way to add element to list of dict in python?

I have two lists of dicts like:
a_list = [
{'key': 1, 'md5': '65d28', 'file_path': '/test/test.gz'},
{'key': 2, 'md5': '800cc9', 'file_path': '/test/test2.gz'}
]
b_list = [
{'key': 1, 'md5': '65d28', 'is_upload': False},
{'key': 2, 'md5': '800cc9', 'is_upload': True}
]
I have to get results like :
a_list = [
{'key': 1, 'md5': '65d28', 'file_path': '/test/test.gz', 'is_upload': False},
{'key': 2, 'md5': '800cc9', 'file_path': '/test/test2.gz', 'is_upload': True}
]
what is most efficient way to do that??
my first code is :
# For every row in a_list, scan b_list for the row with the same key and md5.
for a in a_list:
    for b in b_list:
        if a['key'] == b['key'] and a['md5'] == b['md5']:
            a['is_upload'] = b['is_upload']
            # Only the first match is used; stop scanning b_list.
            break
But is there a more efficient way without using two loops? Both a_list and b_list may be long lists.
Thank you!
For larger lists, you could do:
# Index both lists by their (key, md5) pair so matching rows are found by a
# dict lookup instead of a nested scan. Rows sharing the same pair collapse
# to the last one seen.
a_dict = {(ai['key'], ai['md5']): ai for ai in a_list}
b_dict = {(bi['key'], bi['md5']): bi for bi in b_list}
# Build a new merged dict per a-row; rows without a partner in b_list are
# copied unchanged. The input dicts themselves are not modified.
result = [{**value, **b_dict.get(key, {})} for key, value in a_dict.items()]
print(result)
Output
[{'file_path': '/test/test.gz', 'is_upload': False, 'key': 1, 'md5': '65d28'},
{'file_path': '/test/test2.gz', 'is_upload': True, 'key': 2, 'md5': '800cc9'}]
If you want to modify a_list in-place, do:
# Index b_list by its (key, md5) pair for constant-time lookups.
b_dict = {(bi['key'], bi['md5']): bi for bi in b_list}
for d in a_list:
    # Copy the matching b-row's fields into the a-row; no match means no change.
    d.update(b_dict.get((d['key'], d['md5']), {}))
print(a_list)
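You can use this one-loop version, provided a_list and b_list have the same length and list their entries in the same order (rows are matched by position):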
# Rows are paired by position, so both lists must be in the same order.
# zip stops at the shorter list instead of raising IndexError.
for a, b in zip(a_list, b_list):
    if a['key'] == b['key'] and a['md5'] == b['md5']:
        a['is_upload'] = b['is_upload']
Output:
a_list = [{'key': 1, 'md5': '65d28', 'file_path': '/test/test.gz', 'is_upload': False},
{'key': 2, 'md5': '800cc9', 'file_path': '/test/test2.gz', 'is_upload': True}]

Replacement for dataframe.iterrows()

I'm working on a script for migrating data from MongoDB to ClickHouse. Because nested structures aren't well supported in ClickHouse, I iterate over each nested structure and flatten it, so that every element of the nested structure becomes a distinct row in the ClickHouse database.
What I do is iterate over list of dictionaries and take target values. The structure looks like this:
[
{
'Comment': None,
'Details': None,
'FunnelId': 'MegafonCompany',
'IsHot': False,
'IsReadonly': False,
'Name': 'Новый',
'SetAt': datetime.datetime(2018, 4, 20, 10, 39, 55, 475000),
'SetById': 'ekaterina.karpenko',
'SetByName': 'Екатерина Карпенко',
'Stage': {
'Label': 'Новые',
'Order': 0,
'_id': 'newStage'
},
'Tags': None,
'Type': 'Unknown',
'Weight': 120,
'_id': 'new'
},
{
'Comment': None,
'Details': {
'Name': 'взят в работу',
'_id': 1
},
'FunnelId': 'MegafonCompany',
'IsHot': False,
'IsReadonly': False,
'Name': 'В работе',
'SetAt': datetime.datetime(2018, 4, 20, 10, 40, 4, 841000),
'SetById': 'ekaterina.karpenko',
'SetByName': 'Екатерина Карпенко',
'Stage': {
'Label': 'Приглашение на интервью',
'Order': 1,
'_id': 'recruiterStage'
},
'Tags': None,
'Type': 'InProgress',
'Weight': 80,
'_id': 'phoneInterview'
}
]
I have a function that does this on dataframe object via data.iterrows() method:
def to_flat(data, coldict, field_last_upd):
    """Expand each row of ``data`` into one flat row per status-history entry.

    Args:
        data: DataFrame to flatten. A status-change count column is added to
            it in place.
        coldict: Mapping of column descriptors; each value provides at least
            ``name`` and ``coltype``, and flat columns also ``colsubtype``
            (plus ``path`` for path-based ones).
        field_last_upd: Not used by this function.

    Returns:
        A DataFrame whose columns are the names of all FLAT and PREPARATION
        descriptors in ``coldict``, with one row per (input row, status
        index) pair.
    """
    m_status_history = stc.special_mongo_names['status_history_cand']
    n_statuse_change = coldict['n_statuse_change']['name']
    data[n_statuse_change] = n_status_change(dp.force_take_series(data, m_status_history))

    flat_cols = [x for x in coldict.values() if x['coltype'] == stc.COLTYPE_FLAT]
    old_cols_names = [
        x['name'] for x in coldict.values() if x['coltype'] == stc.COLTYPE_PREPARATION
    ]

    new_rows = []
    # NOTE(review): the per-row loop header is inferred from the use of
    # ``row`` below and from the description of the function as iterating
    # with data.iterrows() -- confirm against the original script.
    for _, row in data.iterrows():
        n_changes = row[n_statuse_change]
        for j in range(n_changes):
            t_new_value_row = np.empty(shape=[0, 0])
            for col in flat_cols:
                if col['colsubtype'] == stc.COLSUBTYPE_FLATPATH:
                    new_value = dp.under_value_line(
                        row,
                        path_for_status(j, n_changes - 1, col['path'])
                    )
                    # Dates get additional processing.
                    if col['name'] == coldict['status_set_at']['name']:
                        new_value = dp.iso_date_to_datetime(new_value)
                    if col['name'] == coldict['status_set_at_mil']['name']:
                        new_value = dp.iso_date_to_miliseconds(new_value)
                    if col['name'] == coldict['status_stage_order']['name']:
                        try:
                            new_value = int(new_value)
                        except (TypeError, ValueError, OverflowError):
                            # Not convertible to int: keep the raw value.
                            pass
                elif col['name'] == coldict['status_index']['name']:
                    new_value = j
                # NOTE(review): for any other non-path column ``new_value``
                # keeps whatever the previous column left in it -- confirm
                # this carry-over is intended.
                t_new_value_row = np.append(t_new_value_row, dp.some_to_null(new_value))
            new_rows.append(np.append(row[old_cols_names].values, t_new_value_row))

    res = pd.DataFrame(new_rows, columns=[
        x['name'] for x in coldict.values()
        if x['coltype'] == stc.COLTYPE_FLAT or x['coltype'] == stc.COLTYPE_PREPARATION
    ])
    return res
It takes values from the list of dicts, prepares them to meet Clickhouse's requirements using numpy arrays, and then appends them all together to get a new dataframe with the target values and their column names.
I've noticed that if nested structure is big enough, it begins to work much slower. I've found an article where different methods of iteration in Python are compared. article
It is claimed that it's much faster to iterate over .apply() method and even faster using vectorization. But the samples given are pretty trivial and rely on using the same function on all of the values. Is it possible to iterate over pandas object in faster manner, while using variety of functions on different types of data?
I think your first step should be converting your data into a pandas dataframe; then it will be much easier to handle. I couldn't decipher the exact functions you wanted to run, but perhaps my example helps:
import datetime
import pandas as pd
data_dict_array = [
{
'Comment': None,
'Details': None,
'FunnelId': 'MegafonCompany',
'IsHot': False,
'IsReadonly': False,
'Name': 'Новый',
'SetAt': datetime.datetime(2018, 4, 20, 10, 39, 55, 475000),
'SetById': 'ekaterina.karpenko',
'SetByName': 'Екатерина Карпенко',
'Stage': {
'Label': 'Новые',
'Order': 0,
'_id': 'newStage'
},
'Tags': None,
'Type': 'Unknown',
'Weight': 120,
'_id': 'new'
},
{
'Comment': None,
'Details': {
'Name': 'взят в работу',
'_id': 1
},
'FunnelId': 'MegafonCompany',
'IsHot': False,
'IsReadonly': False,
'Name': 'В работе',
'SetAt': datetime.datetime(2018, 4, 20, 10, 40, 4, 841000),
'SetById': 'ekaterina.karpenko',
'SetByName': 'Екатерина Карпенко',
'Stage': {
'Label': 'Приглашение на интервью',
'Order': 1,
'_id': 'recruiterStage'
},
'Tags': None,
'Type': 'InProgress',
'Weight': 80,
'_id': 'phoneInterview'
}
]
# Convert the data into something pandas can read;
# in particular, flatten the nested "Stage" dict into top-level keys.
for data_dict in data_dict_array:
    # pop() removes the nested dict, so the input dicts are modified in place.
    d_temp = data_dict.pop("Stage")
    data_dict["Stage_Label"] = d_temp["Label"]
    data_dict["Stage_Order"] = d_temp["Order"]
    data_dict["Stage_id"] = d_temp["_id"]
df = pd.DataFrame(data_dict_array)
# Say we want to set Comment to "cool" wherever Name is 'В работе'.
# In .loc[], the first argument filters the rows and the second picks the column.
df.loc[df['Name'] == 'В работе', 'Comment'] = "cool"
df

python - Convert list of dicts to hierarchy/multiple nested dicts - issue with orders

Currently I have these input:
query = [{'id': 1, 'desc': 'desc_father', 'parent_id': None}
,{'id': 2, 'desc': 'desc_child_1', 'parent_id': 10}
,{'id': 3, 'desc': 'desc_child_2', 'parent_id': 2}
,{'id': 4, 'desc': 'desc_child_5', 'parent_id': 5}
,{'id': 5, 'desc': 'desc_child_6', 'parent_id': 6}
,{'id': 6, 'desc': 'desc_child_1', 'parent_id': 1}]
This is my recursive function:
def recursive(parent_list, child_dict, parent_id):
    """Attach ``child_dict`` under the node of ``parent_list`` whose id is ``parent_id``.

    The search walks ``parent_list`` and descends into every list-valued
    entry of each node (such as ``children``). If no node has the requested
    id, nothing is attached.

    Args:
        parent_list: List of node dicts, modified in place.
        child_dict: Node to append to the parent's ``children`` list.
        parent_id: Value of the ``id`` key of the wanted parent.

    Returns:
        The same ``parent_list`` object.
    """
    for node in parent_list:
        # Compare against the node's id only, not against every value.
        if node.get('id') == parent_id:
            if 'children' not in node:
                node['children'] = []
            node['children'].append(child_dict)
            break
        # Not this node: look for the parent among its nested lists.
        for value in node.values():
            if isinstance(value, list):
                recursive(value, child_dict, parent_id)
    return parent_list
This is my main code:
results = []
for q in query:
dict_item = {}
dict_item['id'] = q['id']
dict_item['desc'] = q['desc']
if q['parent_id'] is None:
results.append(dict_item)
else:
results= recursive(results, dict_item, q['parent_id'])
return results
So, with above given data and the code, I have the result as below:
[{
'desc' : 'desc_father',
'id' : 1,
'children' : [{
'desc' : 'desc_child_1',
'id' : 2,
'children' : [{
'desc' : 'desc_child_2',
'id' : 3
}
]
}, {
'desc' : 'desc_child_1',
'id' : 6
}
]
}
]
As you can see, this result is missing the items with id = 4 and id = 5, because when the loop reaches them their parents (the items with id = 5 and id = 6) haven't been created yet. I am having difficulty fixing this, as I don't know how to traverse back or forward through the list to create the parent item before its children. Help is appreciated. Thanks in advance.
UPDATED
I have added one more case to my query: the item with id = 2 now has parent_id = 10. Since there is no item with id = 10 to act as its parent in the result, the item with id = 2 should also be a root.
My new code based on Scott Hunter guidance but I still could not make it to work. I must have misunderstood somewhere:
new_dict = {}
for q in query:
q['Children'] = []
new_dict[q['id']] = q
for k, v in new_dict.iteritems():
print k, v
if v['parent_id'] is not None and v['parent_id'] in new_dict:
new_dict[k]['Children'].append(v)
print new_dict
UPDATED-2
Now I have made it work, based on Scott Hunter's suggestion; please see my code below. However, the code looks ugly with too many for loops. Is there any way I could improve it? Thanks a lot for your support, just one more step and it will be done!
new_dict = {}
for q in query:
q['children'] = []
q['parent'] = 1
new_dict[q['id']] = q
for k, v in new_dict.iteritems():
p_id = v['parent_id']
for kk, vv in new_dict.iteritems():
if kk == p_id:
v['parent'] = 0
vv['children'].append(v)
results = []
for d_id, d_item in new_dict.iteritems():
if d_item['parent'] == 1:
results.append(d_item)
print results
This would be my solution:
#! /usr/bin/env python3
from pprint import pprint
query = [{'id': 1, 'desc': 'desc_father', 'parent_id': None}
,{'id': 2, 'desc': 'desc_child_1', 'parent_id': 1}
,{'id': 3, 'desc': 'desc_child_2', 'parent_id': 2}
,{'id': 4, 'desc': 'desc_child_5', 'parent_id': 5}
,{'id': 5, 'desc': 'desc_child_6', 'parent_id': 6}
,{'id': 6, 'desc': 'desc_child_1', 'parent_id': 1}]
def rec(query, parent):
    """Attach to ``parent`` all items of ``query`` that descend from it.

    Every visited node gets a ``children`` list (empty for leaves). The
    nodes are the dicts from ``query`` themselves, so they are modified in
    place. Each call scans the whole of ``query`` once.

    Args:
        query: List of dicts, each with an ``id`` and a ``parent_id`` key.
        parent: Dict with an ``id`` key that acts as the root of the subtree.
    """
    children = parent['children'] = []
    for item in query:
        if item['parent_id'] == parent['id']:
            children.append(item)
            # Build the subtree below this child before moving on.
            rec(query, item)
root = {'id': None}
rec(query, root)
pprint(root, indent=4)
It gives me the output (The keys are out of order, but that's what you get when you use a dictionary)
maurice@ubuntu:~/Dev/random$ python recursion_tree.py
{ 'children': [ { 'children': [ { 'children': [],
'desc': 'desc_child_2',
'id': 3,
'parent_id': 2}],
'desc': 'desc_child_1',
'id': 2,
'parent_id': 1},
{ 'children': [ { 'children': [ { 'children': [ ],
'desc': 'desc_child_5',
'id': 4,
'parent_id': 5}],
'desc': 'desc_child_6',
'id': 5,
'parent_id': 6}],
'desc': 'desc_child_1',
'id': 6,
'parent_id': 1}],
'desc': 'desc_father',
'id': 1,
'parent_id': None}
This should even work with multiple root nodes (there will be a dummy Node with the id None at the top though)
This does not require recursion.
First create a dictionary of nodes, one for each item using id as the key, which includes an empty list of children. Then you can scan that dictionary, and add each node to the list of children for its parent (skipping those whose parent is None). Once this scan is complete, every node that isn't a root will be in the child list of its parent, and thus all trees will be complete.
The roots of the forest are the nodes that have None for a parent.

Categories

Resources