Sort in nested dict by summing values python pandas - python

I have nested dict something like that
my_dict= {'name1': {'code1': {'brand1': 2}},'name2': {'code2.1': {'brand2.1': 2,'brand2.2': 8,'brand2.3': 5, 'brand2.4': 4},'code2.2': {'brand2.1': 2, 'brand1': 1, 'brand2.5': 25}},'name3': {'code1': {'brand2.1': 2},'code3': {'brand4': 1,'brand3.1':2}}}
I need to sort at the "code" level, in descending order of the sum of each code's "brands" values. For example,
target_dict= {'name1': {'code1': {'brand1': 2}}, 'name2': {'code2.2': {'brand2.1':2,'brand1': 1,'brand2.5': 25},'code2.1': {'brand2.1': 2,'brand2.2': 8,'brand2.3': 5,'brand2.4': 4}}, 'name3': {'code3': {'brand4': 1, 'brand3.1':2},'code1': {'brand2.1': 2}}}
# 'code2.2' first because 2+1+25=28 > 2+8+5+4=19
# 'code3' first because 1+2=3 > 2
I can sum values "brands" by "code" with
sum_values = [[[i, sum(v[i].values())] for i in v.keys()] for x,y in v.items() for k,v in my_dict.items()]
and try combine with sort function as
target_dict = sorted(my_dict.items(), key=lambda i: [[[i, sum(v[i].values())] for i in v.keys()] for x,y in v.items() for k,v in my_dict.items()], reverse=True).
Thanks for your attention and help!

Try (assuming sufficient version of Python to preserve creation order of dict):
# Sample input: name -> code -> brand -> count.
my_dict = {
    "name1": {"code1": {"brand1": 2}},
    "name2": {
        "code2.1": {"brand2.1": 2, "brand2.2": 8, "brand2.3": 5, "brand2.4": 4},
        "code2.2": {"brand2.1": 2, "brand1": 1, "brand2.5": 25},
    },
    "name3": {"code1": {"brand2.1": 2}, "code3": {"brand4": 1, "brand3.1": 2}},
}

# For every name, rebuild its code mapping with the codes ordered by the total
# of their brand counts, largest total first. The result relies on dicts
# keeping insertion order.
out = {}
for name, codes in my_dict.items():
    ranked = sorted(codes, key=lambda code: sum(codes[code].values()), reverse=True)
    out[name] = {code: codes[code] for code in ranked}
print(out)
Prints:
{
"name1": {"code1": {"brand1": 2}},
"name2": {
"code2.2": {"brand2.1": 2, "brand1": 1, "brand2.5": 25},
"code2.1": {"brand2.1": 2, "brand2.2": 8, "brand2.3": 5, "brand2.4": 4},
},
"name3": {"code3": {"brand4": 1, "brand3.1": 2}, "code1": {"brand2.1": 2}},
}

Related

python nested dict convert from regular json to nest

I am stuck on convert jsons from regular (key-value) to nested.
for example:
j = { 'a': 5,
'b': 3,
'c.c1': 2,
'c.c2':5,
'd.dd.d1': 0,
'd.dd.d2':9
}
and I need to get:
new_j = {
'a': 5,
'b': 3,
'c':
{'c1': 2,
'c2':5}
'd':
{'dd' :
{'d1': 0,
'd2': 9}
},
}
Is there an easy way to do this?
How about this?
# Expand dotted keys ('d.dd.d1') of the flat dict `j` into nested dicts.
j_new = {}
for dotted_key, value in j.items():
    keys = dotted_key.split('.')
    # Walk down from the root, creating one nested dict per parent segment
    # that does not exist yet.
    level = j_new
    for part in keys[:-1]:
        level = level.setdefault(part, {})
    # The last segment is the leaf that receives the value.
    level[keys[-1]] = value
print(j_new)
Which returns:
{'a': 5, 'b': 3, 'c': {'c1': 2, 'c2': 5}, 'd': {'dd': {'d1': 0, 'd2': 9}}}
Try this:
d = {}
def setValue(dic, keys, value):
    """Store ``value`` in ``dic`` under the nested path given by ``keys``.

    Every key except the last names an intermediate dict, which is created
    when missing; the last key receives ``value``. ``dic`` is modified in
    place and nothing is returned.

    Raises:
        IndexError: if ``keys`` is an empty list.
    """
    for key in keys[:-1]:
        # setdefault returns the existing child dict or installs a new one.
        dic = dic.setdefault(key, {})
    dic[keys[-1]] = value
# Feed every flat "a.b.c" entry of j through setValue to build the nested dict d.
for k, v in j.items():
    setValue(d, k.split('.'), v)
Output:
{
"a": 5,
"c": {
"c2": 5,
"c1": 2
},
"b": 3,
"d": {
"dd": {
"d2": 9,
"d1": 0
}
}
}

Python: List of Dictionary Mapping

I have a list of 10,000 Dictionaries from a JSON that look like:
my_list =
[
{"id": 1, "val": "A"},
{"id": 4, "val": "A"},
{"id": 1, "val": "C"},
{"id": 3, "val": "C"},
{"id": 1, "val": "B"},
{"id": 2, "val": "B"},
{"id": 4, "val": "C"},
{"id": 4, "val": "B"},
.
.
.
{"id": 10000, "val": "A"}
]
and I want my output to be:
mapped_list =
[
{"id": 1, "val": ["A", "B", "C"]},
{"id": 2, "val": ["B"]},
{"id": 3, "val": ["C"]},
{"id": 4, "val": ["A", "B", "C"]},
.
.
.
{"id": 10000, "val": ["A","C"]}
]
My goal is to Map the first list's "id" and its "val" to create the 2nd list as efficiently as possible. So far my running time has not been the greatest:
output = []
cache = {}
for unit in my_list:
uid = unit['id']
value = unit['val']
if (uid in cache):
output[uid][value].append(value)
else:
cache[uid] = 1
output.append({'id' : uid, 'values': value})
My approach is to make a frequency check of the 'id' to avoid iterating through 2 different lists. I believe my fault is in understanding nested dicts/lists of dicts. I have a feeling I can get this in O(n), if not better, as O(n^2) is out of the question its too easy to grow this in magnitude.
Brighten my insight PLEASE, I could use the help.
Or any other way of approaching this problem.
Maybe map(), zip(), tuple() might be a better approach for this. Let me know!
EDIT: I'm trying to accomplish this with only built-in functions. Also, the last dictionary is to exemplify that this is not limited to what I have displayed but there are more "id's" than I can share with "val" being a combination of A,B,C for whatever id its associated with.
UPDATE:
This is my final solution, if there can be any improvements, Let me know!
mapped_list = []
# Maps each id to the index of that id's entry inside mapped_list, so an
# already-seen id is found in O(1) instead of by scanning the list.
cache = {}
for item in my_list:
    item_id = item['id']
    val = item['val']
    if item_id in cache:
        mapped_list[cache[item_id]]['val'].append(val)
    else:
        cache[item_id] = len(mapped_list)
        mapped_list.append({'id': item_id, 'val': [val]})
# Sort only after the loop: sorting earlier would invalidate the cached indices.
mapped_list.sort(key=lambda k: k['id'])
print(mapped_list)
my_list = [
    {"id": 1, "val": 'A'},
    {"id": 4, "val": "A"},
    {"id": 1, "val": "C"},
    {"id": 3, "val": "C"},
    {"id": 1, "val": "B"},
    {"id": 2, "val": "B"},
    {"id": 4, "val": "C"},
    {"id": 4, "val": "B"},
    {"id": 10000, "val": "A"},
]

# Group every "val" under its "id", keeping the order in which they appear.
temp_dict = {}
for item in my_list:
    # Read the fields by key: unpacking item.values() would silently depend
    # on the key order inside each dict.
    temp_dict.setdefault(item['id'], []).append(item['val'])

mapped_list = [{'id': n, 'val': q} for n, q in temp_dict.items()]
mapped_list = sorted(mapped_list, key=lambda x: x['id'])
print(mapped_list)
If there are multiple val with the same id you can use a set like this:
from collections import defaultdict

my_list = [
    {"id": 1, "val": "A"},
    {"id": 4, "val": "A"},
    {"id": 1, "val": "C"},
    {"id": 3, "val": "C"},
    {"id": 1, "val": "B"},
    {"id": 2, "val": "B"},
    {"id": 4, "val": "C"},
    {"id": 4, "val": "B"},
    {"id": 10000, "val": "A"},
]

# One set per id, so a repeated (id, val) pair is stored only once.
ddict = defaultdict(set)
for item in my_list:
    ddict[item['id']].add(item['val'])

# Sets are unordered, so the order of the letters inside each "val" list is
# not guaranteed.
result = [{"id": k, "val": list(v)} for k, v in ddict.items()]
# Keep the sorted list instead of discarding it, and show it.
result = sorted(result, key=lambda x: x['id'])
print(result)
[{'id': 1, 'val': ['C', 'A', 'B']},
{'id': 2, 'val': ['B']},
{'id': 3, 'val': ['C']},
{'id': 4, 'val': ['C', 'A', 'B']},
{'id': 10000, 'val': ['A']}]
Insertion and lookup in a dict (or defaultdict) and in a set are O(1) on average, and sorting is O(N log N), so the overall cost is O(N + N log N) = O(N log N).
You could just use collections.defaultdict like,
>>> my_list
[{'id': 1, 'val': 'A'}, {'id': 4, 'val': 'A'}, {'id': 1, 'val': 'C'}, {'id': 3, 'val': 'C'}, {'id': 1, 'val': 'B'}, {'id': 2, 'val': 'B'}, {'id': 4, 'val': 'C'}, {'id': 4, 'val': 'B'}, {'id': 10000, 'val': 'A'}]
>>> from collections import defaultdict
>>> d = defaultdict(list)
>>> for item in my_list:
... d[item['id']].append(item['val'])
...
>>> mapped_list = [{'id': key, 'val': val} for key,val in d.items()]
>>> mapped_list = sorted(mapped_list, key=lambda x: x['id']) # just to make it always sorted by `id`
>>> import pprint
>>> pprint.pprint(mapped_list)
[{'id': 1, 'val': ['A', 'C', 'B']},
{'id': 2, 'val': ['B']},
{'id': 3, 'val': ['C']},
{'id': 4, 'val': ['A', 'C', 'B']},
{'id': 10000, 'val': ['A']}]
I think you won't be able to do it better than O(n*log(n)):
from collections import defaultdict

vals = defaultdict(list)
# Sorting by "val" first means each id's list is then filled in sorted order.
# Note that this reorders my_list in place.
my_list.sort(key=lambda x: x['val'])
for i in my_list:
    vals[i['id']].append(i['val'])
output = [{'id': k, 'val': v} for k, v in vals.items()]
output.sort(key=lambda x: x['id'])
Output:
[{'id': 1, 'val': ['A', 'B', 'C']},
{'id': 2, 'val': ['B']},
{'id': 3, 'val': ['C']},
{'id': 4, 'val': ['A', 'B', 'C']},
{'id': 10000, 'val': ['A']}]
I created mapped_list using setdefault:
# Group each "val" under its "id"; setdefault creates the list on first sight.
d = {}
for i in my_list:
    d.setdefault(i['id'], []).append(i['val'])
# sorted(d.items()) orders the (id, vals) pairs by id, since dict keys are unique.
mapped_list = [{'id': key, 'val': val} for key, val in sorted(d.items())]
print(mapped_list)
defaultdict performs better than setdefault.
I am only posting this answer to show another way of building mapped_list.

Add multiples dict into a dict

I have multiples list of dictionaries like this:
list_of_dictionaries_1 = [{
'a':1,
'b':2
}, {
'a':3,
'b':4
}]
list_of_dictionaries_2 = [{
'c':1,
'd':2
}, {
'c':3,
'd':4
}]
And I want to add each element into a new dictionary.
new_dictionary = {
data: [{
'a':1,
'b':2
}, {
'a':3,
'b':4
}, {
'c':1,
'd':2
}, {
'c':3,
'd':4
}]
}
So I made this for each list of dictionaries:
for dictionary_ in list_of_dictionaries_1:
new_dictionary['data'] = dictionary_
But this just return the last element in the list of dictionaries.
new_dictionary = {
data:[{
'c':3,
'd':4
}]
}
How can I add all the dictionaries to the new dictionary?
If I understood correctly, you could do it like this:
# Start with an empty list under 'data' and append to it; assigning to
# new_dictionary['data'] inside the loop would overwrite the previous element.
new_dictionary = {'data': []}
for elem in list_of_dictionaries_1 + list_of_dictionaries_2:
    new_dictionary['data'].append(elem)
print(new_dictionary)
Output:
{'data': [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}, {'c': 1, 'd': 2}, {'c': 3, 'd': 4}]}
You can use itertools.chain to merge the two lists:
from itertools import chain
new_dictionary = {'data': list(chain(list_of_dictionaries_1, list_of_dictionaries_2))}
new_dictionary becomes:
{'data': [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}, {'c': 1, 'd': 2}, {'c': 3, 'd': 4}]}
Your dictionary structure looks inconsistent. But however, you can do the below to achieve what you are trying for.
list_of_dictionaries_1 = [
    {'a': 1, 'b': 2},
    {'a': 3, 'b': 4},
]
list_of_dictionaries_2 = [
    {'c': 1, 'd': 2},
    {'c': 3, 'd': 4},
]
# In-place concatenation: the first list grows by the elements of the second.
list_of_dictionaries_1 += list_of_dictionaries_2
print(list_of_dictionaries_1)
Output:
[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}, {'c': 1, 'd': 2}, {'c': 3, 'd': 4}]

Add String to list items and then create a dict in python

I have two list and i would like to create dict with each list where key value is a string and then combine those two dicts in one, below are my list :
list_1 : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
list_2 : ['BACKUP_INFO', 'sqlite_sequence', 'BACKUP_INFO_SEARCH', 'BACKUP_INFO_SEARCH_content', 'BACKUP_INFO_SEARCH_segments', 'BACKUP_INFO_SEARCH_segdir', 'BACKUP_INFO_SEARCH_docsize', 'BACKUP_INFO_SEARCH_stat', 'FILE_INFO', 'FILE_INFO_SEARCH', 'FILE_INFO_SEARCH_content', 'FILE_INFO_SEARCH_segments', 'FILE_INFO_SEARCH_segdir', 'FILE_INFO_SEARCH_docsize', 'FILE_INFO_SEARCH_stat']
List_1 should be added with dict key value as 'id'
List_2 should be added with dict key value as 'table'
Then, both the above dicts should be combined into one dict to form something similar to this :
{
"output":
{
"id": 1,
"table" : BACKUP_INFO
}
{
"id": 2,
"table" :sqlite_sequence
}
}
But, i am getting the below output using
table_list_out = dict(zip(list_1, list_2))
return { 'output' : {'id' : list_1, 'table_name' : list_2}}:
{
"output": {
"id": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15
],
"table_name": {
"1": "BACKUP_INFO",
"2": "sqlite_sequence",
"3": "BACKUP_INFO_SEARCH",
"4": "BACKUP_INFO_SEARCH_content",
"5": "BACKUP_INFO_SEARCH_segments",
"6": "BACKUP_INFO_SEARCH_segdir",
"7": "BACKUP_INFO_SEARCH_docsize",
"8": "BACKUP_INFO_SEARCH_stat",
"9": "FILE_INFO",
"10": "FILE_INFO_SEARCH",
"11": "FILE_INFO_SEARCH_content",
"12": "FILE_INFO_SEARCH_segments",
"13": "FILE_INFO_SEARCH_segdir",
"14": "FILE_INFO_SEARCH_docsize",
"15": "FILE_INFO_SEARCH_stat"
}
}
}
You can use a list comprehension:
# Ids 1..15, paired positionally with the table names below.
list_1 = list(range(1, 16))
list_2 = [
    'BACKUP_INFO',
    'sqlite_sequence',
    'BACKUP_INFO_SEARCH',
    'BACKUP_INFO_SEARCH_content',
    'BACKUP_INFO_SEARCH_segments',
    'BACKUP_INFO_SEARCH_segdir',
    'BACKUP_INFO_SEARCH_docsize',
    'BACKUP_INFO_SEARCH_stat',
    'FILE_INFO',
    'FILE_INFO_SEARCH',
    'FILE_INFO_SEARCH_content',
    'FILE_INFO_SEARCH_segments',
    'FILE_INFO_SEARCH_segdir',
    'FILE_INFO_SEARCH_docsize',
    'FILE_INFO_SEARCH_stat',
]
# One {'id': ..., 'table': ...} record per (id, table name) pair.
new_dict = {
    'output': [
        dict(id=table_id, table=table_name)
        for table_id, table_name in zip(list_1, list_2)
    ]
}
Output:
{'output': [{'table': 'BACKUP_INFO', 'id': 1}, {'table': 'sqlite_sequence', 'id': 2}, {'table': 'BACKUP_INFO_SEARCH', 'id': 3}, {'table': 'BACKUP_INFO_SEARCH_content', 'id': 4}, {'table': 'BACKUP_INFO_SEARCH_segments', 'id': 5}, {'table': 'BACKUP_INFO_SEARCH_segdir', 'id': 6}, {'table': 'BACKUP_INFO_SEARCH_docsize', 'id': 7}, {'table': 'BACKUP_INFO_SEARCH_stat', 'id': 8}, {'table': 'FILE_INFO', 'id': 9}, {'table': 'FILE_INFO_SEARCH', 'id': 10}, {'table': 'FILE_INFO_SEARCH_content', 'id': 11}, {'table': 'FILE_INFO_SEARCH_segments', 'id': 12}, {'table': 'FILE_INFO_SEARCH_segdir', 'id': 13}, {'table': 'FILE_INFO_SEARCH_docsize', 'id': 14}, {'table': 'FILE_INFO_SEARCH_stat', 'id': 15}]}
From the looks of things, your desired output is impossible. Notice that there are multiple values corresponding to the key: "output".
What is possible is something like this, where the value corresponding to 'output' is a list of dictionaries.
return {'output': [{'id': x, 'table': y} for x, y in zip(list1, list2)]}
You can just loop through it, I'm sure there's a one-liner but this is rather clear.
# Builds a single dict under "output" that maps each table name to its id.
output = {"output": {}}
# zip pairs the two lists positionally; it stops at the shorter list.
for table_id, table_name in zip(list_1, list_2):
    output["output"][table_name] = table_id
print(output)

Summing 2 level of nested dictionaries in Python

I have 2 nested dictionaries variable that have the similar keys, each defining different values :
data1 = {
"2010":{
'A':2,
'B':3,
'C':5
},
"2011":{
'A':1,
'B':2,
'C':3
},
"2012":{
'A':1,
'B':2,
'C':4
}
}
data2 = {
"2010":{
'A':4,
'B':4,
'C':5
},
"2011":{
'A':1,
'B':1,
'C':3
},
"2012":{
'A':3,
'B':2,
'C':4
}
}
In my case, i need to sum both dictionaries values based on the same keys, so the answer will be like this:
data3 = {
"2010":{
'A':6,
'B':7,
'C':10
},
"2011":{
'A':2,
'B':3,
'C':6
},
"2012":{
'A':4,
'B':4,
'C':8
}
}
How can i do that?
Given the structure of the two dictionaries is the same, you can use dictionary comprehension for that:
data3 = {key:{key2:val1+data2[key][key2] for key2,val1 in subdic.items()} for key,subdic in data1.items()}
In the repl:
>>> {key:{key2:val1+data2[key][key2] for key2,val1 in subdic.items()} for key,subdic in data1.items()}
{'2010': {'B': 7, 'C': 10, 'A': 6}, '2012': {'B': 4, 'C': 8, 'A': 4}, '2011': {'B': 3, 'C': 6, 'A': 2}}
The comprehension works as follows: in the outerloop, we iterate over the key,subdic of data1. So in your case, key is a year and subdic is the dictionary (of data1) for that year.
Now for each of these years, we iterate over the items of the subdic and here key2 is 'A', 'B' and 'C'. val1 is the value that we find in data1 for these keys. We get the other value by querying data2[key][key2]. We sum these up and construct new dictionaries for that.
I hope this helps:
data1 = {"2010": {'A': 2, 'B': 3, 'C': 5}, "2011": {'A': 1, 'B': 2, 'C': 3}, "2012": {'A': 1, 'B': 2, 'C': 4}}
data2 = {"2010": {'A': 4, 'B': 4, 'C': 5}, "2011": {'A': 1, 'B': 1, 'C': 3}, "2012": {'A': 3, 'B': 2, 'C': 4}}

# Accumulate every (year, key) value from both inputs into data3. Years or
# keys that appear in only one input are kept as they are.
data3 = {}
for data in [data1, data2]:
    for year, values in data.items():
        for x, y in values.items():
            # Create the year's dict on first use, then add to the running
            # total, starting from 0 for a key not seen before.
            totals = data3.setdefault(year, {})
            totals[x] = totals.get(x, 0) + y
print(data3)
This works for arbitrary lengths of the inner and outer dictionaries.
Another solution :)
You can also use zip to get both data1 and data2 in the same for loop, and then use collections.Counter to add the value of each dicts.
from collections import Counter
>> {k1: Counter(v1) + Counter(v2) for (k1, v1), (k2, v2) in zip(sorted(data1.items()), sorted(data2.items()))}
{'2011': Counter({'C': 6, 'B': 3, 'A': 2}), '2010': Counter({'C': 10, 'B': 7, 'A': 6}), '2012': Counter({'C': 8, 'A': 4, 'B': 4})}
You will end up with Counter objects, but since Counter is a subclass of dict you can still use the same methods as on a regular dict.
If you add dict() to Max Chrétien's nice short solution from above, you will end up with regular dictionaries:
data3 = {k1: dict(Counter(v1) + Counter(v2)) for (k1, v1), (k2, v2) in
zip(data1.items(), data2.items())}
This will, however, only work correctly if both dictionaries share exactly the same keys, as already discussed above. Willem Van Onsem's solution will not work either if any keys are not shared by both dictionaries (it will raise an error, whereas Max Chrétien's solution will in that case merge items incorrectly). Since you mentioned you are using JSON data that always has the same structure with the same keys, this should not be a problem, and Max Chrétien's solution should work nicely.
In case you do want to make sure only keys shared by both dictionaries (and their subdictionaries) are used, the following will work. Notice how I added 'X': 111111 as a key value pair to the 2012 subdictionary and "1999": { 'Z': 999999 } as an entire subdictionary.
def sum_two_nested_dicts(d1, d2):
    """Sum two two-level dicts, keeping only keys present in both.

    A top-level key appears in the result only if both ``d1`` and ``d2``
    have it. Within such a key, an inner key appears only if both inner
    dicts have it, and its value is the sum of the two inputs' values.
    Neither input is modified.

    Returns:
        A new dict of new inner dicts, with keys in the order of ``d1``.
    """
    d_sum = {}
    for topkey, sub1 in d1.items():
        if topkey not in d2:
            continue  # top-level key not shared: drop it
        sub2 = d2[topkey]
        d_sum[topkey] = {
            key: val + sub2[key] for key, val in sub1.items() if key in sub2
        }
    return d_sum
# Demo input: data1 has an extra inner key ('X' under "2012") and an extra
# year ("1999") that data2 does not have.
data1 = {
    "2010": {'A': 2, 'B': 3, 'C': 5},
    "2011": {'A': 1, 'B': 2, 'C': 3},
    "2012": {'A': 1, 'B': 2, 'C': 4, 'X': 111111},
    "1999": {'Z': 999999},
}
data2 = {
    "2010": {'A': 4, 'B': 4, 'C': 5},
    "2011": {'A': 1, 'B': 1, 'C': 3},
    "2012": {'A': 3, 'B': 2, 'C': 4},
}
data3 = sum_two_nested_dicts(data1, data2)
print(data3)
# different order of arguments
data4 = sum_two_nested_dicts(data2, data1)
print(data4)
# {'2010': {'C': 10, 'A': 6, 'B': 7}, '2012': {'C': 8, 'A': 4, 'B': 4}, '2011': {'C': 6, 'A': 2, 'B': 3}}
# {'2010': {'C': 10, 'A': 6, 'B': 7}, '2012': {'C': 8, 'A': 4, 'B': 4}, '2011': {'C': 6, 'A': 2, 'B': 3}}
I realize this is far from as concise and elegant as can be, but as I already wrote it anyways, I post it here in case someone is trying to achieve this particular functionality.
Long and bloated version which retains unshared keys/values, just because I already wrote it...
def sum_nested_dicts(dic1, dic2):
    """Sum two two-level dicts, keeping keys that only one of them has.

    For a top-level key present in both inputs, the inner dicts are merged:
    inner keys present in both get the sum of the two values, and inner keys
    present in only one keep that value. A top-level key present in only one
    input contributes a shallow copy of its inner dict, so changing the
    result never changes ``dic1`` or ``dic2``.

    Returns:
        A new dict. Keys of ``dic1`` come first, then the remaining keys of
        ``dic2``; inner dicts are ordered the same way.
    """
    d_sum = {}
    # Ordered union of the top-level keys.
    topkeys = list(dic1) + [k for k in dic2 if k not in dic1]
    for topkey in topkeys:
        if topkey in dic1 and topkey in dic2:
            # Shared top-level key: merge the two inner dicts.
            sub1, sub2 = dic1[topkey], dic2[topkey]
            merged = {}
            for key in list(sub1) + [k for k in sub2 if k not in sub1]:
                if key in sub1 and key in sub2:
                    merged[key] = sub1[key] + sub2[key]
                elif key in sub1:
                    merged[key] = sub1[key]
                else:
                    merged[key] = sub2[key]
            d_sum[topkey] = merged
        elif topkey in dic1:
            # Only in dic1: copy so the result does not alias the input.
            d_sum[topkey] = dict(dic1[topkey])
        else:
            # Only in dic2.
            d_sum[topkey] = dict(dic2[topkey])
    return d_sum
See Crystal's solution for what seems to be the most concise and functional solution posted thus far.

Categories

Resources