My input is
[['apple',{'john':3,'anna':4,'kitty':6}],['pear',{'john':4,'anna':3,'kitty':3}]]
Expected output:
{
'key':['apple','pear'],
'value':[
{
'name':'john',
'data':[3,4]
},
{
'name':'anna',
'data':[4,3]
},
{
'name':'kitty',
'data':[6,3]
}
]
}
The 'key' entry is a list that collects the first element of each item, such as 'apple' and 'pear', and the 'value' entry is another list.
How should I do it?
You can achieve this with the help of collections.defaultdict:
from collections import defaultdict

key = []                   # first element of every pair, in input order
value = defaultdict(list)  # person name -> list of that person's scores
for pair in l:
    key.append(pair[0])
    for name, score in pair[1].items():
        value[name].append(score)
To get the result:
In [15]: {'key': key, 'value': [{'name': k, 'data': v} for k, v in value.items()]}
Out[15]:
{'key': ['apple', 'pear'],
'value': [
{'data': [4, 3], 'name': 'anna'},
{'data': [6, 3], 'name': 'kitty'},
{'data': [3, 4], 'name': 'john'}]}
For a more efficient (?) version, subclass defaultdict to customize the default __missing__ hook to call the default_factory with missing key as a parameter (I copied this text and the implementation from the other answer of mine). Then you'll be able to do this in a single pass:
from collections import defaultdict
class mydefaultdict(defaultdict):
    """defaultdict variant whose factory receives the missing key."""

    def __missing__(self, key):
        # Unlike plain defaultdict, call the factory WITH the missing key,
        # cache the produced entry, and return it.
        entry = self.default_factory(key)
        self[key] = entry
        return entry
# pass 'name' to the dictionary
value = mydefaultdict(lambda name: {'name': name, 'data': []})
key = []
for pair in l:
    key.append(pair[0])
    for name, score in pair[1].items():
        # first access of a new name builds {'name': name, 'data': []}
        value[name]['data'].append(score)
The result is then
In [24]: {'key': key, 'value': value.values()}
Out[24]:
{'key': ['apple', 'pear'],
'value': [
{'data': [4, 3], 'name': 'anna'},
{'data': [6, 3], 'name': 'kitty'},
{'data': [3, 4], 'name': 'john'}]}
In Python 3, you'll have to call list(value.values()) instead of just value.values() to get a list object.
You can use following snippet:
# Build {'key': [labels...], 'value': [{'name':..., 'data':[...]}, ...]}
# in a single pass over the input pairs.
# (Renamed `input` -> `data`: the original shadowed the `input` builtin.)
data = [['apple', {'john': 3, 'anna': 4, 'kitty': 6}],
        ['pear', {'john': 4, 'anna': 3, 'kitty': 3}]]
tmp = {}  # name -> scores collected so far, in first-seen order
output = {'key': [], 'value': []}
for label, scores in data:
    output['key'].append(label)
    for name, score in scores.items():
        # setdefault replaces the original's per-item try/except KeyError
        tmp.setdefault(name, []).append(score)
output['value'] = [{'name': name, 'data': values} for name, values in tmp.items()]
This function can help you
def map_data(data):
    """Convert [[label, {name: score}], ...] into
    {'key': [labels...], 'value': [{'name': ..., 'data': [...]}, ...]}."""
    labels = []
    by_name = {}  # name -> its {'name': ..., 'data': [...]} entry
    for item in data:
        labels.append(item[0])
        scores = item[1]
        for name in scores.keys():
            entry = by_name.get(name)
            if not entry:
                entry = {"name": name, "data": []}
            entry["data"].append(scores[name])
            by_name[name] = entry
    return {"key": labels, "value": list(by_name.values())}
Here is my solution:
import json

my_list = [['apple', {'john': 3, 'anna': 4, 'kitty': 6}],
           ['pear', {'john': 4, 'anna': 3, 'kitty': 3}]]

# Pull out the per-fruit score dicts; take the name order from the first one
# (assumes every dict carries the same names).
score_dicts = [entry[1] for entry in my_list]
names = score_dicts[0].keys()
# One list per name, holding that name's score from each fruit.
name_values = [[scores[name] for scores in score_dicts] for name in names]

result = {
    'key': [entry[0] for entry in my_list],
    'value': [
        {'name': name, 'value': values}
        for name, values in zip(names, name_values)
    ],
}
print(json.dumps(result, indent=4))
And the output:
{
"value": [
{
"name": "john",
"value": [
3,
4
]
},
{
"name": "kitty",
"value": [
6,
3
]
},
{
"name": "anna",
"value": [
4,
3
]
}
],
"key": [
"apple",
"pear"
]
}
EDIT:
emmm, just found a better way to merge the dict value.
If the name_dict look like this one:
>>> name_dict
[{'john': [3], 'kitty': [6], 'anna': [4]}, {'john': [4], 'kitty': [3], 'anna': [3]}]
the task would be easy. What's the difference? The value is a list.
Now, we can use collections.Counter to merge two dicts!
>>> Counter(name_dict[0]) + Counter(name_dict[1])
Counter({'kitty': [6, 3], 'anna': [4, 3], 'john': [3, 4]})
so here is the new solution, we convert the value to a list first:(skip the 'key', only show the 'value'):
from collections import Counter

# NOTE(review): this Counter trick appears to work only on Python 2.  On
# Python 3, Counter addition compares each merged value against 0
# (`newcount > 0` inside Counter.__add__), and `list > int` raises
# TypeError -- so the `name_values` line below crashes.  Verify before use.
my_list = [['apple',{'john':3,'anna':4,'kitty':6}],['pear',{'john':4,'anna':3,'kitty':3}]]
# Keep only the per-fruit score dicts.
name_list = [item[1] for item in my_list]
# Wrap every score in a one-element list so that "adding" concatenates.
for item in name_list:
    for key, value in item.items():
        item[key] = [value]
name_values = dict(Counter(name_list[0]) + Counter(name_list[1])) # {'john': [3, 4], 'kitty': [6, 3], 'anna': [4, 3]}
print([{'name': name, 'value': value} for (name, value) in name_values.items()])
# output
[{'name': 'john', 'value': [3, 4]}, {'name': 'kitty', 'value': [6, 3]}, {'name': 'anna', 'value': [4, 3]}]
Related
I have the following list of dictionaries:
[
{"id": 1, "roll_id": ["101", "201"]},
{"id": 2, "roll_id": ["301", "201"]},
{"id": 3, "roll_id": ["424"]}
]
Now I need to convert this into the following format:
[
{'roll_id': '101', 'id':["1"]},
{'roll_id': '201', 'id':["1","2"]},
{'roll_id': '301', 'id':["2"]},
{'roll_id': '424', 'id':["3"]}
]
Can anyone help me, please?
You can use a dictionary+setdefault to collect the values, then convert to list:
# Group ids under each roll_id, keeping first-seen order of roll_ids.
out = {}
for record in l:
    for rid in record['roll_id']:
        # setdefault creates the output entry on first sight of a roll_id
        bucket = out.setdefault(rid, {'roll_id': rid, 'id': []})
        bucket['id'].append(record['id'])
out = list(out.values())
Another solution using pandas:
l = [
    {"id": 1, "roll_id": ["101", "201"]},
    {"id": 2, "roll_id": ["301", "201"]},
    {"id": 3, "roll_id": ["424"]},
]

import pandas as pd

# One row per (id, roll_id) pair, then collect the ids behind each roll_id.
df = pd.json_normalize(l).explode('roll_id')
grouped = df.groupby('roll_id', as_index=False)['id'].agg(list)
out = grouped.to_dict('records')
Output:
[{'roll_id': '101', 'id': [1]},
{'roll_id': '201', 'id': [1, 2]},
{'roll_id': '301', 'id': [2]},
{'roll_id': '424', 'id': [3]}]
Try this:
data = [{"id": 1, "roll_id": ["101", "201"]}, {"id": 2, "roll_id": ["301", "201"]}, {"id": 3, "roll_id": ["424"]}]

# Invert the mapping: one output dict per roll_id, collecting the ids of
# every input record that mentions it (first-seen order preserved).
res = []
for el in data:
    for r in el["roll_id"]:
        # Position of the existing entry for this roll_id, if any.
        find = [i for i, v in enumerate(res) if v['roll_id'] == r]
        if not find:
            res.append({'roll_id': r, "id": [el['id']]})
        else:
            # BUG FIX: the original chained a second ['id'].append(...) onto
            # the return value of append(), which is None -> TypeError.
            res[find[0]]['id'].append(el['id'])
print(res)
Result:
[{'roll_id': '101', 'id': [1]}, {'roll_id': '201', 'id': [1, 2]}, {'roll_id': '301', 'id': [2]}, {'roll_id': '424', 'id': [3]}]
Not my best, but it works.
Regards,
I think the most efficient way of doing it could look something like this:
def convert(input_dict):
    """Regroup [{'id': ..., 'roll_id': [...]}, ...] into one dict per
    roll_id holding the (stringified) ids of every record containing it."""
    result = {}
    for dic in input_dict:
        # The id is the same for every roll_id of this record.
        str_id = str(dic["id"])
        for roll_id in dic["roll_id"]:
            if roll_id in result:
                result[roll_id]["id"].append(str_id)
            else:
                result[roll_id] = {"roll_id": roll_id, "id": [str_id]}
    return list(result.values())
# NOTE(review): `test` is not defined in this snippet -- presumably the
# question's input list; define it before running.
print(convert(test))
I have nested dict something like that
my_dict= {'name1': {'code1': {'brand1': 2}},'name2': {'code2.1': {'brand2.1': 2,'brand2.2': 8,'brand2.3': 5, 'brand2.4': 4},'code2.2': {'brand2.1': 2, 'brand1': 1, 'brand2.5': 25}},'name3': {'code1': {'brand2.1': 2},'code3': {'brand4': 1,'brand3.1':2}}}
I need to sort at the "code" level, ordering by the sum of the "brand" values. For example,
target_dict= {'name1': {'code1': {'brand1': 2}}, 'name2': {'code2.2': {'brand2.1':2,'brand1': 1,'brand2.5': 25},'code2.1': {'brand2.1': 2,'brand2.2': 8,'brand2.3': 5,'brand2.4': 4}}, 'name3': {'code3': {'brand4': 1, 'brand3.1':2},'code1': {'brand2.1': 2}}}
*# 'code2.2' first because 2+1+25=28 > 2+8+5+4=19
# 'code3' first because 1+2=3 > 2
I can sum values "brands" by "code" with
sum_values = [[[i, sum(v[i].values())] for i in v.keys()] for x,y in v.items() for k,v in my_dict.items()]
and try combine with sort function as
target_dict = sorted(my_dict.items(), key=lambda i: [[[i, sum(v[i].values())] for i in v.keys()] for x,y in v.items() for k,v in my_dict.items()], reverse=True).
Thanks for your attention and help!
Try (assuming sufficient version of Python to preserve creation order of dict):
my_dict = {
    "name1": {"code1": {"brand1": 2}},
    "name2": {
        "code2.1": {"brand2.1": 2, "brand2.2": 8, "brand2.3": 5, "brand2.4": 4},
        "code2.2": {"brand2.1": 2, "brand1": 1, "brand2.5": 25},
    },
    "name3": {"code1": {"brand2.1": 2}, "code3": {"brand4": 1, "brand3.1": 2}},
}


def _brand_total(code_item):
    # code_item is a (code, {brand: value}) pair; the sort key is the
    # sum of that code's brand values.
    return sum(code_item[1].values())


# Rebuild each name's code dict with codes ordered by descending brand total.
out = {
    name: dict(sorted(codes.items(), key=_brand_total, reverse=True))
    for name, codes in my_dict.items()
}
print(out)
Prints:
{
"name1": {"code1": {"brand1": 2}},
"name2": {
"code2.2": {"brand2.1": 2, "brand1": 1, "brand2.5": 25},
"code2.1": {"brand2.1": 2, "brand2.2": 8, "brand2.3": 5, "brand2.4": 4},
},
"name3": {"code3": {"brand4": 1, "brand3.1": 2}, "code1": {"brand2.1": 2}},
}
I have a list of 10,000 Dictionaries from a JSON that look like:
my_list =
[
{"id": 1, "val": "A"},
{"id": 4, "val": "A"},
{"id": 1, "val": "C"},
{"id": 3, "val": "C"},
{"id": 1, "val": "B"},
{"id": 2, "val": "B"},
{"id": 4, "val": "C"},
{"id": 4, "val": "B"},
.
.
.
{"id": 10000, "val": "A"}
]
and I want my output to be:
mapped_list =
[
{"id": 1, "val": ["A", "B", "C"]},
{"id": 2, "val": ["B"]},
{"id": 3, "val": ["C"]},
{"id": 4, "val": ["A", "B", "C"]},
.
.
.
{"id": 10000, "val": ["A","C"]}
]
My goal is to Map the first list's "id" and its "val" to create the 2nd list as efficiently as possible. So far my running time has not been the greatest:
# NOTE(review): the OP's attempt, kept as posted -- it has two defects:
#  - on a duplicate id, `output[uid]` treats the id as a LIST INDEX (output
#    is a list), and `[value]` then looks up the val string ('A'/'B'/...)
#    as a dict key that was never created, so it raises at runtime;
#  - new entries are stored under the key 'values' with a plain string,
#    not a list, so nothing can be appended later anyway.
output = []
cache = {}
for unit in my_list:
    uid = unit['id']
    value = unit['val']
    if (uid in cache):
        output[uid][value].append(value)
    else:
        cache[uid] = 1
        output.append({'id' : uid, 'values': value})
My approach is to make a frequency check of the 'id' to avoid iterating through 2 different lists. I believe my fault is in understanding nested dicts/lists of dicts. I have a feeling I can get this in O(n), if not better, as O(n^2) is out of the question its too easy to grow this in magnitude.
Brighten my insight PLEASE, I could use the help.
Or any other way of approaching this problem.
Maybe map(), zip(), tuple() might be a better approach for this. Let me know!
EDIT: I'm trying to accomplish this with only built-in functions. Also, the last dictionary is to exemplify that this is not limited to what I have displayed but there are more "id's" than I can share with "val" being a combination of A,B,C for whatever id its associated with.
UPDATE:
This is my final solution, if there can be any improvements, Let me know!
# Group vals by id in one pass: `cache` maps each id to its position in
# `mapped_list`, so duplicates append into the existing entry.
mapped_list = []
cache = {}
for item in my_list:
    uid = item['id']   # renamed from `id` to avoid shadowing the builtin
    val = item['val']
    if uid in cache:
        # BUG FIX: the original mixed the names `output` and `mapped_list`
        # (it appended to mapped_list but updated/printed `output`, which
        # is never defined here); everything must target `mapped_list`.
        mapped_list[cache[uid]]['val'].append(val)
    else:
        cache[uid] = len(mapped_list)
        mapped_list.append({'id': uid, 'val': [val]})
mapped_list.sort(key=lambda k: k['id'])
print(mapped_list)
my_list = [
    {"id": 1, "val": 'A'},
    {"id": 4, "val": "A"},
    {"id": 1, "val": "C"},
    {"id": 3, "val": "C"},
    {"id": 1, "val": "B"},
    {"id": 2, "val": "B"},
    {"id": 4, "val": "C"},
    {"id": 4, "val": "B"},
    {"id": 10000, "val": "A"}
]

# id -> list of vals, in first-seen order.
temp_dict = {}
for item in my_list:
    # Relies on each dict's insertion order being (id, val), as written above.
    n, q = item.values()
    # setdefault replaces the original's explicit membership check plus the
    # misleading `temp_dict.get(n, [])` (whose default, had the guard ever
    # been wrong, would have silently dropped the value into a throwaway list).
    temp_dict.setdefault(n, []).append(q)

mapped_list = [{'id': n, 'val': q} for n, q in temp_dict.items()]
mapped_list = sorted(mapped_list, key=lambda x: x['id'])
print(mapped_list)
If there are multiple val with the same id you can use a set like this:
from collections import defaultdict

my_list = [
    {"id": 1, "val": "A"},
    {"id": 4, "val": "A"},
    {"id": 1, "val": "C"},
    {"id": 3, "val": "C"},
    {"id": 1, "val": "B"},
    {"id": 2, "val": "B"},
    {"id": 4, "val": "C"},
    {"id": 4, "val": "B"},
    {"id": 10000, "val": "A"}
]

# Collect each id's vals in a set, so repeated (id, val) pairs collapse.
ddict = defaultdict(set)
for entry in my_list:
    ddict[entry['id']].add(entry['val'])

result = [{"id": uid, "val": list(vals)} for uid, vals in ddict.items()]
# NOTE: sorted() returns a NEW list; in a script this value is discarded
# (it only displayed because the answer was written in a REPL).
sorted(result, key=lambda x: x['id'])
[{'id': 1, 'val': ['C', 'A', 'B']},
{'id': 2, 'val': ['B']},
{'id': 3, 'val': ['C']},
{'id': 4, 'val': ['C', 'A', 'B']},
{'id': 10000, 'val': ['A']}]
Insertion and lookup in a dict (or defaultdict) and in a set have O(1) complexity, and the sort is O(N log N), so the overall complexity is O(N + N log N) = O(N log N).
You could just use collections.defaultdict like,
>>> my_list
[{'id': 1, 'val': 'A'}, {'id': 4, 'val': 'A'}, {'id': 1, 'val': 'C'}, {'id': 3, 'val': 'C'}, {'id': 1, 'val': 'B'}, {'id': 2, 'val': 'B'}, {'id': 4, 'val': 'C'}, {'id': 4, 'val': 'B'}, {'id': 10000, 'val': 'A'}]
>>> from collections import defaultdict
>>> d = defaultdict(list)
>>> for item in my_list:
... d[item['id']].append(item['val'])
...
>>> mapped_list = [{'id': key, 'val': val} for key,val in d.items()]
>>> mapped_list = sorted(mapped_list, key=lambda x: x['id']) # just to make it always sorted by `id`
>>> import pprint
>>> pprint.pprint(mapped_list)
[{'id': 1, 'val': ['A', 'C', 'B']},
{'id': 2, 'val': ['B']},
{'id': 3, 'val': ['C']},
{'id': 4, 'val': ['A', 'C', 'B']},
{'id': 10000, 'val': ['A']}]
I think you won't be able to do it better than O(n*log(n)):
from collections import defaultdict

# Sorting by val first means each id's collected list comes out in
# alphabetical order; then group the vals under their id.
vals = defaultdict(list)
my_list.sort(key=lambda x: x['val'])
for record in my_list:
    vals[record['id']].append(record['val'])
output = [{'id': uid, 'val': v} for uid, v in vals.items()]
output.sort(key=lambda x: x['id'])
Output:
[{'id': 1, 'val': ['A', 'B', 'C']},
{'id': 2, 'val': ['B']},
{'id': 3, 'val': ['C']},
{'id': 4, 'val': ['A', 'B', 'C']},
{'id': 1000, 'val': ['A']}]
I created mapped_list using setdefault:
# Group every record's val under its id; setdefault creates the list the
# first time an id is seen.
d = {}
for record in my_list:
    d.setdefault(record['id'], []).append(record['val'])
mapped_list = [{'id': uid, 'val': vals} for uid, vals in sorted(d.items())]
print(mapped_list)
defaultdict gives better performance than setdefault.
I just make this answer for creating mapped_list using another approach
I have a list of python dicts like this:
[{
'id': 1,
'name': 'name1'
}, {
'id': 2,
'name': 'name2'
}, {
'id': 3,
'name': 'name1'
}]
What I want to do is to create a new list of dictionaries, containing only the ones that have the key 'name' duplicated, and group them.
[{
'id1': 1,
'id2': 3,
'name': 'name1'
}]
The first list is an SQL query output and I need to delete the rows that have the key 'name' duplicated, keeping only one.
You can use itertools.groupby:
import itertools

d = [{'id': 1, 'name': 'name1'}, {'id': 2, 'name': 'name2'}, {'id': 3, 'name': 'name1'}]

# groupby only groups adjacent records, so sort by name first.
by_name = lambda rec: rec['name']
new_data = [[name, list(group)]
            for name, group in itertools.groupby(sorted(d, key=by_name), key=by_name)]
# Keep only names with more than one record, numbering their ids id1, id2, ...
final_dicts = [{'name': name,
                **{f'id{pos}': rec['id'] for pos, rec in enumerate(group, 1)}}
               for name, group in new_data if len(group) > 1]
Output:
[{'name': 'name1', 'id1': 1, 'id2': 3}]
I suggest you the following solution, quite easy to read and understand:
from collections import defaultdict

ds = [{'id': 1, 'name': 'name1'},
      {'id': 2, 'name': 'name2'},
      {'id': 3, 'name': 'name1'}]

# name -> every id seen with that name, in input order
newd = defaultdict(list)
for d in ds:
    newd[d['name']].append(d['id'])
# Here newd is {'name1': [1, 3], 'name2': [2]}

result = []
for k, v in newd.items():
    if len(v) > 1:
        # BUG FIX: the original wrote {f'id{i}': i for i in v}, which numbers
        # the keys by the id VALUE ('id1', 'id3') and stores the id as its
        # own value.  Number by position instead, matching the question's
        # expected output ('id1', 'id2').
        d = {f'id{pos}': id_ for pos, id_ in enumerate(v, 1)}
        d['name'] = k
        result.append(d)
print(result) # [{'id1': 1, 'id2': 3, 'name': 'name1'}]
You can use collections.Counter:
from collections import Counter
from operator import itemgetter

l = [{'id': 1, 'name': 'name1'}, {'id': 2, 'name': 'name2'}, {'id': 3, 'name': 'name1'}]

# Count how often each name occurs, then build one merged dict per
# duplicated name, numbering its ids id1, id2, ...
# (Decomposed from the original's single print(...) one-liner for
# readability; the printed output is identical.)
name_counts = Counter(map(itemgetter('name'), l))
result = [
    {'name': name,
     **{'id%d' % i: d['id']
        for i, d in enumerate([d for d in l if d['name'] == name], 1)}}
    for name, count in name_counts.items() if count > 1
]
print(result)
This outputs:
[{'name': 'name1', 'id1': 1, 'id2': 3}]
My current list:
my_list = [
{'id': 1, 'val': [6]},
{'id': 2, 'val': [7]},
{'id': 3, 'val': [8]},
{'id': 2, 'val': [9]},
{'id': 1, 'val': [10]},
]
Desired output:
my_list = [
{'id': 1, 'val': [6, 10]},
{'id': 2, 'val': [7, 9]},
{'id': 3, 'val': [8]},
]
what I tried so far:
# NOTE(review): the OP's sketch as posted -- the else branch is deliberately
# left as comments (a linear scan of my_new_list per duplicate is what the
# OP is asking to avoid), so this snippet is not runnable as-is.
my_new_list = []
id_set = set()  # ids already copied into my_new_list
for d in my_list:
    if d['id'] not in id_set:
        id_set.add(d['id'])
        temp = {'id': d['id'], 'val': d['val']}
        my_new_list.append(temp)
    else:
        # loop over the new list and find the dict which already have d['id'] and update by appending value
        # but this is not efficient
any other more efficient approach or may be some inbuilt function I'm not aware of.
PS: Order is important!
.setdefault() is your friend:
(We should use collections.OrderedDict to remember the order that keys were first inserted.)
>>> import collections
>>> result = collections.OrderedDict()
>>> for d in my_list:
... result.setdefault(d["id"], []).extend(d["val"])
>>> lst = []
>>> for k, v in result.items():
... lst.append({"id": k, "val": v})
Same approach as ozgur's, but using collections.defaultdict (note: the session below is Python 2 — on Python 3 use d.items() instead of d.iteritems()):
>>> from collections import defaultdict
>>> d = defaultdict(list)
>>> for dd in my_list:
d[dd['id']].extend(dd['val'])
>>> d
defaultdict(<type 'list'>, {1: [6, 10], 2: [7, 9], 3: [8]})
>>>
>>> lst = []
>>> for k,v in d.iteritems():
lst.append({'id':k, 'val':v})
>>> lst
[{'id': 1, 'val': [6, 10]}, {'id': 2, 'val': [7, 9]}, {'id': 3, 'val': [8]}]
>>>
You can use itertools.groupby in order to sort and group the original list by 'id' and accumulate the 'val' for each group:
from itertools import groupby

# Sort & group by id (groupby only merges adjacent records), then flatten
# each group's per-record val lists into one list with sum(..., []).
get_id = lambda record: record['id']
result = [
    {'id': uid, 'val': sum([rec['val'] for rec in grp], [])}
    for uid, grp in groupby(sorted(my_list, key=get_id), key=get_id)
]