Merge two lists of dictionaries based on a specific key - python

Ok, so I have this code.
data1 = [
{'Id': 1},
{'Id': 2}
]
data2 = [
{'Id': 1, 'score': 100, 'testdata': 333},
{'Id': 2, 'score': 200, 'testdata': 555},
{'Id': 3, 'score': 300, 'testdata': 444}
]
# Desired result: only 'score' is carried over from data2, under the same
# lowercase key that data2 uses.
expectedData = [
    {'Id': 1, 'score': 100},
    {'Id': 2, 'score': 200},
]
def merge_lists(data1, data2, key):
    """Combine the dicts of both lists that share a value under `key`.

    The first dict seen for each `key` value is kept and every later dict
    with the same value is merged into it with ``dict.update``, so ALL of
    its fields are copied and the kept dict is modified in place.

    Returns a list with one dict per distinct `key` value.
    """
    by_key = {}
    for record in data1 + data2:
        ident = record[key]
        if ident not in by_key:
            # First occurrence: store the original dict itself (no copy).
            by_key[ident] = record
            continue
        by_key[ident].update(record)
    return list(by_key.values())
merged = merge_lists(data1, data2, 'Id')
# print() with parentheses runs on both Python 2 and Python 3.
print(merged)
The problem is that this will merge every value (other than 'Id') from data2 into data1. I only want it to merge the key 'score', but I'm really not sure how to restrict the merge to that key. I've tried several conditional statements to single out the 'score' key, but I can't seem to get anything working.
Thanks for any help

You could use a helper function like this:
data1 = [
{'Id': 1},
{'Id': 2}
]
data2 = [
{'Id': 1, 'score': 100, 'testdata': 333},
{'Id': 2, 'score': 200, 'testdata': 555},
{'Id': 3, 'score': 300, 'testdata': 444}
]
def get_score(list_of_dict, id_value):
    """Return ``{"score": ...}`` for the first dict whose "Id" equals `id_value`.

    Args:
        list_of_dict: iterable of dicts, each holding an "Id" key.
        id_value: the "Id" value to look for.

    Returns:
        A new one-item dict with the matching entry's "score", or an empty
        dict when no entry matches.

    Raises:
        KeyError: if an inspected dict has no "Id", or the matching dict has
            no "score".
    """
    for dict_ in list_of_dict:
        if dict_["Id"] == id_value:
            return {"score": dict_["score"]}
    # No match: an empty dict keeps ``some_dict.update(get_score(...))`` a
    # harmless no-op instead of failing on an implicit None.
    return {}
# Copy every dict, not just the list: a shallow list copy would share the
# dicts with data1, and the update below would then modify data1 as well.
res = [dict(d) for d in data1]
for dict_ in res:
    dict_.update(get_score(data2, dict_["Id"]))
print(res)
# [{'score': 100, 'Id': 1}, {'score': 200, 'Id': 2}]

Here's code that will merge the way you want. Note that if there are multiple dicts in data2 with a matching key value, only the first one will be used.
data1 = [
{'Id': 1},
{'Id': 2},
]
data2 = [
{'Id': 1, 'score': 100, 'testdata': 333},
{'Id': 2, 'score': 200, 'testdata': 555},
{'Id': 3, 'score': 300, 'testdata': 444},
]
def merge_lists(data1, data2, key, fields=('score',)):
    """Build one new dict per entry of `data1`, enriched from `data2`.

    For every dict in `data1`, the first dict in `data2` holding the same
    value under `key` is located and only the requested `fields` are copied
    from it. Neither input list nor its dicts are modified.

    Args:
        data1: list of dicts that each contain `key`.
        data2: list of dicts that each contain `key` and the `fields`.
        key: name of the field used to match entries.
        fields: iterable of field names to copy from the matching `data2`
            entry (pass a tuple or list, not a bare string). Defaults to
            just 'score'.

    Returns:
        A list of new dicts, in `data1` order, each holding `key` plus the
        copied fields.

    Raises:
        KeyError: if an entry of `data1` has no match in `data2`, or if a
            dict lacks `key` or a requested field.
    """
    result = []
    for d1 in data1:
        val = d1[key]
        dnew = {key: val}
        for d2 in data2:
            if d2[key] == val:
                for field in fields:
                    dnew[field] = d2[field]
                break
        else:
            # The inner loop finished without `break`: nothing matched.
            raise KeyError('No match for %r: %s' % (key, val))
        result.append(dnew)
    return result
merged = merge_lists(data1, data2, 'Id')
# print() with parentheses runs on both Python 2 and Python 3.
print(merged)
output
[{'score': 100, 'Id': 1}, {'score': 200, 'Id': 2}]
If we change data1 to
data1 = [
{'Id': 1},
{'Id': 5},
{'Id': 2},
]
Then we get this output:
Traceback (most recent call last):
File "./qtest.py", line 45, in <module>
merged = merge_lists(data1, data2, 'Id')
File "./qtest.py", line 41, in merge_lists
raise KeyError('No match for %r: %s' % (key, val))
KeyError: "No match for 'Id': 5"

Related

How to remove empty key-value from dictionary comprehension when applying filter

I am new to Python and learning how to use dictionary comprehensions. I have a movie cast list of dictionaries that I would like to filter on a specific value using the dictionary comprehension technique. I was able to get it to work, but for some reason empty dictionaries are added as well whenever the condition is not met. Why does this happen, and how can I ensure these are not included?
movie_cast = [{'id': 90633,'name': 'Gal Gadot','cast_id': 0, 'order': 0},
{'id': 62064, 'name': 'Chris Pine','cast_id': 15, 'order': 1},
{'id': 41091, 'name': 'Kristen Wiig', 'cast_id': 12,'order': 2},
{'id': 41092, 'name': 'Pedro Pascal', 'cast_id': 13, 'order': 3},
{'id': 32, 'name': 'Robin Wright', 'cast_id': 78, 'order': 4}]
# Keep cast members whose 'order' is below this bound; 2 keeps orders 0 and 1,
# which is what the current and desired results below show.
limit = 2
cast_limit = []
for dict in movie_cast:
d = {key:value for (key,value) in dict.items() if dict['order'] < limit}
cast_limit.append(d)
print(cast_limit)
current_result = [{'id': 90633,'name': 'Gal Gadot','cast_id': 0, 'order': 0},
{'id': 62064, 'name': 'Chris Pine','cast_id': 15, 'order': 1},{},{},{}]
desired_result = [{'id': 90633,'name': 'Gal Gadot','cast_id': 0, 'order': 0},
{'id': 62064, 'name': 'Chris Pine','cast_id': 15, 'order': 1}]
Try with this (you need a list comprehension, not a dict comprehension):
cast_limit = [dct for dct in movie_cast if dct['order'] < limit]
I.e., you need to filter out elements of the list, not elements of a dict.

remove duplicate dictionary python

I have a problem, I have a list like this:
[{'id': 34, 'questionid': 5, 'text': 'yes', 'score': 1}, {'id': 10, 'questionid': 5,
'text': 'test answer updated', 'score': 2}, {'id': 20, 'questionid': 5, 'text': 'no',
'score': 0}, {'id': 35, 'questionid': 5, 'text': 'yes', 'score': 1}]
and I want to remove entries that repeat an earlier entry's "questionid", "text" and "score"; for example, in this case I want output like this:
[{'id': 34, 'questionid': 5, 'text': 'yes', 'score': 1}, {'id': 10, 'questionid': 5,
'text': 'test answer updated', 'score': 2}, {'id': 20, 'questionid': 5, 'text': 'no',
'score': 0}]
How can I get this output in python?
We could create a dictionary that uses the ("questionid", "text", "score") tuple as key and the dicts as values, and use it to skip entries whose tuple has already been seen in data:
from operator import itemgetter
# A record is identified by its (questionid, text, score) triple.
signature = itemgetter("questionid", "text", "score")
first_seen = {}
for record in data:
    # setdefault stores the record only if its triple is not present yet,
    # so the first record of every group of duplicates is the one kept.
    first_seen.setdefault(signature(record), record)
out = list(first_seen.values())
Output:
[{'id': 34, 'questionid': 5, 'text': 'yes', 'score': 1},
{'id': 10, 'questionid': 5, 'text': 'test answer updated', 'score': 2},
{'id': 20, 'questionid': 5, 'text': 'no', 'score': 0}]

Increment a key value in a list of dictionaries

I would like to add an id key to a list of dictionaries, where each id represents the enumerated nested dictionary.
Current list of dictionaries:
# All ids start out as 0.
current_list_d = [
    {'id': 0, 'name': 'Paco', 'age': 18},
    {'id': 0, 'name': 'John', 'age': 20},
    {'id': 0, 'name': 'Claire', 'age': 22},
]
Desired output:
# ids are counted/enumerated starting from 1.
output_list_d = [
    {'id': 1, 'name': 'Paco', 'age': 18},
    {'id': 2, 'name': 'John', 'age': 20},
    {'id': 3, 'name': 'Claire', 'age': 22},
]
My code:
for d in current_list_d:
d["id"]+=1
You could use a simple for loop with enumerate and update in-place the id keys in the dictionaries:
for new_id, d in enumerate(current_list_d, start=1):
d['id'] = new_id
current_list_d
[{'id': 1, 'name': 'Paco', 'age': 18},
{'id': 2, 'name': 'John', 'age': 20},
{'id': 3, 'name': 'Claire', 'age': 22}]
You can use a variable.
# Running counter: each dictionary gets the next id, starting at 1.
id_val = 1
# The loop variable is named `record` so the builtin `dict` is not shadowed.
for record in current_list_d:
    record["id"] = id_val
    id_val += 1

Python update a value in a list of dictionaries from another list of dictionaries

Given the two lists of dictionaries below (score_list and update_list), how do I update score_list from the dictionaries in update_list?
score_list = [{'id': 1, 'score': 123}, {'id': 2, 'score': 234}, {'id': 3, 'score': 345}]
update_list = [{'id': 1, 'score': 500}, {'id': 3, 'score': 300}]
# return this
score_list = [{'id': 1, 'score': 500}, {'id': 2, 'score': 234}, {'id': 3, 'score': 300}]
I highly recommend using a mapping when you have a unique key to match:
# Index the updates by id so each lookup is a single dict access.
update_mapping = {d['id']: d for d in update_list}
updated_list = []
for d in score_list:
    # Overlay the matching update (if any) on a copy of the record, so fields
    # the update does not mention are kept and the input dicts stay untouched.
    merged = dict(d)
    merged.update(update_mapping.get(d['id'], {}))
    updated_list.append(merged)
score_list = updated_list

Merge dicts from a list of dicts based on some key/value pair

I have the list of dicts shown below, and I want to merge some of the dicts into one based on a key/value pair.
[
{'key': 16, 'value': 3, 'user': 3, 'id': 7},
{'key': 17, 'value': 4, 'user': 3, 'id': 7},
{'key': 17, 'value': 5, 'user': 578, 'id': 7},
{'key': 52, 'value': 1, 'user': 3, 'id': 48},
{'key': 46, 'value': 2, 'user': 578, 'id': 48}
]
Now as you can see dict 1 & 2 have same values for user & id keys. So it is possible to merge these two dicts like
[
{'key': [16,17], 'value': [3,4], 'user': 3, 'id': 7},
{'key': [17], 'value': [5], 'user': 578, 'id': 7},
{'key': [52], 'value': [1], 'user': 3, 'id': 48},
{'key': [46], 'value': [2], 'user': 578, 'id': 48}
]
That is, each combination of user and id values must appear only once. What would be an efficient way to do this merge (if possible)?
The following function converts the list of dictionaries to the new format:
def convert(d):
    """Group the dicts of `d` by their (user, id) pair.

    The first dict seen for each pair is reused as the group: its 'key' and
    'value' entries are wrapped in lists IN PLACE, and the 'key' / 'value'
    of later dicts with the same pair are appended to those lists.

    Returns the ``values()`` of the internal pair -> dict mapping.
    """
    grouped = {}
    for record in d:
        pair = (record['user'], record['id'])
        if pair not in grouped:
            # First dict for this pair: turn its scalars into one-item lists
            # and keep the dict itself as the group.
            record['key'] = [record['key']]
            record['value'] = [record['value']]
            grouped[pair] = record
            continue
        group = grouped[pair]
        group['key'].append(record['key'])
        group['value'].append(record['value'])
    return grouped.values()
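It will mutate the original dictionaries, and the ordering of dictionaries in the result is arbitrary on Python versions before 3.7 (where dicts do not preserve insertion order). When applied to the input it will produce the following result (shown in one possible order):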
[
{'id': 7, 'value': [5], 'key': [17], 'user': 578},
{'id': 7, 'value': [3, 4], 'key': [16, 17], 'user': 3},
{'id': 48, 'value': [1], 'key': [52], 'user': 3},
{'id': 48, 'value': [2], 'key': [46], 'user': 578}
]
Let dicts be your original list of dictionaries. This idea maps each unique combination of user and id to a dict (created on demand by defaultdict(dict)) that collects the 'key' and 'value' entries in lists. The final result is the list of values from that mapping.
from collections import defaultdict
# One dict per (user, id) pair, created on first access by the defaultdict.
tmp = defaultdict(dict)
for info in dicts:
    group = tmp[info['user'], info['id']]
    group.setdefault('key', []).append(info['key'])
    group.setdefault('value', []).append(info['value'])
# Write the pair back into each group so the result dicts are self-contained.
for pair, group in tmp.items():  # python2: use iteritems
    group['user'], group['id'] = pair
result = list(tmp.values())  # python2: tmp.values() already gives a list
del tmp
You can use following aggregate function:
def aggregate(lst):
    """Yield one dict per distinct (user, id) pair found in `lst`.

    A pair that occurs once yields its original dict unchanged. A pair that
    occurs several times yields a new dict whose 'key' and 'value' entries
    are tuples collecting the values of all its members.
    """
    groups = {}
    for item in lst:
        pair = (item['user'], item['id'])
        if pair not in groups:
            groups[pair] = []
        groups[pair].append(item)
    for pair, members in groups.items():
        if len(members) == 1:
            # Single occurrence: hand back the original dict as-is.
            yield members[0]
            continue
        user, id_ = pair
        keys = tuple(member['key'] for member in members)
        values = tuple(member['value'] for member in members)
        yield {'key': keys, 'value': values, 'user': user, 'id': id_}
# print() with parentheses runs on both Python 2 and Python 3.
print(list(aggregate(lst)))
[{'id': 7, 'value': 5, 'key': 17, 'user': 578},
{'id': 7, 'value': (3, 4), 'key': (16, 17), 'user': 3},
{'id': 48, 'value': 1, 'key': 52, 'user': 3},
{'id': 48, 'value': 2, 'key': 46, 'user': 578}]

Categories

Resources