So I have the following code:
stu_list = [
{'stu_name': 'Abel', 'sex': 'male', 'score': 90, 'cls_id': 1},
{'stu_name': 'Carl', 'sex': 'male', 'score': 80, 'cls_id': 2},
{'stu_name': 'Cecil', 'sex': 'female', 'score': 60, 'cls_id': 1},
{'stu_name': 'Elijah', 'sex': 'female', 'score': 70, 'cls_id': 2},
{'stu_name': 'Dick', 'sex': 'male', 'score': 90, 'cls_id': 3},
{'stu_name': 'Donald', 'sex': 'male', 'score': 80, 'cls_id': 3},
{'stu_name': 'Jack', 'sex': 'male', 'score': 80, 'cls_id': 2},
{'stu_name': 'Laurent', 'sex': 'female', 'score': 90, 'cls_id': 1},
{'stu_name': 'Rex', 'sex': 'female', 'score': 90, 'cls_id': 1},
{'stu_name': 'Tom', 'sex': 'male', 'score': 70, 'cls_id': 2},
{'stu_name': 'Roy', 'sex': 'female', 'score': 90, 'cls_id': 3},
{'stu_name': 'Steve', 'sex': 'male', 'score': 70, 'cls_id': 1}
]
cls_list = [
{'id': 1, 'cls_name': 'Class One'},
{'id': 2, 'cls_name': 'Class Two'},
{'id': 3, 'cls_name': 'Class Three'},
{'id': 4, 'cls_name': 'Class Four'}
]
What I want is for the program to output each class name, followed by the number of people in that class, then the number of males in it, and finally the number of females. What I have so far is this:
lst_empty = {}
for cls in cls_list:
lst_empty.setdefault(cls['cls_name'], 0)
for stu in stu_list:
if stu['cls_id'] == cls['id']:
lst_empty[cls['cls_name']] +=1
if stu['sex'] == 'male':
Since setdefault only lets me store two items (a key and a single value), I'm kind of stuck on what to do with the `if stu['sex'] == 'male'` part.
I guess my main question is: how do I store the number of males in the empty dictionary?
I don't want to drastically change my code; I just want to know what I can do with the code I have right now.
Thanks!
The easiest, yet not ideal solution is:
# For each class, print its name followed by the total, male and female
# head counts.
for cls in cls_list:
    # Students whose cls_id matches this class's id.
    cls_students = [s for s in stu_list if s['cls_id'] == cls['id']]
    cls_males = [s for s in cls_students if s['sex'] == 'male']
    cls_females = [s for s in cls_students if s['sex'] == 'female']
    print(cls['cls_name'])
    print(f"Total students: {len(cls_students)}")
    print(f"Num of males: {len(cls_males)}")
    print(f"Num of females: {len(cls_females)}")
But you had better consider the pandas library, with its convenient DataFrames.
Just use dict.get:
# One counter dict per class, collected in a list (a dict has no .append).
lst_empty = []
for cls in cls_list:
    dct = {}
    for stu in stu_list:
        if stu['cls_id'] == cls['id']:
            # dict.get supplies 0 the first time a key is seen, so no
            # setdefault call is needed beforehand.
            dct[cls['cls_name']] = dct.get(cls['cls_name'], 0) + 1
            # will handle both 'male' and 'female' cases
            dct[stu['sex']] = dct.get(stu['sex'], 0) + 1
    # A class without any students contributes an empty dict.
    lst_empty.append(dct)
You could use setdefault with a list or another dictionary as the default value.
lst_empty.setdefault(cls['cls_name'], [0, 0, 0])
or
lst_empty.setdefault(cls['cls_name'], {"nb_people" : 0, "nb_male" : 0, "nb_female" : 0})
I have made lst_empty a list of dicts. The code is as follows:
stu_list = [
    {'stu_name': 'Abel', 'sex': 'male', 'score': 90, 'cls_id': 1},
    {'stu_name': 'Carl', 'sex': 'male', 'score': 80, 'cls_id': 2},
    {'stu_name': 'Cecil', 'sex': 'female', 'score': 60, 'cls_id': 1},
    {'stu_name': 'Elijah', 'sex': 'female', 'score': 70, 'cls_id': 2},
    {'stu_name': 'Dick', 'sex': 'male', 'score': 90, 'cls_id': 3},
    {'stu_name': 'Donald', 'sex': 'male', 'score': 80, 'cls_id': 3},
    {'stu_name': 'Jack', 'sex': 'male', 'score': 80, 'cls_id': 2},
    {'stu_name': 'Laurent', 'sex': 'female', 'score': 90, 'cls_id': 1},
    {'stu_name': 'Rex', 'sex': 'female', 'score': 90, 'cls_id': 1},
    {'stu_name': 'Tom', 'sex': 'male', 'score': 70, 'cls_id': 2},
    {'stu_name': 'Roy', 'sex': 'female', 'score': 90, 'cls_id': 3},
    {'stu_name': 'Steve', 'sex': 'male', 'score': 70, 'cls_id': 1},
]
cls_list = [
    {'id': 1, 'cls_name': 'Class One'},
    {'id': 2, 'cls_name': 'Class Two'},
    {'id': 3, 'cls_name': 'Class Three'},
    {'id': 4, 'cls_name': 'Class Four'},
]

# One summary dict per class, shaped as
# {<class name>: total, 'male': males, 'female': females}.
lst_empty = []
for cls in cls_list:
    # Start both counters at zero so classes without students still appear.
    dct = {cls['cls_name']: 0, 'male': 0}
    for stu in stu_list:
        if stu['cls_id'] == cls['id']:
            dct[cls['cls_name']] += 1
            if stu['sex'] == 'male':
                dct['male'] += 1
    # Every student of the class who was not counted as male is counted
    # as female.
    dct['female'] = dct[cls['cls_name']] - dct['male']
    lst_empty.append(dct)

for i in lst_empty:
    print(i)
The output is:
{'Class One': 5, 'male': 2, 'female': 3}
{'Class Two': 4, 'male': 3, 'female': 1}
{'Class Three': 3, 'male': 2, 'female': 1}
{'Class Four': 0, 'male': 0, 'female': 0}
You can do it this way if you want the result to be stored in a dict:
# Summary record per class, keyed by class id.
cls_dict = {}
for cls in cls_list:
    cls_dict[cls['id']] = {
        'id': cls['id'],
        'cls_name': cls['cls_name'],
        'population': 0,
        'males': 0,
        'females': 0,
    }

for stu in stu_list:
    # Students whose class id does not appear in cls_list are skipped.
    summary = cls_dict.get(stu['cls_id'])
    if summary is None:
        continue
    # Only students recorded as 'female' or 'male' are counted, so that
    # population always equals males + females.
    if stu['sex'] == 'female':
        summary['females'] += 1
        summary['population'] += 1
    elif stu['sex'] == 'male':
        summary['males'] += 1
        summary['population'] += 1

print(cls_dict)
The output will be:
{1: {'id': 1, 'cls_name': 'Class One', 'population': 5, 'males': 2, 'females': 3},
2: {'id': 2, 'cls_name': 'Class Two', 'population': 4, 'males': 3, 'females': 1},
3: {'id': 3, 'cls_name': 'Class Three', 'population': 3, 'males': 2, 'females': 1},
4: {'id': 4, 'cls_name': 'Class Four', 'population': 0, 'males': 0, 'females': 0}}
Related
I have a problem, I have a list like this:
[{'id': 34, 'questionid': 5, 'text': 'yes', 'score': 1}, {'id': 10, 'questionid': 5,
'text': 'test answer updated', 'score': 2}, {'id': 20, 'questionid': 5, 'text': 'no',
'score': 0}, {'id': 35, 'questionid': 5, 'text': 'yes', 'score': 1}]
and I want to remove the entries that duplicate an earlier entry's "questionid", "text" and "score". For example, in this case I want output like this:
[{'id': 34, 'questionid': 5, 'text': 'yes', 'score': 1}, {'id': 10, 'questionid': 5,
'text': 'test answer updated', 'score': 2}, {'id': 20, 'questionid': 5, 'text': 'no',
'score': 0}]
How can I get this output in python?
We could create a dictionary that has the ("questionid", "text", "score") tuple as its key and the dict as its value, and use this dictionary to check for duplicate values in data:
from operator import itemgetter
# Key each record by its (questionid, text, score) triple; only the first
# record seen for a given triple is kept.
get_key = itemgetter("questionid", "text", "score")  # built once, not per row
out = {}
for d in data:
    # setdefault stores d only when the key is not present yet.
    out.setdefault(get_key(d), d)
# Dicts keep insertion order (Python 3.7+), so first-seen order is preserved.
out = list(out.values())
Output:
[{'id': 34, 'questionid': 5, 'text': 'yes', 'score': 1},
{'id': 10, 'questionid': 5, 'text': 'test answer updated', 'score': 2},
{'id': 20, 'questionid': 5, 'text': 'no', 'score': 0}]
Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 1 year ago.
Improve this question
I have a list of dicts (same format) like this :
L = [
{'id': 1, 'name': 'john', 'age': 34},
{'id': 1, 'name': 'john', 'age': 34},
{'id': 2, 'name': 'hanna', 'age': 30},
{'id': 2, 'name': 'hanna', 'age': 30},
{'id': 3, 'name': 'stack', 'age': 40}
]
I want to remove duplication and get the number of this duplication like this
[
{'id': 1, 'name': 'john', 'age': 34, 'duplication': 2},
{'id': 2, 'name': 'hanna', 'age': 30, 'duplication': 2},
{'id': 3, 'name': 'stack', 'age': 40, 'duplication': 1}
]
I already managed to remove the duplication by using a set.... but I can't get the number of duplications
my code :
no_duplication = [dict(s) for s in set(frozenset(d.items()) for d in L)]
no_duplication = [
{'id': 1, 'name': 'john', 'age': 34},
{'id': 2, 'name': 'hanna', 'age': 30},
{'id': 3, 'name': 'stack', 'age': 40}
]
Here is a solution you can give a try using collections.Counter,
from collections import Counter
# Each dict is turned into a frozenset of its (key, value) pairs so that it is
# hashable and Counter can tally identical dicts; this requires every value in
# the dicts to be hashable. The tally is stored under the "duplicated" key.
print([
{**dict(k), "duplicated": v}
for k, v in Counter(frozenset(i.items()) for i in L).items()
])
[{'age': 34, 'duplicated': 2, 'id': 1, 'name': 'john'},
{'age': 30, 'duplicated': 2, 'id': 2, 'name': 'hanna'},
{'age': 40, 'duplicated': 1, 'id': 3, 'name': 'stack'}]
ar = [
    {'id': 1, 'name': 'john', 'age': 34},
    {'id': 1, 'name': 'john', 'age': 34},
    {'id': 2, 'name': 'hanna', 'age': 30},
    {'id': 2, 'name': 'hanna', 'age': 30},
    {'id': 3, 'name': 'stack', 'age': 40},
]

br = []   # unique records, in first-seen order
cnt = []  # cnt[k] is the number of times br[k] occurs in ar
for item in ar:
    if item in br:
        cnt[br.index(item)] += 1
    else:
        # Store a copy so that annotating br below leaves ar untouched.
        br.append(dict(item))
        cnt.append(1)

# Attach each tally to its record only after counting is finished; adding
# the key earlier would break the equality test in the loop above.
for record, count in zip(br, cnt):
    record['duplication'] = count
The desired output is contained in br as:
[
{'id': 1, 'name': 'john', 'age': 34, 'duplication': 2},
{'id': 2, 'name': 'hanna', 'age': 30, 'duplication': 2},
{'id': 3, 'name': 'stack', 'age': 40, 'duplication': 1}
]
I have a list of dictionaries and I need to count duplicates by specific keys.
For example:
[
{'name': 'John', 'age': 10, 'country': 'USA', 'height': 185},
{'name': 'John', 'age': 10, 'country': 'Canada', 'height': 185},
{'name': 'Mark', 'age': 10, 'country': 'USA', 'height': 180},
{'name': 'Mark', 'age': 10, 'country': 'Canada', 'height': 180},
{'name': 'Doe', 'age': 15, 'country': 'Canada', 'height': 185}
]
If I specify 'age' and 'country', it should return:
[
{
'age': 10,
'country': 'USA',
'count': 2
},
{
'age': 10,
'country': 'Canada',
'count': 2
},
{
'age': 15,
'country': 'Canada',
'count': 1
}
]
Or if I specify 'name' and 'height':
[
{
'name': 'John',
'height': 185,
'count': 2
},
{
'name': 'Mark',
'height': 180,
'count': 2
},
{
'name': 'Doe',
'height': 185,
'count': 1
}
]
Maybe there is a way to implement this with Counter?
You can use itertools.groupby on a list sorted by the same key:
>>> data = [
{'name': 'John', 'age': 10, 'country': 'USA', 'height': 185},
{'name': 'John', 'age': 10, 'country': 'Canada', 'height': 185},
{'name': 'Mark', 'age': 10, 'country': 'USA', 'height': 180},
{'name': 'Mark', 'age': 10, 'country': 'Canada', 'height': 180},
{'name': 'Doe', 'age': 15, 'country': 'Canada', 'height': 185}
]
>>> from itertools import groupby
>>> key = 'age', 'country'
>>> list_sorter = lambda x: tuple(x[k] for k in key)
>>> grouper = lambda x: tuple(x[k] for k in key)
>>> result = [
{**dict(zip(key, k)), 'count': len([*g])}
for k, g in
groupby(sorted(data, key=list_sorter), grouper)
]
>>> result
[{'age': 10, 'country': 'Canada', 'count': 2},
{'age': 10, 'country': 'USA', 'count': 2},
{'age': 15, 'country': 'Canada', 'count': 1}]
>>> key = 'name', 'height'
>>> result = [
{**dict(zip(key, k)), 'count': len([*g])}
for k, g in
groupby(sorted(data, key=list_sorter), grouper)
]
>>> result
[{'name': 'Doe', 'height': 185, 'count': 1},
{'name': 'John', 'height': 185, 'count': 2},
{'name': 'Mark', 'height': 180, 'count': 2}]
If you use pandas then you can use, pandas.DataFrame.groupby, pandas.groupby.size, pandas.Series.to_frame, pandas.DataFrame.reset_index and finally pandas.DataFrame.to_dict with orient='records':
>>> import pandas as pd
>>> df = pd.DataFrame(data)
>>> df.groupby(list(key)).size().to_frame('count').reset_index().to_dict('records')
[{'name': 'Doe', 'height': 185, 'count': 1},
{'name': 'John', 'height': 185, 'count': 2},
{'name': 'Mark', 'height': 180, 'count': 2}]
I have a list of dicts like below:
lod = [
{'name': 'Tom', 'score': 60},
{'name': 'Tim', 'score': 70},
{'name': 'Tam', 'score': 80},
{'name': 'Tem', 'score': 90}
]
I want to get {'name': 'Tem', 'score': 90}, but I can only do the following:
max(x['score'] for x in lod)
This only returns the value 90.
How can I get the whole dict?
You can use the key function of max:
>>> lod = [
... {'name': 'Tom', 'score': 60},
... {'name': 'Tim', 'score': 70},
... {'name': 'Tam', 'score': 80},
... {'name': 'Tem', 'score': 90}
... ]
...
>>> max(lod, key=lambda x: x['score'])
{'name': 'Tem', 'score': 90}
Just pass your list to max, like this:
>>> from operator import itemgetter
>>> lod = [
... {'name': 'Tom', 'score': 60},
... {'name': 'Tim', 'score': 70},
... {'name': 'Tam', 'score': 80},
... {'name': 'Tem', 'score': 90}
... ]
>>> max(lod, key=itemgetter('score'))
{'score': 90, 'name': 'Tem'}
Sorting also works, although it does more work than max (O(n log n) instead of O(n)):
>>>sorted(lod, key=lambda x:x['score'])[-1]
{'name': 'Tem', 'score': 90}
Ok, so I have a list of dicts:
[{'name': 'johnny', 'surname': 'smith', 'age': 53},
{'name': 'johnny', 'surname': 'ryan', 'age': 13},
{'name': 'jakob', 'surname': 'smith', 'age': 27},
{'name': 'aaron', 'surname': 'specter', 'age': 22},
{'name': 'max', 'surname': 'headroom', 'age': 108},
]
and I want the 'frequency' of the items within each column. So for this I'd get something like:
{'name': {'johnny': 2, 'jakob': 1, 'aaron': 1, 'max': 1},
'surname': {'smith': 2, 'ryan': 1, 'specter': 1, 'headroom': 1},
'age': {53: 1, 13: 1, 27: 1, 22: 1, 108: 1}}
Any modules out there that can do stuff like this?
collections.defaultdict from the standard library to the rescue:
from collections import defaultdict
LofD = [{'name': 'johnny', 'surname': 'smith', 'age': 53},
{'name': 'johnny', 'surname': 'ryan', 'age': 13},
{'name': 'jakob', 'surname': 'smith', 'age': 27},
{'name': 'aaron', 'surname': 'specter', 'age': 22},
{'name': 'max', 'surname': 'headroom', 'age': 108},
]
def counters():
    """Return a fresh ``defaultdict(int)``, i.e. a counter that starts at 0."""
    return defaultdict(int)


def freqs(LofD):
    """Count how often each value occurs under each key.

    Args:
        LofD: an iterable of dicts whose values are hashable.

    Returns:
        A plain dict mapping every key found in the input dicts to a plain
        dict of ``{value: number of occurrences}``.
    """
    r = defaultdict(counters)
    for d in LofD:
        for k, v in d.items():
            r[k][v] += 1
    # Convert the nested defaultdicts to plain dicts so that the result
    # prints cleanly and looking up a missing key does not create it.
    return {k: dict(v) for k, v in r.items()}
# print is a function in Python 3; the statement form is a SyntaxError there.
print(freqs(LofD))
emits
{'age': {27: 1, 108: 1, 53: 1, 22: 1, 13: 1}, 'surname': {'headroom': 1, 'smith': 2, 'specter': 1, 'ryan': 1}, 'name': {'jakob': 1, 'max': 1, 'aaron': 1, 'johnny': 2}}
as desired (order of keys apart, of course -- it's irrelevant in a dict).
items = [
    {'name': 'johnny', 'surname': 'smith', 'age': 53},
    {'name': 'johnny', 'surname': 'ryan', 'age': 13},
    {'name': 'jakob', 'surname': 'smith', 'age': 27},
    {'name': 'aaron', 'surname': 'specter', 'age': 22},
    {'name': 'max', 'surname': 'headroom', 'age': 108},
]

# global_dict[column][value] is the number of items whose `column` is `value`.
global_dict = {}
for item in items:
    for key, value in item.items():
        # dict.has_key() no longer exists in Python 3; setdefault/get create
        # the missing entries instead.
        counts = global_dict.setdefault(key, {})
        counts[value] = counts.get(value, 0) + 1
print(global_dict)
Simplest solution and actually tested.
New in Python 3.1: The collections.Counter class:
import collections

mydict = [
    {'name': 'johnny', 'surname': 'smith', 'age': 53},
    {'name': 'johnny', 'surname': 'ryan', 'age': 13},
    {'name': 'jakob', 'surname': 'smith', 'age': 27},
    {'name': 'aaron', 'surname': 'specter', 'age': 22},
    {'name': 'max', 'surname': 'headroom', 'age': 108},
]

newdict = {}
# The first record supplies the column names, so every record is expected to
# carry the same keys. An empty list simply yields an empty result.
columns = mydict[0].keys() if mydict else ()
for key in columns:
    # Counter tallies how often each value appears in this column.
    newdict[key] = dict(collections.Counter(row[key] for row in mydict))
print(newdict)
outputs:
{'age': {27: 1, 108: 1, 53: 1, 22: 1, 13: 1},
'surname': {'headroom': 1, 'smith': 2, 'specter': 1, 'ryan': 1},
'name': {'jakob': 1, 'max': 1, 'aaron': 1, 'johnny': 2}}
This?
from collections import defaultdict
# One counter per field of interest; defaultdict(int) starts each count at 0.
fq = {field: defaultdict(int) for field in ('name', 'surname', 'age')}
for row in listOfDicts:
    for field in fq:
        # Every row must contain all three fields, otherwise this raises
        # KeyError.
        fq[field][row[field]] += 1
print(fq)