I want to make a dictionary in good order - python

I want to parse an Excel file and put its data into the model (User). Now I want to build a dictionary that holds the Excel data. Excel is
I wrote in views.py
#coding:utf-8
from django.shortcuts import render
import xlrd
def try_to_int(arg):
    """Convert ``arg`` to an ``int`` when possible, otherwise return it unchanged.

    Args:
        arg: Any value, e.g. a cell value read from a spreadsheet row.

    Returns:
        ``int(arg)`` if the conversion succeeds; the original ``arg`` when
        ``int()`` rejects it (non-numeric text, ``None``, NaN or infinity).
    """
    try:
        return int(arg)
    # Catch only the failures int() raises for unconvertible values, so that
    # unrelated exceptions such as KeyboardInterrupt are no longer swallowed.
    except (TypeError, ValueError, OverflowError):
        return arg
def main():
    """Print the rows of the first sheet of ./data/excel1.xlsx as a dict.

    Row 0 supplies the column tags; every later row becomes a
    ``{tag: value}`` dict stored under its row index. The first column is
    dropped from both the tags and the rows, and each remaining cell is
    passed through ``try_to_int``.
    """
    workbook = xlrd.open_workbook('./data/excel1.xlsx')
    first_sheet = workbook.sheet_by_index(0)
    headers = first_sheet.row_values(0)[1:]
    rows_by_index = {
        index: dict(zip(
            headers,
            [try_to_int(cell) for cell in first_sheet.row_values(index)[1:]],
        ))
        for index in range(1, first_sheet.nrows)
    }
    print(rows_by_index)
main()
and it printed out {1: {'A': '', 'area': 'New York', 'C': 0, 'name': 'Blear', 'B': ''}, 2: {'A': '', 'area': 'Chicago', 'C': '', 'name': '', 'B': 0}, 3: {'A': 0, 'area': 'London', 'C': '', 'name': '', 'B': ''}, 4: {'A': '', 'area': 'Singapore', 'C': '', 'name': 'Tom', 'B': ''}, 5: {'A': 0, 'area': 'Delhi', 'C': '', 'name': '', 'B': ''}, 6: {'A': '', 'area': 'Beijing', 'C': 1, 'name': '', 'B': ''}}
But
I cannot understand why the output dictionary is a mess. I want to get a dictionary like
{1: {'name': 'Blear', 'area': 'New York', 'A': '', 'B': '', 'C': 0},
1: {'name': 'Blear', 'area': 'Chicago', 'A': '', 'B': 0, 'C': ''},
1: {'name': 'Blear', 'area': 'London', 'A': 50, 'B': 0, 'C': ''},
2: {'name': 'Tom', 'area': 'Singapore', 'A': '', 'B': '', 'C': ''}}・・・
What is wrong in my code? How should I fix this?

Related

How to pass multiple values into a dictionary

so i have the following code:
# Students; 'cls_id' holds the 'id' of the class each student belongs to.
stu_list = [
    {'stu_name': 'Abel', 'sex': 'male', 'score': 90, 'cls_id': 1},
    {'stu_name': 'Carl', 'sex': 'male', 'score': 80, 'cls_id': 2},
    {'stu_name': 'Cecil', 'sex': 'female', 'score': 60, 'cls_id': 1},
    {'stu_name': 'Elijah', 'sex': 'female', 'score': 70, 'cls_id': 2},
    {'stu_name': 'Dick', 'sex': 'male', 'score': 90, 'cls_id': 3},
    {'stu_name': 'Donald', 'sex': 'male', 'score': 80, 'cls_id': 3},
    {'stu_name': 'Jack', 'sex': 'male', 'score': 80, 'cls_id': 2},
    {'stu_name': 'Laurent', 'sex': 'female', 'score': 90, 'cls_id': 1},
    {'stu_name': 'Rex', 'sex': 'female', 'score': 90, 'cls_id': 1},
    {'stu_name': 'Tom', 'sex': 'male', 'score': 70, 'cls_id': 2},
    {'stu_name': 'Roy', 'sex': 'female', 'score': 90, 'cls_id': 3},
    {'stu_name': 'Steve', 'sex': 'male', 'score': 70, 'cls_id': 1}
]
# Classes; 'Class Four' (id 4) has no students in stu_list.
cls_list = [
    {'id': 1, 'cls_name': 'Class One'},
    {'id': 2, 'cls_name': 'Class Two'},
    {'id': 3, 'cls_name': 'Class Three'},
    {'id': 4, 'cls_name': 'Class Four'}
]
What i want, is to make the program output the class name, followed by the number of people in it, followed by the number of males in it and finally followed by the number of females. What I have so far is this:
lst_empty = {}
for cls in cls_list:
lst_empty.setdefault(cls['cls_name'], 0)
for stu in stu_list:
if stu['cls_id'] == cls['id']:
lst_empty[cls['cls_name']] +=1
if stu['sex'] == 'male':
since setdefault can only get me 2 items, I'm kind of stuck on what to do with the if sex = male bit.
I guess my main question is how do I pass in the number of males into the empty dictionary.
I don't want to drastically change my code, I just want to know what I can do with the code I have right now.
Thanks!
The easiest, yet not ideal solution is:
# For every class, print its name, its head count and the male/female split.
for cls in cls_list:
    members = [stu for stu in stu_list if stu['cls_id'] == cls['id']]
    male_count = len([stu for stu in members if stu['sex'] == 'male'])
    female_count = len([stu for stu in members if stu['sex'] == 'female'])
    print(cls['cls_name'])
    print(f"Total students: {len(members)}")
    print(f"Num of males: {male_count}")
    print(f"Num of females: {female_count}")
But you better consider pandas library with its convenient DataFrames
Just use dict.get:
dct = {}
for stu in stu_list:
if stu['cls_id'] == cls['id']:
dct[cls['cls_name']] = dct.get(cls['cls_name'], 0) + 1
# will handle both 'male' and 'female' cases
dct[stu['sex']] = dct.get(stu['sex'], 0) + 1
lst_empty.append(dct)
You could setdefault an empty list or another dictionary.
lst_empty.setdefault(cls['cls_name'], [0, 0, 0])
or
lst_empty.setdefault(cls['cls_name'], {"nb_people" : 0, "nb_male" : 0, "nb_female" : 0})
I have made lst_empty as a list of dicts. The code is as follows:
stu_list = [{'stu_name': 'Abel', 'sex': 'male', 'score': 90, 'cls_id': 1},
{'stu_name': 'Carl', 'sex': 'male', 'score': 80, 'cls_id': 2},
{'stu_name': 'Cecil', 'sex': 'female', 'score': 60, 'cls_id': 1},
{'stu_name': 'Elijah', 'sex': 'female', 'score': 70, 'cls_id': 2},
{'stu_name': 'Dick', 'sex': 'male', 'score': 90, 'cls_id': 3},
{'stu_name': 'Donald', 'sex': 'male', 'score': 80, 'cls_id': 3},
{'stu_name': 'Jack', 'sex': 'male', 'score': 80, 'cls_id': 2},
{'stu_name': 'Laurent', 'sex': 'female', 'score': 90, 'cls_id': 1},
{'stu_name': 'Rex', 'sex': 'female', 'score': 90, 'cls_id': 1},
{'stu_name': 'Tom', 'sex': 'male', 'score': 70, 'cls_id': 2},
{'stu_name': 'Roy', 'sex': 'female', 'score': 90, 'cls_id': 3},
{'stu_name': 'Steve', 'sex': 'male', 'score': 70, 'cls_id': 1}]
cls_list = [{'id': 1, 'cls_name': 'Class One'},
{'id': 2, 'cls_name': 'Class Two'},
{'id': 3, 'cls_name': 'Class Three'},
{'id': 4, 'cls_name': 'Class Four'}]
# One summary dict per class: {<class name>: head count, 'male': n, 'female': n}.
lst_empty = []
for cls in cls_list:
    members = [stu for stu in stu_list if stu['cls_id'] == cls['id']]
    male_count = sum(1 for stu in members if stu['sex'] == 'male')
    lst_empty.append({
        cls['cls_name']: len(members),
        'male': male_count,
        # Everyone who is not counted as male is counted as female.
        'female': len(members) - male_count,
    })
for summary in lst_empty:
    print(summary)
The output is:
{'Class One': 5, 'male': 2, 'female': 3}
{'Class Two': 4, 'male': 3, 'female': 1}
{'Class Three': 3, 'male': 2, 'female': 1}
{'Class Four': 0, 'male': 0, 'female': 0}
You can do it this way if you want the result to be stored in a dict:
# Per-class tallies keyed by class id, all counters starting at zero.
cls_dict = {
    cls['id']: {
        'id': cls['id'],
        'cls_name': cls['cls_name'],
        'population': 0,
        'males': 0,
        'females': 0,
    }
    for cls in cls_list
}
# Counter fed by each recognised 'sex' value; other values are not counted.
counter_for_sex = {'female': 'females', 'male': 'males'}
for stu in stu_list:
    # Students pointing at an unknown class are ignored.
    if stu['cls_id'] not in cls_dict:
        continue
    counter = counter_for_sex.get(stu['sex'])
    if counter is None:
        continue
    tallies = cls_dict[stu['cls_id']]
    tallies[counter] += 1
    tallies['population'] += 1
print(cls_dict)
The output will be:
{1: {'id': 1, 'cls_name': 'Class One', 'population': 5, 'males': 2, 'females': 3},
2: {'id': 2, 'cls_name': 'Class Two', 'population': 4, 'males': 3, 'females': 1},
3: {'id': 3, 'cls_name': 'Class Three', 'population': 3, 'males': 2, 'females': 1},
4: {'id': 4, 'cls_name': 'Class Four', 'population': 0, 'males': 0, 'females': 0}}

Update JSON format from other JSON file

I have two files which are a and b. I want to import certain information from data b to data a with the unique id from every response.
data
a= [{'id':'abc23','name':'aa','age':'22',
'data':{'read':'','speak':''},
'responses':{'a':1,'b':2}},
{'id':'abc25','name':'bb','age':'32',
'data':{'read':'','speak':''},
'responses':{'a':1,'b':2}},
{'id':'abc60','name':'cc','age':'24',
'data':{'read':'','speak':''},
'responses':{'a':1,'b':2}}]
b=[{'id':'abc23','read':'2','speak':'abc','write':'2'},
{'id':'abc25','read':'3','speak':'def','write':'3'},
{'id':'abc60','read':'5','speak':'dgf','write':'1'}]
Code that I used to import from b to a :
from pprint import pprint
for dest in a:
for source in b:
if source['id'] == dest['id']:
dest['data'].update(source)
pprint(a)
Output from the code that i used :
[{ 'age': '22',
'data': {'id': 'abc23', 'read': '2', 'speak': 'abc', 'write': '2'},
'id': 'abc23',
'name': 'aa',
'responses': {'a': 1, 'b': 2}},
{ 'age': '32',
'data': {'id': 'abc25', 'read': '3', 'speak': 'def', 'write': '3'},
'id': 'abc25',
'name': 'bb',
'responses': {'a': 1, 'b': 2}},
{ 'age': '24',
'data': {'id': 'abc60', 'read': '5', 'speak': 'dgf', 'write': '1'},
'id': 'abc60',
'name': 'cc',
'responses': {'a': 1, 'b': 2}}]
But... This is the output that I want:
[{'age': '22',
'data': {'read': '2', 'speak': 'abc'},
'id': 'abc23',
'name': 'aa',
'responses': {'a': 1, 'b': 2}},
{'age': '32',
'data': {'read': '3', 'speak': 'def'},
'id': 'abc25',
'name': 'bb',
'responses': {'a': 1, 'b': 2}},
{'age': '24',
'data': {'read': '5', 'speak': 'dgf'},
'id': 'abc60',
'name': 'cc',
'responses': {'a': 1, 'b': 2}}]
It can't work the way you want with your code.
You do
dest['data'].update(source)
where source is
{'id':'abc23','read':'2','speak':'abc','write':'2'}
and dest['data'] is {'read':'','speak':''}.
When you update it will add all key-value pairs to dest['data'] and preserve the ones that won't be overwritten.
from pprint import pprint
for dest in a:
for source in b:
if source['id'] == dest['id']:
dest['data'] = {k: v for k, v in source.items() if k in dest.get('data', {})}
pprint(a)
This one will look for all the fields that are 'updateable' for each case. You might want to hardcode it, depending on your use case.
This is one approach by changing b to a dict for easy lookup.
Ex:
a= [{'id':'abc23','name':'aa','age':'22',
'data':{'read':'','speak':''},
'responses':{'a':1,'b':2}},
{'id':'abc25','name':'bb','age':'32',
'data':{'read':'','speak':''},
'responses':{'a':1,'b':2}},
{'id':'abc60','name':'cc','age':'24',
'data':{'read':'','speak':''},
'responses':{'a':1,'b':2}}]
b=[{'id':'abc23','read':'2','speak':'abc','write':'2'},
{'id':'abc25','read':'3','speak':'def','write':'3'},
{'id':'abc60','read':'5','speak':'dgf','write':'1'}]
b = {i.pop('id'): i for i in b} #Convert to dict key = ID & value = `read`, `speak`, `write`
for i in a:
i['data'].update(b[i['id']]) #Update list
print(a)
Output:
[{'age': '22',
'data': {'read': '2', 'speak': 'abc', 'write': '2'},
'id': 'abc23',
'name': 'aa',
'responses': {'a': 1, 'b': 2}},
{'age': '32',
'data': {'read': '3', 'speak': 'def', 'write': '3'},
'id': 'abc25',
'name': 'bb',
'responses': {'a': 1, 'b': 2}},
{'age': '24',
'data': {'read': '5', 'speak': 'dgf', 'write': '1'},
'id': 'abc60',
'name': 'cc',
'responses': {'a': 1, 'b': 2}}]

Iterate over two lists of dicts and create list of tuples without loop

I have two lists of dicts: list1 and list2.
print(list1)
[{'name': 'fooa', 'desc': 'bazv', 'city': 1, 'ID': 1},
{'name': 'bard', 'desc': 'besd', 'city': 2, 'ID': 1},
{'name': 'baer', 'desc': 'bees', 'city': 2, 'ID': 1},
{'name': 'aaaa', 'desc': 'bnbb', 'city': 1, 'ID': 2},
{'name': 'cgcc', 'desc': 'dgdd', 'city': 1, 'ID': 2}]
print(list2)
[{'name': 'foo', 'desc': 'baz', 'city': 1, 'ID': 1},
{'name': 'bar', 'desc': 'bes', 'city': 1, 'ID': 1},
{'name': 'bar', 'desc': 'bes', 'city': 2, 'ID': 1},
{'name': 'aaa', 'desc': 'bbb', 'city': 1, 'ID': 2},
{'name': 'ccc', 'desc': 'ddd', 'city': 1, 'ID': 2}]
I need a list of tuples that will hold two paired dicts (one dict from each list) with the same city and ID.
I did it with double loop:
list_of_tuples = []
for i in list1:
for j in list2:
if i['ID'] == j['ID'] and i['city'] == j['city']:
list_of_tuples.append((i, j))
print(list_of_tuples)
[({'name': 'fooa', 'desc': 'bazv', 'city': 1, 'ID': 1},
{'name': 'foo', 'desc': 'baz', 'city': 1, 'ID': 1}),
({'name': 'fooa', 'desc': 'bazv', 'city': 1, 'ID': 1},
{'name': 'bar', 'desc': 'bes', 'city': 1, 'ID': 1}),
({'name': 'bard', 'desc': 'besd', 'city': 2, 'ID': 1},
{'name': 'bar', 'desc': 'bes', 'city': 2, 'ID': 1}),
({'name': 'baer', 'desc': 'bees', 'city': 2, 'ID': 1},
{'name': 'bar', 'desc': 'bes', 'city': 2, 'ID': 1}),
({'name': 'aaaa', 'desc': 'bnbb', 'city': 1, 'ID': 2},
{'name': 'aaa', 'desc': 'bbb', 'city': 1, 'ID': 2}),
({'name': 'aaaa', 'desc': 'bnbb', 'city': 1, 'ID': 2},
{'name': 'ccc', 'desc': 'ddd', 'city': 1, 'ID': 2}),
({'name': 'cgcc', 'desc': 'dgdd', 'city': 1, 'ID': 2},
{'name': 'aaa', 'desc': 'bbb', 'city': 1, 'ID': 2}),
({'name': 'cgcc', 'desc': 'dgdd', 'city': 1, 'ID': 2},
{'name': 'ccc', 'desc': 'ddd', 'city': 1, 'ID': 2})]
Question: How to do this in a more pythonic way (without loops)?
You can use itertools.product and filter:
from itertools import product
list1 = [{'name': 'fooa', 'desc': 'bazv', 'city': 1, 'ID': 1},
{'name': 'bard', 'desc': 'besd', 'city': 2, 'ID': 1},
{'name': 'baer', 'desc': 'bees', 'city': 2, 'ID': 1},
{'name': 'aaaa', 'desc': 'bnbb', 'city': 1, 'ID': 2},
{'name': 'cgcc', 'desc': 'dgdd', 'city': 1, 'ID': 2}]
list2 = [{'name': 'foo', 'desc': 'baz', 'city': 1, 'ID': 1},
{'name': 'bar', 'desc': 'bes', 'city': 1, 'ID': 1},
{'name': 'bar', 'desc': 'bes', 'city': 2, 'ID': 1},
{'name': 'aaa', 'desc': 'bbb', 'city': 1, 'ID': 2},
{'name': 'ccc', 'desc': 'ddd', 'city': 1, 'ID': 2}]
def condition(x):
    """Tell whether the two dicts in the pair ``x`` share 'ID' and 'city'.

    ``x`` is read as ``x[0]`` and ``x[1]``, e.g. one item yielded by
    ``itertools.product(list1, list2)``.
    """
    first = x[0]
    second = x[1]
    same_id = first['ID'] == second['ID']
    # The city is only compared when the IDs already match.
    return same_id and first['city'] == second['city']
list_of_tuples = list(filter(condition, product(list1, list2)))
This is a problem well suited for pandas. If you convert the lists to DataFrames, matching the records on ID and city is the same as an inner join of the two DataFrames.
import pandas as pd
# convert lists to DataFrames
df1 = pd.DataFrame(list1)
df2 = pd.DataFrame(list2)
# merge the two DataFrames
print(df1.merge(df2, on=["ID", "city"]))
# ID city desc_x name_x desc_y name_y
#0 1 1 bazv fooa baz foo
#1 1 1 bazv fooa bes bar
#2 1 2 besd bard bes bar
#3 1 2 bees baer bes bar
#4 2 1 bnbb aaaa bbb aaa
#5 2 1 bnbb aaaa ddd ccc
#6 2 1 dgdd cgcc bbb aaa
#7 2 1 dgdd cgcc ddd ccc
Now you have the matched records in each row. Since the desc and name columns were present in both (and not used for the merge), they get suffixed with _x and _y to differentiate between the two source DataFrames.
You just need to reformat it to be in your desired output. You can achieve this using to_dict and a list comprehension:
list_of_tuples = [
(
{"name": r["name_x"], "desc": r["desc_x"], "city": r["city"], "ID": r["ID"]},
{"name": r["name_y"], "desc": r["desc_y"], "city": r["city"], "ID": r["ID"]}
) for r in df1.merge(df2, on=["ID", "city"]).to_dict(orient="records")
]
print(list_of_tuples)
#[({'ID': 1, 'city': 1, 'desc': 'bazv', 'name': 'fooa'},
# {'ID': 1, 'city': 1, 'desc': 'baz', 'name': 'foo'}),
# ({'ID': 1, 'city': 1, 'desc': 'bazv', 'name': 'fooa'},
# {'ID': 1, 'city': 1, 'desc': 'bes', 'name': 'bar'}),
# ({'ID': 1, 'city': 2, 'desc': 'besd', 'name': 'bard'},
# {'ID': 1, 'city': 2, 'desc': 'bes', 'name': 'bar'}),
# ({'ID': 1, 'city': 2, 'desc': 'bees', 'name': 'baer'},
# {'ID': 1, 'city': 2, 'desc': 'bes', 'name': 'bar'}),
# ({'ID': 2, 'city': 1, 'desc': 'bnbb', 'name': 'aaaa'},
# {'ID': 2, 'city': 1, 'desc': 'bbb', 'name': 'aaa'}),
# ({'ID': 2, 'city': 1, 'desc': 'bnbb', 'name': 'aaaa'},
# {'ID': 2, 'city': 1, 'desc': 'ddd', 'name': 'ccc'}),
# ({'ID': 2, 'city': 1, 'desc': 'dgdd', 'name': 'cgcc'},
# {'ID': 2, 'city': 1, 'desc': 'bbb', 'name': 'aaa'}),
# ({'ID': 2, 'city': 1, 'desc': 'dgdd', 'name': 'cgcc'},
# {'ID': 2, 'city': 1, 'desc': 'ddd', 'name': 'ccc'})]
Having nested loops is not "not pythonic". However, you can achieve the same result with a list comprehension. I don't think it's more readable though:
# list1 is the outer loop and list2 the inner one, so the pairs come out in
# the same order as the nested for-loops over list1 then list2 produce them.
[(i, j) for i in list1 for j in list2 if i['ID'] == j['ID'] and i['city'] == j['city']]

Sort a list of dict with a key from another list of dict

In the following example, I would like to sort the animals by the alphabetical order of their category, which is stored in another dictionary.
# Categories; an animal refers to one of these through its 'category' uid.
category = [{'uid': 0, 'name': 'mammals'},
            {'uid': 1, 'name': 'birds'},
            {'uid': 2, 'name': 'fish'},
            {'uid': 3, 'name': 'reptiles'},
            {'uid': 4, 'name': 'invertebrates'},
            {'uid': 5, 'name': 'amphibians'}]
animals = [{'name': 'horse', 'category': 0},
           {'name': 'whale', 'category': 2},
           {'name': 'mollusk', 'category': 4},
           {'name': 'tuna ', 'category': 2},
           {'name': 'worms', 'category': 4},
           {'name': 'frog', 'category': 5},
           {'name': 'dog', 'category': 0},
           {'name': 'salamander', 'category': 5},
           {'name': 'horse', 'category': 0},
           {'name': 'octopus', 'category': 4},
           {'name': 'alligator', 'category': 3},
           {'name': 'monkey', 'category': 0},
           {'name': 'kangaroos', 'category': 0},
           {'name': 'salmon', 'category': 2}]
# Orders the animals by their numeric category uid (not by category name);
# sorted() is stable, so animals sharing a uid keep their input order.
sorted_animals = sorted(animals, key=lambda k: k['category'])
How could I achieve this?
Thanks.
You are now sorting on the category id. All you need to do is map that id to a lookup for a given category name.
Create a dictionary for the categories first so you can directly map the numeric id to the associated name from the category list, then use that mapping when sorting:
catuid_to_name = {c['uid']: c['name'] for c in category}
sorted_animals = sorted(animals, key=lambda k: catuid_to_name[k['category']])
Demo:
>>> from pprint import pprint
>>> category = [{'uid': 0, 'name': 'mammals'},
... {'uid': 1, 'name': 'birds'},
... {'uid': 2, 'name': 'fish'},
... {'uid': 3, 'name': 'reptiles'},
... {'uid': 4, 'name': 'invertebrates'},
... {'uid': 5, 'name': 'amphibians'}]
>>> animals = [{'name': 'horse', 'category': 0},
... {'name': 'whale', 'category': 2},
... {'name': 'mollusk', 'category': 4},
... {'name': 'tuna ', 'category': 2},
... {'name': 'worms', 'category': 4},
... {'name': 'frog', 'category': 5},
... {'name': 'dog', 'category': 0},
... {'name': 'salamander', 'category': 5},
... {'name': 'horse', 'category': 0},
... {'name': 'octopus', 'category': 4},
... {'name': 'alligator', 'category': 3},
... {'name': 'monkey', 'category': 0},
... {'name': 'kangaroos', 'category': 0},
... {'name': 'salmon', 'category': 2}]
>>> catuid_to_name = {c['uid']: c['name'] for c in category}
>>> pprint(catuid_to_name)
{0: 'mammals',
1: 'birds',
2: 'fish',
3: 'reptiles',
4: 'invertebrates',
5: 'amphibians'}
>>> sorted_animals = sorted(animals, key=lambda k: catuid_to_name[k['category']])
>>> pprint(sorted_animals)
[{'category': 5, 'name': 'frog'},
{'category': 5, 'name': 'salamander'},
{'category': 2, 'name': 'whale'},
{'category': 2, 'name': 'tuna '},
{'category': 2, 'name': 'salmon'},
{'category': 4, 'name': 'mollusk'},
{'category': 4, 'name': 'worms'},
{'category': 4, 'name': 'octopus'},
{'category': 0, 'name': 'horse'},
{'category': 0, 'name': 'dog'},
{'category': 0, 'name': 'horse'},
{'category': 0, 'name': 'monkey'},
{'category': 0, 'name': 'kangaroos'},
{'category': 3, 'name': 'alligator'}]
Note that within each category, the dictionaries have been left in relative input order. You could return a tuple of values from the sorting key to further apply a sorting order within each category, e.g.:
sorted_animals = sorted(
animals,
key=lambda k: (catuid_to_name[k['category']], k['name'])
)
would sort by animal name within each category, producing:
>>> pprint(sorted(animals, key=lambda k: (catuid_to_name[k['category']], k['name'])))
[{'category': 5, 'name': 'frog'},
{'category': 5, 'name': 'salamander'},
{'category': 2, 'name': 'salmon'},
{'category': 2, 'name': 'tuna '},
{'category': 2, 'name': 'whale'},
{'category': 4, 'name': 'mollusk'},
{'category': 4, 'name': 'octopus'},
{'category': 4, 'name': 'worms'},
{'category': 0, 'name': 'dog'},
{'category': 0, 'name': 'horse'},
{'category': 0, 'name': 'horse'},
{'category': 0, 'name': 'kangaroos'},
{'category': 0, 'name': 'monkey'},
{'category': 3, 'name': 'alligator'}]
imo your category structure is far too complicated - at least as long as the uid is nothing but the index, you could simply use a list for that:
category = [c['name'] for c in category]
# ['mammals', 'birds', 'fish', 'reptiles', 'invertebrates', 'amphibians']
sorted_animals = sorted(animals, key=lambda k: category[k['category']])
#[{'name': 'frog', 'category': 5}, {'name': 'salamander', 'category': 5}, {'name': 'whale', 'category': 2}, {'name': 'tuna ', 'category': 2}, {'name': 'salmon', 'category': 2}, {'name': 'mollusk', 'category': 4}, {'name': 'worms', 'category': 4}, {'name': 'octopus', 'category': 4}, {'name': 'horse', 'category': 0}, {'name': 'dog', 'category': 0}, {'name': 'horse', 'category': 0}, {'name': 'monkey', 'category': 0}, {'name': 'kangaroos', 'category': 0}, {'name': 'alligator', 'category': 3}]

Python, collect data from an array of dicts

I'm new with Python and I have this structure achieved from a DB
data=[
{'Value': '0.2', 'id': 1},
{'Value': '1.2', 'id': 1},
{'Value': '33.34', 'id': 2},
{'Value': '44.3', 'id': 3},
{'Value': '33.23', 'id': 3},
{'Value': '21.1', 'id': 4},
{'Value': '5.33', 'id': 4},
{'Value': '33.3', 'id': 5},
{'Value': '12.2', 'id': 5},
{'Value': '1.22', 'id': 5},
{'Value': '1.23', 'id': 6}
]
I know that I can get the id of a record with:
data[i]['id']
but I need to collect by ID in a proper data structure, in order to get the average values for every ID.
What is the better choice for this?
I'm thinking of building a new dict for every ID set, but the IDs can grow in number, and I can't figure out how to tackle this problem. If someone can give me some ideas I would be very grateful.
Assuming your data is sorted by ID as it appears in your data variable, you can try using itertools.groupby, which can be instructed to group by id. You can then create a new dictionary that has keys equal to the id numbers and values equal to the means:
In [1]: from itertools import groupby
In [2]: data=[
...: {'Value': '0.2', 'id': 1},
...: {'Value': '1.2', 'id': 1},
...: {'Value': '33.34', 'id': 2},
...: {'Value': '44.3', 'id': 3},
...: {'Value': '33.23', 'id': 3},
...: {'Value': '21.1', 'id': 4},
...: {'Value': '5.33', 'id': 4},
...: {'Value': '33.3', 'id': 5},
...: {'Value': '12.2', 'id': 5},
...: {'Value': '1.22', 'id': 5},
...: {'Value': '1.23', 'id': 6}
...: ]
In [3]: means = {}
In [4]: for k, g in groupby(data, key=lambda x: x['id']):
...: g = list(g)
...: means[k] = sum(float(x['Value']) for x in g) / len(g)
...:
...:
In [5]: means
Out[5]:
{1: 0.69999999999999996,
2: 33.340000000000003,
3: 38.765000000000001,
4: 13.215,
5: 15.573333333333332,
6: 1.23}
(Updated: after DSM's comment.)
You could reshape the data like this:
from collections import defaultdict
data=[
{'Value': '0.2', 'id': 1},
{'Value': '1.2', 'id': 1},
{'Value': '33.34', 'id': 2},
{'Value': '44.3', 'id': 3},
{'Value': '33.23', 'id': 3},
{'Value': '21.1', 'id': 4},
{'Value': '5.33', 'id': 4},
{'Value': '33.3', 'id': 5},
{'Value': '12.2', 'id': 5},
{'Value': '1.22', 'id': 5},
{'Value': '1.23', 'id': 6}
]
newdata = defaultdict(list)
for r in data:
newdata[r['id']].append(float(r['Value']))
This would yield:
In [2]: newdata
Out[2]: defaultdict(<type 'list'>, {1: [0.2, 1.2], 2: [33.34], 3: [44.3, 33.23], 4: [21.1, 5.33], 5: [33.3, 12.2, 1.22], 6: [1.23]})
(Update 2)
Calculating the means is now simple with a dictionary comprehension:
# items() exists on both Python 2 and Python 3 dicts, whereas viewitems() is
# Python 2 only; the loop variable also no longer reuses the builtin name id.
mean = {key: sum(values) / len(values) for key, values in newdata.items()}
Which gives:
In [4]: mean
Out[4]: {1: 0.7, 2: 33.34, 3: 38.765, 4: 13.215, 5: 15.573333333333332, 6: 1.23}
If you have numpy, you could use it for this easily:
import numpy
numpy.mean([x['id'] for x in data])
Otherwise, it would be as simple as:
from __future__ import division # if python2.7
ids = [x['id'] for x in data]
print sum(ids)/len(ids)
You can simply create a list of IDs after all have been collected:
id_list = [element['id'] for element in data]
From there you can calculate whatever you want.

Categories

Resources