Cleanest way to sum list of nested dicts - python

Is there a cleaner/more pythonic way of summing the contents of a list of nested dicts? Here's what I'm doing, but I suspect that there may be a better way:
list_of_nested_dicts = [{'class1': {'TP': 1, 'FP': 0, 'FN': 2}, 'class2': {'TP': 0, 'FP': 0, 'FN': 0}, 'class3': {'TP': 0, 'FP': 0, 'FN': 0}, 'class4': {'TP': 1, 'FP': 0, 'FN': 2}},
{'class1': {'TP': 1, 'FP': 0, 'FN': 2}, 'class2': {'TP': 0, 'FP': 0, 'FN': 0}, 'class3': {'TP': 0, 'FP': 0, 'FN': 0}, 'class4': {'TP': 1, 'FP': 0, 'FN': 2}},
{'class1': {'TP': 1, 'FP': 0, 'FN': 2}, 'class2': {'TP': 0, 'FP': 0, 'FN': 0}, 'class3': {'TP': 0, 'FP': 0, 'FN': 0}, 'class4': {'TP': 1, 'FP': 0, 'FN': 2}},
{'class1': {'TP': 1, 'FP': 0, 'FN': 2}, 'class2': {'TP': 0, 'FP': 0, 'FN': 0}, 'class3': {'TP': 0, 'FP': 0, 'FN': 0}, 'class4': {'TP': 1, 'FP': 0, 'FN': 2}}]
# Start every class at zero; the class labels come from the first record,
# while the 'TP'/'FP'/'FN' sub-keys are written out by hand.
total_counts = {k:{'TP': 0, 'FP': 0, 'FN': 0} for k in list_of_nested_dicts[0].keys()}
# Walk record -> class label -> counter name, adding each value to its total.
for d in list_of_nested_dicts:
    for label,counts_dict in d.items():
        for k,v in counts_dict.items():
            total_counts[label][k] += v
print(total_counts)
(Assuming all keys are exactly the same, but values could be any integer)

You can get slightly tighter code using collections (similar result to @blhsing's answer):
import collections

# A label seen for the first time gets an empty Counter automatically.
# Counter.update() ADDS the incoming counts to the existing ones (unlike
# dict.update(), which would overwrite them).
counts = collections.defaultdict(collections.Counter)
for d in list_of_nested_dicts:
    for k, v in d.items():
        counts[k].update(v)
This will give you a defaultdict of counters instead of only dicts, but they behave similarly. You can also explicitly cast them to dicts at the end if you want.
{'class1': {'FN': 8, 'FP': 0, 'TP': 4},
'class2': {'FN': 0, 'FP': 0, 'TP': 0},
'class3': {'FN': 0, 'FP': 0, 'TP': 0},
'class4': {'FN': 8, 'FP': 0, 'TP': 4}}
vs
defaultdict(<class 'collections.Counter'>,
{'class1': Counter({'FN': 8, 'TP': 4, 'FP': 0}),
'class2': Counter({'TP': 0, 'FP': 0, 'FN': 0}),
'class3': Counter({'TP': 0, 'FP': 0, 'FN': 0}),
'class4': Counter({'FN': 8, 'TP': 4, 'FP': 0})})

One thing in your code that stands out as "unclean" is the fact that you are hard-coding the keys of the sub-dicts in the initialization of total_counts. You can avoid such hard-coding by using the dict.setdefault and dict.get methods as you iterate over the items of the sub-dicts instead:
total_counts = {}
for d in list_of_nested_dicts:
    for label, counts_dict in d.items():
        # Create the per-label sub-dict the first time the label is seen,
        # then reuse it for every counter of that label.
        label_totals = total_counts.setdefault(label, {})
        for k, v in counts_dict.items():
            # dict.get supplies 0 for a counter name not seen before.
            label_totals[k] = label_totals.get(k, 0) + v

Related

Merge dicts in one and find mean

I am new to programming.
I have a list of dicts.
[{'Program Analysis': 0},
{'Algorithms': 0},
{'Number systems': 0},
{'Game theory': 1},
{'Algorithms': 1},
{'Number systems': 0},
{'Program Analysis': 0}]
I want to merge all dicts in one and then find mean.
{'Program Analysis': [0, 0],
'Algorithms': [0, 1],
'Number systems': [0, 0],
'Game theory': 1 }
{'Program Analysis': 0,
'Algorithms': 0.5,
'Number systems': 0,
'Game theory': 1 }
def avg(num_list):
    """Return the arithmetic mean of the numbers in num_list as a float.

    Raises ZeroDivisionError if num_list is empty.
    """
    # float() forces true division regardless of the element types.
    return float(sum(num_list)) / len(num_list)
my_list = [{'Program Analysis': 0}, {'Algorithms': 0}, {'Number systems': 0}, {'Game theory': 1}, {'Algorithms': 1}, {'Number systems': 0}, {'Program Analysis': 0}]
# Step 1: collect every value under its name, e.g. {'Algorithms': [0, 1], ...}.
result = {}
for d in my_list:
    for name, num in d.items():
        result.setdefault(name, []).append(num)
# Step 2: replace each list of values with its mean. Only existing keys are
# reassigned, so the dict does not change size while it is being iterated.
for name, num_list in result.items():
    result[name] = avg(num_list)
You can use a default dictionary to group the data and use mean function from statistics to calculate it:
from collections import defaultdict
from statistics import mean
data = [
    {'Program Analysis': 0},
    {'Algorithms': 0},
    {'Number systems': 0},
    {'Game theory': 1},
    {'Algorithms': 1},
    {'Number systems': 0},
    {'Program Analysis': 0}
]
# Group the values by key; defaultdict(list) creates the list on first access.
grouped_data = defaultdict(list)
for d in data:
    for k, v in d.items():
        grouped_data[k].append(v)
# Convert back to a plain dict so it prints without the defaultdict wrapper.
grouped_data = dict(grouped_data)
print(grouped_data)
# Output: {'Program Analysis': [0, 0], 'Algorithms': [0, 1], 'Number systems': [0, 0], 'Game theory': [1]}
mean_data = {
    k: mean(v) for k, v in grouped_data.items()
}
print(mean_data)
# Output: {'Program Analysis': 0, 'Algorithms': 0.5, 'Number systems': 0, 'Game theory': 1}

Cumulative Dictionary

I am trying to write a Python function where, for each key (the dates), the value is the sum of that day's result and the results of the previous day(s) (a running total, somewhat like the logic of the Fibonacci sequence).
For example, I have:
{20200516: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 0}, 20200517: {'Level1': 0, 'Level2': 0, 'Level3': 0, 'Level4': 1}, 20200518: {'Level1': 1, 'Level2': 0, 'Level3': 0, 'Level4': 0}, 20200519: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 1}}
but I want to have:
{20200516: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 0}, 20200517: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 1}, 20200518: {'Level1': 1, 'Level2': 1, 'Level3': 0, 'Level4': 1}, 20200519: {'Level1': 1, 'Level2': 2, 'Level3': 0, 'Level4': 2}}
What I have done until now:
def summing(d):
    '''
    Each key after the first one is the sum of the one before and its own result.

    Example input:
    {20200516: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 0},
     20200517: {'Level1': 0, 'Level2': 0, 'Level3': 0, 'Level4': 1},
     20200518: {'Level1': 1, 'Level2': 0, 'Level3': 0, 'Level4': 0},
     20200519: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 1}}

    Desired output:
    {20200516: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 0},
     20200517: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 1},
     20200518: {'Level1': 1, 'Level2': 1, 'Level3': 0, 'Level4': 1},
     20200519: {'Level1': 1, 'Level2': 2, 'Level3': 0, 'Level4': 2}}
    '''
    #STILL IN PROGRESS
    c={}
    for key in d:
        if key == 20200516:
            # The first day is hard-coded and copied through unchanged.
            c[20200516]=d[20200516]
        else:
            # NOTE: this is the unfinished part. It looks up d[key-1] and
            # applies "+" to two dicts, which dict objects do not support.
            c[key]=d[key-1]+d[key]
    return c
You made a good effort, but you can't just add dicts like that. Here's a minimal change to get from your input to desired output, by using dict comprehension to add the value for each entry in the daily record:
from pprint import pprint
def summing_oneday(d1, d2):
    """Return a new dict holding d1[key] + d2[key] for every key of d2.

    Raises KeyError if a key of d2 is missing from d1.
    """
    return {key: d1[key] + d2[key] for key in d2}


def summing(data):
    """Return the running (cumulative) totals of the daily records in data.

    data maps a sortable day key to a dict of counters. Days are processed
    in sorted key order; each day's result is its own record added, key by
    key, to the previous day's cumulative result. The input is not modified
    and the returned dicts are independent of it.
    """
    result = {}
    previous = None
    for day in sorted(data):
        if previous is None:
            # First day: nothing to add yet. Copy the record so that later
            # changes to the result cannot leak back into the caller's data.
            result[day] = dict(data[day])
        else:
            result[day] = summing_oneday(previous, data[day])
        previous = result[day]
    return result
data = {20200516: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 0}, 20200517: {'Level1': 0, 'Level2': 0, 'Level3': 0, 'Level4': 1}, 20200518: {'Level1': 1, 'Level2': 0, 'Level3': 0, 'Level4': 0}, 20200519: {'Level1': 0, 'Level2': 1, 'Level3': 0, 'Level4': 1}}
pprint(summing(data))
I'm assuming all the keys are present on all the daily records. Otherwise we'll have to deal with that.

pandas - pd.replace and TypeError

I have all_data dataframe. I want to replace some categorical values in certain columns with numerical values. I'm trying to use this nested dictionary notation (I've checked that the brackets and curly brackets are in place, I don't think that's the issue):
all_data = all_data.replace({'Street': {'Pave': 1, 'Grvl': 0}},
{'LotShape': {'IR3': 1, 'IR2': 2, 'IR1': 3, 'Reg': 4}},
{'Utilities': {'ELO': 0, 'NoSeWa': 0, 'NoSewr': 0, 'AllPub': 1}},
{'LandSlope': {'Sev': 1, 'Mod': 2, 'Gtl': 3}},
{'ExterQual': {'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5}},
{'ExterCond': {'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5}},
{'BsmtQual': {'NA': 0, 'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4,'Ex': 5}},
{'BsmtCond': {'NA': 0, 'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4,'Ex': 5}},
{'BsmtExposure': {'NA': 0, 'No': 1, 'Mn': 2, 'Av': 3, 'Gd': 4}},
{'BsmtFinType1': {'NA': 0, 'Unf': 1, 'LwQ': 2, 'Rec': 3, 'BLQ': 4, 'ALQ': 5, 'GLQ': 6}},
{'BsmtFinType2': {'NA': 0, 'Unf': 1,'LwQ': 2,'Rec': 3, 'BLQ': 4,'ALQ': 5, 'GLQ': 6}},
{'HeatingQC': {'Po': 1,'Fa': 2,'TA': 3,'Gd': 4,'Ex': 5}},
{'CentralAir': {'No': 0,'Yes': 1}},
{'KitchenQual': {'Po': 1,'Fa': 2,'TA': 3,'Gd': 4,'Ex': 5}},
{'Functional': {'Sal': -7,'Sev': -6,'Maj1': -5,'Maj2': -4,'Mod': -3,'Min2': -2,'Min1': -1,
'Typ': 0}},
{'FireplaceQu': {'NA': 0,'Po': 1,'Fa': 2,'TA': 3,'Gd': 4,'Ex': 5}},
{'GarageFinish': {'NA': 0,'Unf': 1,'RFn': 2, 'Fin': 3}},
{'GarageQual': {'NA': 0, 'Po': 1,'Fa': 2, 'TA': 3,'Gd': 4, 'Ex': 5}},
{'GarageCond': {'NA': 0,'Po': 1,'Fa': 2,'TA': 3,'Gd': 4,'Ex': 5}},
{'PavedDrive': {'N': 0,'P': 0, 'Y': 1}},
{'Fence': {'NA': 0, 'MnWw': 1,'GdWo': 2,'MnPrv': 3,'GdPrv': 4}},
{'SaleCondition': {'Abnorml': 1, 'Alloca': 1, 'AdjLand': 1, 'Family': 1, 'Normal': 0,
'Partial': 0}}
)
Error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-40-f9c9c28b7237> in <module>()
22 {'Fence': {'NA': 0, 'MnWw': 1,'GdWo': 2,'MnPrv': 3,'GdPrv': 4}},
23 {'SaleCondition': {'Abnorml': 1, 'Alloca': 1, 'AdjLand': 1, 'Family': 1, 'Normal': 0,
---> 24 'Partial': 0}}
25 )
TypeError: replace() takes from 1 to 8 positional arguments but 23 were given
If I remove the 'SaleCondition' row from the above code, the error is again there but this time referring to 'Fence', and so on, for each line of code from bottom up. I've googled but have no idea what this means. Help MUCH appreciated.
You should do something like :
df.replace({'Fence':{'NA': 0, 'MnWw': 1,'GdWo': 2,'MnPrv': 3,'GdPrv': 4},'SaleCondition':{'Abnorml': 1, 'Alloca': 1, 'AdjLand': 1, 'Family': 1, 'Normal': 0,
'Partial': 0}})
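The format should be .replace({'col1':{},'col2':{}}) (a single dict with one entry per column), not .replace({'col1':{}},{'col2':{}}). In the second form every extra dict is passed as a separate positional argument, which is what raises the TypeError.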

How to change one dictionary value in a dictionary of dictionaries

I am running a variation of the following script:
text1={'file1':0,'file2':0}
text2=['100-200','200-300','300-400']
text3=['1','2','3','4']
level1={}
level2={}
# Every key of level1 is bound to the SAME text1 object; no copy is made.
for i in text2:
    level1[i]=text1
# Likewise, every key of level2 is bound to the SAME level1 object.
for n in text3:
    level2[n]=level1
# All paths lead to the one shared text1 dict, so this single increment shows
# up under every key of level2.
level2['3']['100-200']['file1']=level2['3']['100-200']['file1']+1
Unfortunately this changes the dictionary from:
{'1': {'200-300': {'file2': 0, 'file1': 0}, '300-400': {'file2': 0, 'file1': 0}, '100-200': {'file2': 0, 'file1': 0}}, '2': {'200-300': {'file2': 0, 'file1': 0}, '300-400': {'file2': 0, 'file1': 0}, '100-200': {'file2': 0, 'file1': 0}}, '3': {'200-300': {'file2': 0, 'file1': 0}, '300-400': {'file2': 0, 'file1': 0}, '100-200': {'file2': 0, 'file1': 0}}, '4': {'200-300': {'file2': 0, 'file1': 0}, '300-400': {'file2': 0, 'file1': 0}, '100-200': {'file2': 0, 'file1': 0}}}
to:
{'1': {'200-300': {'file2': 0, 'file1': 1}, '300-400': {'file2': 0, 'file1': 1}, '100-200': {'file2': 0, 'file1': 1}}, '2': {'200-300': {'file2': 0, 'file1': 1}, '300-400': {'file2': 0, 'file1': 1}, '100-200': {'file2': 0, 'file1': 1}}, '3': {'200-300': {'file2': 0, 'file1': 1}, '300-400': {'file2': 0, 'file1': 1}, '100-200': {'file2': 0, 'file1': 1}}, '4': {'200-300': {'file2': 0, 'file1': 1}, '300-400': {'file2': 0, 'file1': 1}, '100-200': {'file2': 0, 'file1': 1}}}
How do I change only one of the file values and not all of them?
Use a dict comprehension to produce your structure, where loop expressions are evaluated each iteration:
# The innermost dict literal is evaluated anew for every (n, i) pair, so each
# entry gets its own independent dict.
level2 = {n: {i: {'file1':0,'file2':0} for i in text2} for n in text3}
You are not creating copies of the dictionaries, merely storing references to one dictionary object.
Thus, each time you stored text1 you created a reference, not a copy, and the same goes for each time you referenced level1.

Given a set of strings, create a dictionary of dictionaries using them as keys to entries with default values

I have this:
set_of_strings = {'abc', 'def', 'xyz'}
And I want to create this:
dict_of_dicts = {
'abc': {'pr': 0, 'wt': 0},
'def' : {'pr': 0, 'wt': 0},
'xyz' : {'pr': 0, 'wt': 0}
}
What's the pythonic way? (Python 2.7)
Like this?
>>> set_of_strings = {'abc', 'def', 'xyz'}
>>> dict_of_dicts = {}
>>> for key in set_of_strings:
...     dict_of_dicts[key] = {'pr':0, 'wt':0}
...
>>> print dict_of_dicts
{'xyz': {'pr': 0, 'wt': 0}, 'abc': {'pr': 0, 'wt': 0}, 'def': {'pr': 0, 'wt': 0}}
As a dictionary comprehension:
>>> {k:{'pr':0, 'wt':0} for k in {'abc', 'def', 'xyz'}}
{'xyz': {'pr': 0, 'wt': 0}, 'abc': {'pr': 0, 'wt': 0}, 'def': {'pr': 0, 'wt': 0}}
Alternatively, you can do something like this (note that every key ends up sharing the same value dict):
>>> set_of_strings = {'abc', 'def', 'xyz'}
>>> value = {'pr': 0, 'wt': 0}
>>> dict(zip(set_of_strings, [value]*len(set_of_strings)))
{'xyz': {'pr': 0, 'wt': 0}, 'abc': {'pr': 0, 'wt': 0}, 'def': {'pr': 0, 'wt': 0}}
You can also use dict.fromkeys:
>>> d = dict.fromkeys({'abc', 'def', 'xyz'}, {'pr': 0, 'wt': 0})
>>> d
{'xyz': {'pr': 0, 'wt': 0}, 'abc': {'pr': 0, 'wt': 0}, 'def': {'pr': 0, 'wt': 0}}
NOTE:
The value specified ({'pr': 0, 'wt': 0}) is shared by all keys.
>>> d['xyz']['py'] = 1
>>> d
{'xyz': {'pr': 0, 'py': 1, 'wt': 0}, 'abc': {'pr': 0, 'py': 1, 'wt': 0}, 'def': {'pr': 0, 'py': 1, 'wt': 0}}
As the other answers show, there are several ways to achieve this, but IMO the most (only?) pythonic way is using a dict comprehension:
keys = ...
{ k: { 'pr': 0, 'wt': 0 } for k in keys }
If the values were immutable, dict.fromkeys is good, and is probably faster than dict comprehension.

Categories

Resources