I have an arbitrarily deep nested dictionary:
x = {'a': 1, 'b': {'c': 6, 'd': 7, 'g': {'h': 3, 'i': 9}}, 'e': {'f': 3}}
and I'd like to apply a function to all the integers in the dictionaries, so something like map, I guess, but for nested dictionaries.
So: map_nested_dicts(x, lambda v: v + 7) would be the sort of goal.
I'm stuck on the best way to store the layers of keys so that I can put each modified value back into its correct position.
What would the best way/approach to do this be?
Visit all nested values recursively:
import collections
def map_nested_dicts(ob, func):
    if isinstance(ob, collections.Mapping):
        return {k: map_nested_dicts(v, func) for k, v in ob.iteritems()}
    else:
        return func(ob)
map_nested_dicts(x, lambda v: v + 7)
# Creates a new dict object:
# {'a': 8, 'b': {'c': 13, 'g': {'h': 10, 'i': 16}, 'd': 14}, 'e': {'f': 10}}
In some cases it's desired to modify the original dict object (to avoid re-creating it):
import collections
def map_nested_dicts_modify(ob, func):
    for k, v in ob.iteritems():
        if isinstance(v, collections.Mapping):
            map_nested_dicts_modify(v, func)
        else:
            ob[k] = func(v)
map_nested_dicts_modify(x, lambda v: v + 7)
# x is now
# {'a': 8, 'b': {'c': 13, 'g': {'h': 10, 'i': 16}, 'd': 14}, 'e': {'f': 10}}
If you're using Python 3:
replace dict.iteritems with dict.items
replace import collections with import collections.abc
replace collections.Mapping with collections.abc.Mapping
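Put together, the first function with those substitutions applied (a straightforward Python 3 port):
import collections.abc

def map_nested_dicts(ob, func):
    if isinstance(ob, collections.abc.Mapping):
        return {k: map_nested_dicts(v, func) for k, v in ob.items()}
    else:
        return func(ob)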
Just to expand on vaultah's answer, if one of your elements can be a list, and you'd like to handle those too:
import collections
def map_nested_dicts_modify(ob, func):
    for k, v in ob.iteritems():
        if isinstance(v, collections.Mapping):
            map_nested_dicts_modify(v, func)
        elif isinstance(v, list):
            ob[k] = map(func, v)  # Python 2; in Python 3 use list(map(func, v))
        else:
            ob[k] = func(v)
If you need it to work for both lists and dicts in arbitrary nesting:
def apply_recursive(func, obj):
    if isinstance(obj, dict):  # if dict, apply to each value
        return {k: apply_recursive(func, v) for k, v in obj.items()}
    elif isinstance(obj, list):  # if list, apply to each element
        return [apply_recursive(func, elem) for elem in obj]
    else:
        return func(obj)
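For example (values are illustrative):
>>> apply_recursive(lambda v: v + 7, {'a': 1, 'b': [2, {'c': 3}]})
{'a': 8, 'b': [9, {'c': 10}]}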
If you want to avoid dependencies and you need to map a mixed dictionaries/iterables collection with any combination of nesting and depth, you can use the following solution:
def map_nested_coll(func, obj):
    if '__iter__' in dir(obj) and type(obj) not in (str, bytes):
        if type(obj) == dict:
            return {k: map_nested_coll(func, v) for k, v in obj.items()}
        else:
            return tuple(map_nested_coll(func, x) for x in obj)
    else:
        return func(obj)
In order to retain simplicity, non-dict iterables are converted to tuples (you can convert to list instead of tuple if you like, but converting to tuples is slightly faster). Also, although strings and bytes are iterables, usually you want to apply func on the whole string or bytes, so they are filtered out and not treated like iterables.
The advantage of this solution is that it works with any kind of iterable (even lazy ones like zip, range and map objects) and handles edge cases well (see below):
>>> func = lambda x: x/2
>>> map_nested_coll(func, dict(a=1,b=dict(c=2,d=[3,(41,42),5]),e=[6,7]))
{'a': 0.5, 'b': {'c': 1.0, 'd': (1.5, (20.5, 21.0), 2.5)}, 'e': (3.0, 3.5)}
>>> map_nested_coll(func, [1,dict(a=2,b=3),(4,5)])
(0.5, {'a': 1.0, 'b': 1.5}, (2.0, 2.5))
>>> map_nested_coll(func, map(lambda x: 1+x, range(3)))
(0.5, 1.0, 1.5)
>>> map_nested_coll(func, 9)
4.5
>>> map_nested_coll(func, [])
()
>>> map_nested_coll(func, dict())
{}
I have a more general implementation that can accept any number of containers of any type as parameters.
from collections.abc import Iterable
import types
def dict_value_map(fun, *dicts):
    keys = dicts[0].keys()
    for d in dicts[1:]:
        assert d.keys() == keys
    return {k: fun(*(d[k] for d in dicts)) for k in keys}

def collection_map(fun, *collections):
    assert len(collections) > 0
    if isinstance(collections[0], dict):
        return dict_value_map(fun, *collections)
    if isinstance(collections[0], (tuple, list, set)):
        return type(collections[0])(map(fun, *collections))
    else:
        return map(fun, *collections)
iscollection = lambda v: isinstance(v, Iterable) and not isinstance(v, str)

def apply(fun, *collections, at=lambda collections: not iscollection(collections[0])):
    '''
    Like the standard map, but can apply fun to inner elements.
    at: an int, a function, or a type.
    at = 0 means fun(*collections).
    at = somefunction: fun is applied to the elements when somefunction(elements) is True.
    at = sometype: fun is applied to the elements when the elements are of sometype.
    '''
    if isinstance(at, int):
        assert at >= 0
        if at == 0:
            return fun(*collections)
        else:
            return collection_map(lambda *cs: apply(fun, *cs, at=at - 1), *collections)
    if isinstance(at, types.FunctionType):
        if at(collections):
            return fun(*collections)
        else:
            return collection_map(lambda *cs: apply(fun, *cs, at=at), *collections)
    else:
        return apply(fun, *collections, at=lambda eles: isinstance(eles[0], at))
Examples:
> apply(lambda x:2*x, [(1,2),(3,4)])
[(2, 4), (6, 8)]
> apply(lambda a,b: a+b, ([1,2],[3,4]), ([5,6],[7,8]))
([6, 8], [10, 12])
> apply(lambda a,b: a+b, ([1,2],[3,4]), ([5,6],[7,8]), at=1)
([1, 2, 5, 6], [3, 4, 7, 8])
> apply(lambda a,b: a+b, ([1,2],[3,4]), ([5,6],[7,8]), at=0)
([1, 2], [3, 4], [5, 6], [7, 8])
> apply(lambda a,b:a+b, {'m':[(1,2),[3,{4}]], 'n':5}, {'m':[(6,7),[8,{9}]],'n':10})
{'m': [(7, 9), [11, {13}]], 'n': 15}
> apply(str.upper, [('a','b'),('c','d')], at=str)
[('A', 'B'), ('C', 'D')]
and
> apply(lambda v:v+7, {'a': 1, 'b': {'c': 6, 'd': 7, 'g': {'h': 3, 'i': 9}}, 'e': {'f': 3}})
{'a': 8, 'b': {'c': 13, 'd': 14, 'g': {'h': 10, 'i': 16}}, 'e': {'f': 10}}
I have seen an elegant solution for recursive functions using lambda in Python. As a matter of fact, I am very interested in applying this concept to a multi-variable function, with two variables (x and y), for instance. How can it be done in Python?
The function itself is not important, but if it helps, here is the description: I have dictionaries containing one-layer dictionaries inside, such as:
dicts = [{'A': {'a': 1}, 'B': {'a': 10}}, {'A': {'b':3}}, {'B': {'c': 31}}, {'A': {'j': 4, 't': 9}}, {'B': {'y': 400, 'r': 160}}]
The keys in the inner dictionaries do not repeat for the same outer dictionary key. I want them to be merged so that they result in:
result = {'A': {'a': 1, 'b': 3, 'j': 4, 't': 9}, 'B': {'a': 10, 'c': 31, 'r': 160, 'y': 400}}
The challenge of using a lambda function to solve this caught my attention. So if someone could give hints or help me solve this, I would appreciate it!
Python lambda functions can only consist of a single expression. So when you want to do something complex with a lambda, what you are looking for is a one-liner.
from functools import reduce # Only needed in Python3
f = lambda dicts: reduce(lambda acc, el: {**acc, **{k: {**acc.get(k, {}), **v} for k, v in el.items()}}, dicts, {})
f(dicts) # {'A': {'a': 1, 'b': 3, 'j': 4, 't': 9}, 'B': {'a': 10, 'c': 31, 'r': 160, 'y': 400}}
Note that reduce performs the repeated combination for you (an implicit fold over the list), which plays the role of the recursion. The inner lambda, which takes acc and el, also provides you with an example of a multi-variable lambda.
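If the one-liner feels too dense, the same merge can be written as an explicit loop; a sketch equivalent to the reduce above (the function name merge is mine):
def merge(dicts):
    acc = {}
    for el in dicts:
        for k, v in el.items():
            acc.setdefault(k, {}).update(v)
    return acc

merge(dicts)  # {'A': {'a': 1, 'b': 3, 'j': 4, 't': 9}, 'B': {'a': 10, 'c': 31, 'r': 160, 'y': 400}}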
I have a dictionary called wordCounts which maps a word to how many times it occurred, how can I get the top n words in the dict while allowing more than n if there is a tie?
As the previous answer says, you can convert the dict to a Counter to make this dataset easier to deal with.
>>> from collections import Counter
>>> d = {"d":1,"c":2,"a":3,'b':3,'e':0,'f':1}
>>> c = Counter(d)
>>> c
Counter({'b': 3, 'a': 3, 'c': 2, 'f': 1, 'd': 1, 'e': 0})
Counter has a most_common(n) method that returns the n most common elements. Note that it will cut off any ties that fall outside the top n. Therefore:
>>> c.most_common(4)
[('b', 3), ('a', 3), ('c', 2), ('f', 1)]
To include all values equal to the nth element, you can do something like the following, without converting to a Counter. This is pretty messy, but it should do the trick.
def most_common_inclusive(freq_dict, n):
    # find the count of the nth most common word
    nth_most_common = sorted(freq_dict.values(), reverse=True)[n - 1]
    return {k: v for k, v in freq_dict.items() if v >= nth_most_common}
You can use as follows:
>>> d = {'b': 3, 'a': 3, 'c': 2, 'f': 1, 'd': 1, 'e': 0}
>>> most_common_inclusive(d, 4)
{'d': 1, 'b': 3, 'c': 2, 'f': 1, 'a': 3}
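If you do have a Counter already, the same idea can lean on most_common; a small sketch (the function name is mine):
def most_common_inclusive_counter(c, n):
    cutoff = c.most_common(n)[-1][1]  # count of the nth most common element
    return {k: v for k, v in c.items() if v >= cutoff}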
One solution could be:
from collections import Counter, defaultdict
list_of_words = ['dog', 'cat', 'moo', 'dog', 'pun', 'pun']
def get_n_most_common(n, list_of_words):
    ct = Counter(list_of_words)
    d = defaultdict(list)
    for word, quantity in ct.items():
        d[quantity].append(word)
    most_common = sorted(d.keys(), reverse=True)
    return [(word, val) for val in most_common[:n] for word in d[val]]
And the tests:
>> get_n_most_common(2, list_of_words)
=> [('pun', 2), ('dog', 2), ('moo', 1), ('cat', 1)]
>> get_n_most_common(1, list_of_words)
=> [('pun', 2), ('dog', 2)]
MooingRawr is on the right track, but now we just need to get the top n results (assuming d is the word-to-count dict and n is the cutoff):
l = []
for i, (word, count) in enumerate(sorted(d.items(), reverse=True, key=lambda x: x[1])):
    if i >= n and count < l[-1][1]:  # past the top n and no longer tied
        break
    l.append((word, count))
I'd like to implement a Counter which drops the least frequent element when the counter's size going beyond some threshold. For that I need to remove the least frequent element.
What is the fastest way to do that in Python?
I know counter.most_common()[-1], but it creates a whole list and seems slow when done repeatedly. Is there a better way (or maybe a different data structure)?
You may implement least_common by borrowing the implementation of most_common and making the necessary changes.
Refer to collections source in Py2.7:
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least. If n is None, then list all element counts.

    >>> Counter('abcdeabcdabcaba').most_common(3)
    [('a', 5), ('b', 4), ('c', 3)]
    '''
    # Emulate Bag.sortedByCount from Smalltalk
    if n is None:
        return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
    return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
To change it to retrieve the least common elements instead, we need just a few adjustments.
import collections
from operator import itemgetter as _itemgetter
import heapq as _heapq
class MyCounter(collections.Counter):
    def least_common(self, n=None):
        if n is None:
            return sorted(self.iteritems(), key=_itemgetter(1), reverse=False)  # was: reverse=True
        return _heapq.nsmallest(n, self.iteritems(), key=_itemgetter(1))  # was: _heapq.nlargest
Tests:
c = MyCounter("abbcccddddeeeee")
assert c.most_common() == c.least_common()[::-1]
assert c.most_common()[-1:] == c.least_common(1)
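On Python 3, where Counter has no iteritems, the same subclass works with items(); a minimal sketch of the port:
import collections
from operator import itemgetter
import heapq

class MyCounter(collections.Counter):
    def least_common(self, n=None):
        if n is None:
            return sorted(self.items(), key=itemgetter(1))
        return heapq.nsmallest(n, self.items(), key=itemgetter(1))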
Since your stated goal is to remove items in the counter below a threshold, just invert the counter (so each count maps to the list of keys with that count) and then remove the keys whose count is below the threshold.
Example:
>>> c=Counter("aaaabccadddefeghizkdxxx")
>>> c
Counter({'a': 5, 'd': 4, 'x': 3, 'c': 2, 'e': 2, 'b': 1, 'g': 1, 'f': 1, 'i': 1, 'h': 1, 'k': 1, 'z': 1})
counts = {}
for k, v in c.items():
    counts.setdefault(v, []).append(k)

tol = 2
for k, v in counts.items():
    if k <= tol:
        c = c - Counter({}.fromkeys(v, k))
>>> c
Counter({'a': 5, 'd': 4, 'x': 3})
In this example, all counts less than or equal to 2 are removed.
Or, just recreate the counter with a comparison to your threshold value:
>>> c
Counter({'a': 5, 'd': 4, 'x': 3, 'c': 2, 'e': 2, 'b': 1, 'g': 1, 'f': 1, 'i': 1, 'h': 1, 'k': 1, 'z': 1})
>>> Counter({k:v for k,v in c.items() if v>tol})
Counter({'a': 5, 'd': 4, 'x': 3})
If you only want to get the least common value, then the most efficient way to handle this is to simply get the minimum value from the counter (dictionary).
Since you cannot tell whether a value is the lowest without looking at all items, a time complexity of O(n) is really the best we can get. However, we do not need linear space, as we only need to remember the lowest value seen so far, not all of them. So a solution that works like most_common() in reverse is more than we need.
In this case, we can simply use min() with a custom key function here:
>>> c = Counter('foobarbazbar')
>>> c
Counter({'a': 3, 'b': 3, 'o': 2, 'r': 2, 'f': 1, 'z': 1})
>>> k = min(c, key=lambda x: c[x])
>>> del c[k]
>>> c
Counter({'a': 3, 'b': 3, 'o': 2, 'r': 2, 'z': 1})
Of course, since dictionaries are unordered, you have no influence over which of the lowest values is removed this way in case there are multiple keys with the same lowest count.
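If you want the tie-breaking to be deterministic, you can extend the key to a tuple; for example, to break ties alphabetically (an illustrative choice, not something the question requires):
k = min(c, key=lambda x: (c[x], x))  # lowest count first, then lowest key
del c[k]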
In the following code, I would like to know if the grade_to_score dictionary will be created every time the method is called?
def get_score(grade):
    grade_to_score = {'A': 10, 'B': 8, 'C': 6, 'D': 4, 'F': 0}  # step 1
    return grade_to_score.get(grade, -1)
Also, what is the way to confirm that? I am working with Python 2.7.
Yes it will. To get around it, you can pass it as a default argument so that it will only be evaluated once:
def get_score(grade, grade_to_score={'A': 10, 'B': 8, 'C': 6, 'D': 4, 'F': 0}):
    return grade_to_score.get(grade, -1)
or the better approach:
def get_score(grade, grade_to_score=None):
    if grade_to_score is None:
        grade_to_score = {'A': 10, 'B': 8, 'C': 6, 'D': 4, 'F': 0}
    return grade_to_score.get(grade, -1)
To answer your question "what is the way to confirm that?", you can check whether the same object is being used each time:
def get_score(grade):
    grade_to_score = {'A': 10, 'B': 8, 'C': 6, 'D': 4, 'F': 0}  # step 1
    print(id(grade_to_score))  # check object ID
    return grade_to_score.get(grade, -1)
Now you can call it:
>>> a = get_score("")
50252080
>>> b = get_score("")
50249920
>>> c = get_score("")
50249776
A different id means a different object, so grade_to_score clearly is being created anew on each call. Interestingly, the id can repeat if you call in a for loop:
>>> for _ in range(3):
...     a = get_score("")
50249920
50249920
50249920
>>> scores = [get_score(grade) for grade in "ABC"]
53737032
53737032
53737032
A repeated id does not mean the dict is being reused: each call's dict is garbage-collected as soon as the call returns, so CPython is free to place the next, freshly created dict at the same memory address, giving it the same id.
Yes, the dictionary is created anew every time the function is called.
You can make it a global instead, or make it a function default:
grade_to_score = {'A': 10, 'B': 8, 'C': 6, 'D': 4, 'F': 0}  # step 1

def get_score(grade):
    return grade_to_score.get(grade, -1)
or
def get_score(grade, grade_to_score={'A': 10, 'B': 8, 'C': 6, 'D': 4, 'F': 0}):
    return grade_to_score.get(grade, -1)
In the second case, grade_to_score is passed into the function as a local, so lookups are (marginally) faster.
In both cases the dictionary literal is executed only once, on module import. Note that in both cases, because grade_to_score is a mutable dictionary, any changes you make to it are shared across calls, not local to a single get_score() call.
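For example, with the global variant above (illustrative mutation):
grade_to_score['A'] = 99  # visible to every subsequent call
get_score('A')            # now returns 99, not 10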
This sort is for educational purposes; no built-in sort is allowed.
If you think my question and the answer helped, please upvote me and the first answerer:
#J.F. Sebastian
I have found this on SO:
"How to do an insertion sort on a list of dictionaries in python?"
but that answer does not seem right.
Using the answer code of the above question raises this error:
TypeError: list indices must be integers, not dict
Example :
lst = [{'a':20, 'b':30, 'c':25, 'd': 600},{'a':60, 'b':10, 'c':43, 'd': 20}]
To sort using insertion sort, for example by the key b, we should get:
[{'a':60, 'b':10, 'c':43, 'd': 20},{'a':20, 'b':30, 'c':25, 'd': 600}]
But my code gets
[{'b': 10, 'c': 25, 'a': 20, 'd': 600}, {'b': 30, 'c': 43, 'a': 60, 'd': 20}]
My code replaces the values of that key across the dictionaries in the list instead of reordering the dictionaries themselves.
Here is my Code:
def insertionSort(allData, key):
    for i in range(len(allData)):
        temp = allData[i][key]
        j = i
        while j > 0 and temp < allData[j - 1][key]:
            allData[j][key] = allData[j - 1][key]
            j = j - 1
        allData[j][key] = temp
My homework sort result:
{'snow': 19.2, 'minT': -10.8, 'month': 12, 'maxT': 9.0, 'rain': 45.0, 'year': 2003, 'meanT': -0.1, 'yearmonth': 193801}
{'snow': 35.6, 'minT': -20.0, 'month': 1, 'maxT': 8.9, 'rain': 34.3, 'year': 1974, 'meanT': -5.9, 'yearmonth': 193802}
{'snow': 0.0, 'minT': 9.7, 'month': 8, 'maxT': 34.8, 'rain': 20.8, 'year': 2007, 'meanT': 22.4, 'yearmonth': 193803}
After sorting on yearmonth, the yearmonth values are reordered from small to big, but the rest of each dictionary does not change.
Why does this happen, and how should I change it?
==================================
Answer:
After copying some of J.F. Sebastian's code,
I found that I can't directly use the call
sort(a,b)
output:
TypeError: 'str' object is not callable
I should use
sort(a,b=lambda x: x['thekey'])
Then JFS made a new function to make it work.
I also found another way:
just change line 5 of JFS's code
from
if key(L[j]) <= key(d):
to
if L[j][key] <= d[key]:
Then everything works!
Hope this can help other people too, especially those using Google and doing the same assignment as me.
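For reference, here is what that modification looks like folded into the full function from the answer below (a sketch; the name insertion_sort_by_key is mine):
def insertion_sort_by_key(L, key):
    # here `key` is a dictionary key (a string), as in the assignment
    for i in range(1, len(L)):
        d = L[i]
        for j in range(i - 1, -1, -1):
            if L[j][key] <= d[key]:  # the changed line
                break
            L[j + 1] = L[j]
        else:
            j -= 1
        L[j + 1] = d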
Try to write an insertion sort procedure that works with integer lists. Then it is easy to modify it to accept a key parameter, to allow arbitrary comparisons:
def insertion_sort_impl(L, *, key):
    for i in range(1, len(L)):  # loop invariant: `L[:i]` is sorted
        d = L[i]
        for j in range(i - 1, -1, -1):
            if key(L[j]) <= key(d):
                break
            L[j + 1] = L[j]
        else:  # `key(L[j]) > key(d)` for all `j`
            j -= 1
        L[j + 1] = d
Example:
lst = [{'a':20, 'b':30, 'c':25, 'd': 600},{'a':60, 'b':10, 'c':43, 'd': 20}]
insertion_sort_impl(lst, key=lambda x: x['d']) # sort by `d` key
print(lst)
Output
[{'a': 60, 'c': 43, 'b': 10, 'd': 20}, {'a': 20, 'c': 25, 'b': 30, 'd': 600}]
Note: ignore the order of keys inside each dictionary. It may change from run to run, but the dictionary whose d key has the smaller value will always be on the left.
The name key in the insertion_sort_impl() function is unrelated to a dictionary key. It is a naming convention used by many built-in functions; key is used to get a value to be used for comparison:
>>> max([1, 2], key=lambda x: -x)
1
>>> min([1, -2], key=lambda x: x*x)
1
>>> sorted([-1, 0, 1], key=lambda x: x*x-x)
[0, 1, -1]
To understand why the loop starts at 1, you could read the general description of insertion sort.
If the interface of insertion_sort function is fixed then you could define it in terms of insertion_sort_impl:
from operator import itemgetter
def insertion_sort(L, key):
    return insertion_sort_impl(L, key=itemgetter(key))
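For example, with the list from the question:
lst = [{'a': 20, 'b': 30, 'c': 25, 'd': 600}, {'a': 60, 'b': 10, 'c': 43, 'd': 20}]
insertion_sort(lst, 'b')  # sorts in place by the 'b' key
print(lst)
# [{'a': 60, 'b': 10, 'c': 43, 'd': 20}, {'a': 20, 'b': 30, 'c': 25, 'd': 600}]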