How to evaluate values in a dictionary? - python

I have a dictionary (d).
I want to use this dictionary with different values for a and b.
How can I update the dictionary's output without calling the whole dictionary again?
a = 1
b = 2
d = {'value_1': a + b, 'value_2': a - b}

Based on your feedback to my first answer, it sounds like what you want is something that behaves more-or-less like a spreadsheet. Below is how to implement one that is very dictionary-like. It's based on Raymond Hettinger's ActiveState recipe by that name with some modifications and extensions.
Note that except for the special case of keyword arguments passed when an instance of the class is created, the values in it should all be strings, not numerical values.
Also note that since it uses eval(), for security purposes it should only be used with input from trusted sources.
I think it's important to understand that although the Spreadsheet class presented below isn't technically a dictionary, it behaves a lot like (a subclass of) one and if used instead of a regular dictionary will give you the capabilities you want. See the description of mapping in the online documentation's glossary.
from collections.abc import MutableMapping

class SpreadSheet(MutableMapping):
    def __init__(self, tools=None, **kwargs):
        self._cells = {}
        for key, value in kwargs.items():
            self._cells[key] = value if isinstance(value, str) else str(value)
        self._tools = {'__builtins__': None}  # Prevent eval() from supplying builtins.
        if tools is not None:
            self._tools.update(tools)  # Add any caller-supplied functions.
    def clear(self):
        return self._cells.clear()
    def copy(self):
        return self._cells.copy()
    def __contains__(self, key):
        return key in self._cells
    def __setitem__(self, key, formula):
        self._cells[key] = formula
    def __getitem__(self, key):
        return eval(self._cells[key], self._tools, self)
    def __len__(self):
        return len(self._cells)
    def __iter__(self):
        return iter(self._cells)
    def __delitem__(self, key):
        del self._cells[key]
    def getformula(self, key):
        """ Return raw un-evaluated contents of cell. """
        return self._cells[key]
    def update(self, *args, **kwargs):
        for k, v in dict(*args, **kwargs).items():
            self[k] = v
Sample usage:
d = SpreadSheet(a=1, b=2)
d.update({'x': 'x1',
          'x1': 'a+2',
          'x2': 'b+1',
          'x3': 'a+b'})
xx = d['x']
print(xx)  # -> 3
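Because cells are re-evaluated on every access, assigning a new formula to a or b changes later results, which is exactly what the question asks for:

d['a'] = '10'   # replace the string stored in cell "a"
print(d['x3'])  # -> 12, since 'a+b' is re-evaluated with the new a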

You could do it by storing the expressions as functions in a separate dictionary, then rebuilding the value dictionary by calling them.
Here's a simple example illustrating what I'm suggesting:
funcs = {'value_1': lambda: a+b,
         'value_2': lambda: a-b}

a = 1
b = 2
d = {k: v() for k, v in funcs.items()}
print(d)  # -> {'value_1': 3, 'value_2': -1}

a = 3
b = 5
d = {k: v() for k, v in funcs.items()}
print(d)  # -> {'value_1': 8, 'value_2': -2}

Related

Custom key function for python defaultdict

What is a good way to define a custom key function analogous to the key argument to list.sort, for use in a collections.defaultdict?
Here's an example use case:
import collections

class Path(object):
    def __init__(self, start, end, *other_features):
        self._first = start
        self._last = end
        self._rest = other_features
    def startpoint(self):
        return self._first
    def endpoint(self):
        return self._last
    # Maybe it has __eq__ and __hash__, maybe not

paths = [... a list of Path objects ...]

by_endpoint = collections.defaultdict(list)
for p in paths:
    by_endpoint[p.endpoint()].append(p)
# do stuff that depends on lumping paths with the same endpoint together
What I desire is a way to tell by_endpoint to use Path.endpoint as the key function, similar to the key argument to list.sort, and not have to put this key definition into the Path class itself (via __eq__ and __hash__), since it is just as sensible to also support "lumping by start point" as well.
Something like this maybe:
from collections import defaultdict

class defaultkeydict(defaultdict):
    def __init__(self, default_factory, key=lambda x: x, *args, **kwargs):
        defaultdict.__init__(self, default_factory, *args, **kwargs)
        self.key_func = key
    def __getitem__(self, key):
        return defaultdict.__getitem__(self, self.get_key(key))
    def __setitem__(self, key, value):
        defaultdict.__setitem__(self, self.get_key(key), value)
    def get_key(self, key):
        try:
            return self.key_func(key)
        except Exception:
            return key
Note the logic that falls back to the passed-in key if the key function can't be executed. That way you can still access the items using strings or whatever keys.
Now:
p = Path("Seattle", "Boston")
d = defaultkeydict(list, key=lambda x: x.endpoint())
d[p].append(p)
print(d)  # defaultdict(<type 'list'>, {'Boston': [<__main__.Path object at ...>]})
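Because get_key falls back to the raw key when the key function raises, the same entry is also reachable by the computed string key:

print(d["Boston"])  # -> [<__main__.Path object at ...>]
# "Boston".endpoint() raises AttributeError, so get_key returns "Boston" unchanged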

Setup dictionary lazily

Let's say I have this dictionary in python, defined at the module level (mysettings.py):
settings = {
    'expensive1': expensive_to_compute(1),
    'expensive2': expensive_to_compute(2),
    ...
}
I would like those values to be computed when the keys are accessed:
from mysettings import settings  # settings is only "prepared"

print(settings['expensive1'])    # Now the value is really computed.
Is this possible? How?
Don't inherit from the built-in dict. Even if you override the dict.__getitem__() method, dict.get() will not work as you expect.
The right way is to inherit from collections.abc.Mapping.
from collections.abc import Mapping

class LazyDict(Mapping):
    def __init__(self, *args, **kw):
        self._raw_dict = dict(*args, **kw)
    def __getitem__(self, key):
        func, arg = self._raw_dict.__getitem__(key)
        return func(arg)
    def __iter__(self):
        return iter(self._raw_dict)
    def __len__(self):
        return len(self._raw_dict)
Then you can do:
settings = LazyDict({
    'expensive1': (expensive_to_compute, 1),
    'expensive2': (expensive_to_compute, 2),
})
I also list sample code and examples here: https://gist.github.com/gyli/9b50bb8537069b4e154fec41a4b5995a
If you don't separate the arguments from the callable, I don't think it's possible. However, this should work:
class MySettingsDict(dict):
    def __getitem__(self, item):
        function, arg = dict.__getitem__(self, item)
        return function(arg)

def expensive_to_compute(arg):
    return arg * 3
And now:
>>> settings = MySettingsDict({
...     'expensive1': (expensive_to_compute, 1),
...     'expensive2': (expensive_to_compute, 2),
... })
>>> settings['expensive1']
3
>>> settings['expensive2']
6
Edit:
You may also want to cache the results of expensive_to_compute, if they are to be accessed multiple times. Something like this
class MySettingsDict(dict):
    def __getitem__(self, item):
        value = dict.__getitem__(self, item)
        if not isinstance(value, int):
            function, arg = value
            value = function(arg)
            dict.__setitem__(self, item, value)
        return value
And now:
>>> settings.values()
dict_values([(<function expensive_to_compute at 0x9b0a62c>, 2),
(<function expensive_to_compute at 0x9b0a62c>, 1)])
>>> settings['expensive1']
3
>>> settings.values()
dict_values([(<function expensive_to_compute at 0x9b0a62c>, 2), 3])
You may also want to override other dict methods, depending on how you want to use the dict.
Store references to the functions as the values for the keys, i.e.:

def A():
    return "that took ages"

def B():
    return "that took for-ever"

settings = {
    "A": A,
    "B": B,
}

print(settings["A"]())
This way, you only evaluate the function associated with a key when you access it and invoke it. A suitable class which can handle having non-lazy values would be:
import types

class LazyDict(dict):
    def __getitem__(self, key):
        item = dict.__getitem__(self, key)
        if isinstance(item, types.FunctionType):
            return item()
        else:
            return item
usage:

settings = LazyDict([("A", A), ("B", B)])
print(settings["A"])
# -> that took ages
You can make expensive_to_compute a generator function:
settings = {
    'expensive1': expensive_to_compute(1),
    'expensive2': expensive_to_compute(2),
}

Then try:

from mysettings import settings

print(next(settings['expensive1']))
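A minimal sketch of that idea (expensive_to_compute here is a stand-in): the function body does not run until next() is called, so the dict only stores cheap generator objects.

def expensive_to_compute(arg):
    yield arg * 3  # deferred: runs only when next() is called

settings = {'expensive1': expensive_to_compute(1)}
print(next(settings['expensive1']))  # -> 3, computed only now

Note that each generator can be consumed only once; a second next() raises StopIteration.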
I would populate the dictionary values with callables and change them to the result upon reading.
class LazyDict(dict):
    def __getitem__(self, k):
        v = super().__getitem__(k)
        if callable(v):
            v = v()
            super().__setitem__(k, v)
        return v
    def get(self, k, default=None):
        if k in self:
            return self.__getitem__(k)
        return default
Then with
def expensive_to_compute(arg):
    print('Doing heavy stuff')
    return arg * 3
you can do:
>>> settings = LazyDict({
...     'expensive1': lambda: expensive_to_compute(1),
...     'expensive2': lambda: expensive_to_compute(2),
... })
>>> settings.__repr__()
"{'expensive1': <function <lambda> at 0x000001A0BA2B8EA0>, 'expensive2': <function <lambda> at 0x000001A0BA2B8F28>}"
>>> settings['expensive1']
Doing heavy stuff
3
>>> settings.get('expensive2')
Doing heavy stuff
6
>>> settings.__repr__()
"{'expensive1': 3, 'expensive2': 6}"
I recently needed something similar. Mixing both strategies from Guangyang Li and michaelmeyer, here is how I did it:
from collections.abc import MutableMapping

class LazyDict(MutableMapping):
    """Lazily evaluated dictionary."""
    function = None
    def __init__(self, *args, **kwargs):
        self._dict = dict(*args, **kwargs)
        self._evaluated = set()  # keys whose values have already been computed
    def __getitem__(self, key):
        """Evaluate value."""
        value = self._dict[key]
        if key not in self._evaluated:
            value = self.function(value)
            self._dict[key] = value
            self._evaluated.add(key)
        return value
    def __setitem__(self, key, value):
        """Store value lazily."""
        self._dict[key] = value
        self._evaluated.discard(key)  # re-assigned keys get re-evaluated on next access
    def __delitem__(self, key):
        """Delete value."""
        del self._dict[key]
    def __iter__(self):
        """Iterate over dictionary."""
        return iter(self._dict)
    def __len__(self):
        """Return size of dictionary."""
        return len(self._dict)
Let's lazily evaluate the following function:
def expensive_to_compute(arg):
    return arg * 3
The advantage is that the function does not need to be known when the object is created, and the stored values are the raw arguments (which is what I needed):
>>> settings = LazyDict({'expensive1': 1, 'expensive2': 2})
>>> settings.function = expensive_to_compute # function unknown until now!
>>> settings['expensive1']
3
>>> settings['expensive2']
6
This approach works with a single function only.
I can point out the following advantages:
implements the complete MutableMapping API
if your function is non-deterministic, you can re-assign a value to force re-evaluation, as sketched below
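For example, with the class above, re-assigning a key clears its cached result, so the next access recomputes it:

settings['expensive1'] = 1  # store the raw argument again
settings['expensive1']      # -> 3, recomputed by self.function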
pass in a function to generate the values on the first attribute get:
class LazyDict(dict):
    """ Fill in the values of a dict at first access """
    def __init__(self, fn, *args, **kwargs):
        self._fn = fn
        self._fn_args = args or []
        self._fn_kwargs = kwargs or {}
        return super(LazyDict, self).__init__()
    def _fn_populate(self):
        if self._fn:
            self._fn(self, *self._fn_args, **self._fn_kwargs)
            self._fn = self._fn_args = self._fn_kwargs = None
    def __getattribute__(self, name):
        if not name.startswith('_fn'):
            self._fn_populate()
        return super(LazyDict, self).__getattribute__(name)
    def __getitem__(self, item):
        self._fn_populate()
        return super(LazyDict, self).__getitem__(item)
>>> def _fn(self, val):
...     print('lazy loading')
...     self['foo'] = val
...
>>> d = LazyDict(_fn, 'bar')
>>> d
{}
>>> d['foo']
lazy loading
'bar'
Alternatively, one can use the LazyDictionary package that creates a thread-safe lazy dictionary.
Installation:
pip install lazydict
Usage:
from lazydict import LazyDictionary
import tempfile

lazy = LazyDictionary()
lazy['temp'] = lambda: tempfile.mkdtemp()

How to implement an ordered, default dict?

I would like to combine OrderedDict() and defaultdict() from collections in one object, which shall be an ordered, default dict.
Is this possible?
The following (using a modified version of this recipe) works for me:
from collections import OrderedDict
from collections.abc import Callable

class DefaultOrderedDict(OrderedDict):
    # Source: http://stackoverflow.com/a/6190500/562769
    def __init__(self, default_factory=None, *a, **kw):
        if (default_factory is not None and
                not isinstance(default_factory, Callable)):
            raise TypeError('first argument must be callable')
        OrderedDict.__init__(self, *a, **kw)
        self.default_factory = default_factory
    def __getitem__(self, key):
        try:
            return OrderedDict.__getitem__(self, key)
        except KeyError:
            return self.__missing__(key)
    def __missing__(self, key):
        if self.default_factory is None:
            raise KeyError(key)
        self[key] = value = self.default_factory()
        return value
    def __reduce__(self):
        if self.default_factory is None:
            args = tuple()
        else:
            args = self.default_factory,
        return type(self), args, None, None, self.items()
    def copy(self):
        return self.__copy__()
    def __copy__(self):
        return type(self)(self.default_factory, self)
    def __deepcopy__(self, memo):
        import copy
        return type(self)(self.default_factory,
                          copy.deepcopy(self.items()))
    def __repr__(self):
        return 'OrderedDefaultDict(%s, %s)' % (self.default_factory,
                                               OrderedDict.__repr__(self))
Here is another possibility, inspired by Raymond Hettinger's super() Considered Super, tested on Python 2.7.X and 3.4.X:
from collections import OrderedDict, defaultdict

class OrderedDefaultDict(OrderedDict, defaultdict):
    def __init__(self, default_factory=None, *args, **kwargs):
        # in Python 3 you can omit the args to super
        super(OrderedDefaultDict, self).__init__(*args, **kwargs)
        self.default_factory = default_factory
If you check out the class's MRO (aka, help(OrderedDefaultDict)), you'll see this:
class OrderedDefaultDict(collections.OrderedDict, collections.defaultdict)
 |  Method resolution order:
 |      OrderedDefaultDict
 |      collections.OrderedDict
 |      collections.defaultdict
 |      __builtin__.dict
 |      __builtin__.object
meaning that when an instance of OrderedDefaultDict is initialized, it defers to OrderedDict's __init__, which in turn invokes defaultdict's methods before falling through to __builtin__.dict, which is precisely what we want.
If you want a simple solution that doesn't require a class, you can just use OrderedDict.setdefault(key, default=None) or OrderedDict.get(key, default=None). If you only get / set from a few places, say in a loop, you can easily just setdefault.
totals = collections.OrderedDict()
for i, x in some_generator():
    totals[i] = totals.get(i, 0) + x
It is even easier for lists with setdefault:
agglomerate = collections.OrderedDict()
for i, x in some_generator():
    agglomerate.setdefault(i, []).append(x)
But if you use it more than a few times, it is probably better to set up a class, like in the other answers.
Here's another solution to think about if your use case is simple like mine and you don't necessarily want to add the complexity of a DefaultOrderedDict class implementation to your code.
from collections import OrderedDict
keys = ['a', 'b', 'c']
items = [(key, None) for key in keys]
od = OrderedDict(items)
(None is my desired default value.)
Note that this solution won't work if one of your requirements is to dynamically insert new keys with the default value. A tradeoff of simplicity.
Update 3/13/17 - I learned of a convenience function for this use case. Same as above but you can omit the line items = ... and just:
od = OrderedDict.fromkeys(keys)
Output:
OrderedDict([('a', None), ('b', None), ('c', None)])
And if your keys are single characters, you can just pass one string:
OrderedDict.fromkeys('abc')
This has the same output as the two examples above.
You can also pass a default value as the second arg to OrderedDict.fromkeys(...).
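For example, with 0 as the default:

>>> OrderedDict.fromkeys('abc', 0)
OrderedDict([('a', 0), ('b', 0), ('c', 0)])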
Another simple approach would be to use the dictionary get method:
>>> from collections import OrderedDict
>>> d = OrderedDict()
>>> d['key'] = d.get('key', 0) + 1
>>> d['key'] = d.get('key', 0) + 1
>>> d
OrderedDict([('key', 2)])
>>>
A simpler version of @zeekay's answer is:

from collections import OrderedDict

class OrderedDefaultListDict(OrderedDict):  # name according to default
    def __missing__(self, key):
        self[key] = value = []  # change to whatever default you want
        return value
A simple and elegant solution building on @NickBread's.
Has a slightly different API to set the factory, but good defaults are always nice to have.

class OrderedDefaultDict(OrderedDict):
    factory = list
    def __missing__(self, key):
        self[key] = value = self.factory()
        return value
I created a slightly fixed and simplified version of the accepted answer, current as of Python 3.7:
from collections import OrderedDict
from copy import copy, deepcopy
import pickle
from typing import Any, Callable

class DefaultOrderedDict(OrderedDict):
    def __init__(
        self,
        default_factory: Callable[[], Any],
        *args,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)
        self.default_factory = default_factory
    def __getitem__(self, key):
        try:
            return super().__getitem__(key)
        except KeyError:
            return self.__missing__(key)
    def __missing__(self, key):
        self[key] = value = self.default_factory()
        return value
    def __reduce__(self):
        return type(self), (self.default_factory, ), None, None, iter(self.items())
    def copy(self):
        return self.__copy__()
    def __copy__(self):
        return type(self)(self.default_factory, self)
    def __deepcopy__(self, memo):
        return type(self)(self.default_factory, deepcopy(tuple(self.items()), memo))
    def __repr__(self):
        return f'{self.__class__.__name__}({self.default_factory}, {OrderedDict(self).__repr__()})'
And, what may be even more important, it comes with some tests:
a = DefaultOrderedDict(list)

# testing default
assert a['key'] == []
a['key'].append(1)
assert a['key'] == [1, ]

# testing repr
assert repr(a) == "DefaultOrderedDict(<class 'list'>, OrderedDict([('key', [1])]))"

# testing copy
b = a.copy()
assert b['key'] is a['key']
c = copy(a)
assert c['key'] is a['key']
d = deepcopy(a)
assert d['key'] is not a['key']
assert d['key'] == a['key']

# testing pickle
saved = pickle.dumps(a)
restored = pickle.loads(saved)
assert restored is not a
assert restored == a

# testing order
a['second_key'] = [2, ]
a['key'] = [3, ]
assert list(a.items()) == [('key', [3, ]), ('second_key', [2, ])]
Inspired by other answers on this thread, you can use something like,
from collections import OrderedDict

class OrderedDefaultDict(OrderedDict):
    def __missing__(self, key):
        value = OrderedDefaultDict()
        self[key] = value
        return value
I would like to know if there are any downsides to initializing another object of the same class in the __missing__ method.
I tested the default dict and discovered it also kept its order. That is no coincidence: since Python 3.7, plain dicts (and therefore defaultdict) preserve insertion order. If you instead need the keys in sorted order, you can use the sorted function:

sorted(s.items())

I think it's simpler.

What would a "frozen dict" be?

A frozen set is a frozenset.
A frozen list could be a tuple.
What would a frozen dict be? An immutable, hashable dict.
I guess it could be something like collections.namedtuple, but that is more like a frozen-keys dict (a half-frozen dict). Isn't it?
A "frozendict" should be a frozen dictionary, it should have keys, values, get, etc., and support in, for, etc.
Update: there it is: PEP 603 (https://www.python.org/dev/peps/pep-0603)
Python doesn't have a builtin frozendict type. It turns out this wouldn't be useful too often (though it would still probably be useful more often than frozenset is).
The most common reason to want such a type is when memoizing function calls for functions with unknown arguments. The most common solution to store a hashable equivalent of a dict (where the values are hashable) is something like tuple(sorted(kwargs.items())).
This depends on the keys all being sortable against one another, which Python cannot positively promise in general. (But it can't promise much else, so don't sweat it too much.)
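As a quick sketch of that memoization pattern (the helper name and cache are mine, for illustration; it assumes the kwargs values are hashable):

_cache = {}

def memoized_call(fn, **kwargs):
    key = (fn, tuple(sorted(kwargs.items())))  # hashable stand-in for the kwargs dict
    if key not in _cache:
        _cache[key] = fn(**kwargs)
    return _cache[key]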
You could easily enough make some sort of wrapper that works much like a dict. It might look something like
from collections.abc import Mapping

class FrozenDict(Mapping):
    """Don't forget the docstrings!!"""
    def __init__(self, *args, **kwargs):
        self._d = dict(*args, **kwargs)
        self._hash = None
    def __iter__(self):
        return iter(self._d)
    def __len__(self):
        return len(self._d)
    def __getitem__(self, key):
        return self._d[key]
    def __hash__(self):
        # It would have been simpler and maybe more obvious to
        # use hash(tuple(sorted(self._d.items()))) from this discussion
        # so far, but this solution is O(n). I don't know what kind of
        # n we are going to run into, but sometimes it's hard to resist the
        # urge to optimize when it will gain improved algorithmic performance.
        if self._hash is None:
            hash_ = 0
            for pair in self.items():
                hash_ ^= hash(pair)
            self._hash = hash_
        return self._hash
It should work great:
>>> x = FrozenDict(a=1, b=2)
>>> y = FrozenDict(a=1, b=2)
>>> x is y
False
>>> x == y
True
>>> x == {'a': 1, 'b': 2}
True
>>> d = {x: 'foo'}
>>> d[y]
'foo'
Curiously, although we have the seldom useful frozenset, there's still no frozen mapping. The idea was rejected in PEP 416 -- Add a frozendict builtin type. This idea may be revisited in a later Python release, see PEP 603 -- Adding a frozenmap type to collections.
So the Python 2 solution to this:
def foo(config={'a': 1}):
    ...
Still seems to be the usual:
def foo(config=None):
    if config is None:
        config = {'a': 1}  # default config
    ...
In Python 3 you have the option of this:
from types import MappingProxyType

default_config = {'a': 1}
DEFAULTS = MappingProxyType(default_config)

def foo(config=DEFAULTS):
    ...
Now the default config can be updated dynamically, but remain immutable where you want it to be immutable by passing around the proxy instead.
So changes in the default_config will update DEFAULTS as expected, but you can't write to the mapping proxy object itself.
Admittedly it's not really the same thing as an "immutable, hashable dict", but it might be a decent substitute for some use cases of a frozendict.
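A quick demonstration of the proxy behavior described above:

default_config['b'] = 2  # mutate the underlying dict
print(DEFAULTS['b'])     # -> 2: the proxy reflects the change
DEFAULTS['c'] = 3        # TypeError: 'mappingproxy' object does not support item assignment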
Assuming the keys and values of the dictionary are themselves immutable (e.g. strings) then:
>>> d
{'forever': 'atones', 'minks': 'cards', 'overhands': 'warranted',
'hardhearted': 'tartly', 'gradations': 'snorkeled'}
>>> t = tuple((k, d[k]) for k in sorted(d.keys()))
>>> hash(t)
1524953596
There is no frozendict, but you can use MappingProxyType, which was added to the standard library in Python 3.3:
>>> from types import MappingProxyType
>>> foo = MappingProxyType({'a': 1})
>>> foo
mappingproxy({'a': 1})
>>> foo['a'] = 2
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'mappingproxy' object does not support item assignment
>>> foo
mappingproxy({'a': 1})
I think of frozendict every time I write a function like this:

def do_something(blah, optional_dict_parm=None):
    if optional_dict_parm is None:
        optional_dict_parm = {}
Install frozendict:

pip install frozendict

Use it!

from frozendict import frozendict

def smth(param=frozendict({})):
    pass
Here is the code I've been using. I subclassed frozenset. The advantages of this are the following.
This is a truly immutable object. No relying on the good behavior of future users and developers.
It's easy to convert back and forth between a regular dictionary and a frozen dictionary. FrozenDict(orig_dict) --> frozen dictionary. dict(frozen_dict) --> regular dict.
Update Jan 21 2015: The original piece of code I posted in 2014 used a for-loop to find a key that matched. That was incredibly slow. Now I've put together an implementation which takes advantage of frozenset's hashing features. Key-value pairs are stored in special containers where the __hash__ and __eq__ functions are based on the key only. This code has also been formally unit-tested, unlike what I posted here in August 2014.
MIT-style license.
if 3 / 2 == 1:
    version = 2
elif 3 / 2 == 1.5:
    version = 3

def col(i):
    ''' For binding named attributes to spots inside subclasses of tuple.'''
    g = tuple.__getitem__
    @property
    def _col(self):
        return g(self, i)
    return _col
class Item(tuple):
    ''' Designed for storing key-value pairs inside
        a FrozenDict, which itself is a subclass of frozenset.
        The __hash__ is overloaded to return the hash of only the key.
        __eq__ is overloaded so that normally it only checks whether the Item's
        key is equal to the other object, HOWEVER, if the other object itself
        is an instance of Item, it checks BOTH the key and value for equality.

        WARNING: Do not use this class for any purpose other than to contain
        key value pairs inside FrozenDict!!!!

        The __eq__ operator is overloaded in such a way that it violates a
        fundamental property of mathematics. That property, which says that
        a == b and b == c implies a == c, does not hold for this object.
        Here's a demonstration:
            [in]  >>> x = Item(('a', 4))
            [in]  >>> y = Item(('a', 5))
            [in]  >>> hash('a')
            [out] >>> 194817700
            [in]  >>> hash(x)
            [out] >>> 194817700
            [in]  >>> hash(y)
            [out] >>> 194817700
            [in]  >>> 'a' == x
            [out] >>> True
            [in]  >>> 'a' == y
            [out] >>> True
            [in]  >>> x == y
            [out] >>> False
    '''
    __slots__ = ()
    key, value = col(0), col(1)
    def __hash__(self):
        return hash(self.key)
    def __eq__(self, other):
        if isinstance(other, Item):
            return tuple.__eq__(self, other)
        return self.key == other
    def __ne__(self, other):
        return not self.__eq__(other)
    def __str__(self):
        return '%r: %r' % self
    def __repr__(self):
        return 'Item((%r, %r))' % self
class FrozenDict(frozenset):
    ''' Behaves in most ways like a regular dictionary, except that it's immutable.
        It differs from other implementations because it doesn't subclass "dict".
        Instead it subclasses "frozenset" which guarantees immutability.
        FrozenDict instances are created with the same arguments used to initialize
        regular dictionaries, and it has all the same methods.
            [in]  >>> f = FrozenDict(x=3, y=4, z=5)
            [in]  >>> f['x']
            [out] >>> 3
            [in]  >>> f['a'] = 0
            [out] >>> TypeError: 'FrozenDict' object does not support item assignment

        FrozenDict can accept un-hashable values, but FrozenDict is only
        hashable if its values are hashable.
            [in]  >>> f = FrozenDict(x=3, y=4, z=5)
            [in]  >>> hash(f)
            [out] >>> 646626455
            [in]  >>> g = FrozenDict(x=3, y=4, z=[])
            [in]  >>> hash(g)
            [out] >>> TypeError: unhashable type: 'list'

        FrozenDict interacts with dictionary objects as though it were a dict itself.
            [in]  >>> original = dict(x=3, y=4, z=5)
            [in]  >>> frozen = FrozenDict(x=3, y=4, z=5)
            [in]  >>> original == frozen
            [out] >>> True

        FrozenDict supports bi-directional conversions with regular dictionaries.
            [in]  >>> original = {'x': 3, 'y': 4, 'z': 5}
            [in]  >>> FrozenDict(original)
            [out] >>> FrozenDict({'x': 3, 'y': 4, 'z': 5})
            [in]  >>> dict(FrozenDict(original))
            [out] >>> {'x': 3, 'y': 4, 'z': 5} '''
    __slots__ = ()
    def __new__(cls, orig={}, **kw):
        if kw:
            d = dict(orig, **kw)
            items = map(Item, d.items())
        else:
            try:
                items = map(Item, orig.items())
            except AttributeError:
                items = map(Item, orig)
        return frozenset.__new__(cls, items)
    def __repr__(self):
        cls = self.__class__.__name__
        items = frozenset.__iter__(self)
        _repr = ', '.join(map(str, items))
        return '%s({%s})' % (cls, _repr)
    def __getitem__(self, key):
        if key not in self:
            raise KeyError(key)
        diff = self.difference
        item = diff(diff({key}))
        key, value = set(item).pop()
        return value
    def get(self, key, default=None):
        if key not in self:
            return default
        return self[key]
    def __iter__(self):
        items = frozenset.__iter__(self)
        return map(lambda i: i.key, items)
    def keys(self):
        items = frozenset.__iter__(self)
        return map(lambda i: i.key, items)
    def values(self):
        items = frozenset.__iter__(self)
        return map(lambda i: i.value, items)
    def items(self):
        items = frozenset.__iter__(self)
        return map(tuple, items)
    def copy(self):
        cls = self.__class__
        items = frozenset.copy(self)
        dupl = frozenset.__new__(cls, items)
        return dupl
    @classmethod
    def fromkeys(cls, keys, value):
        d = dict.fromkeys(keys, value)
        return cls(d)
    def __hash__(self):
        kv = tuple.__hash__
        items = frozenset.__iter__(self)
        return hash(frozenset(map(kv, items)))
    def __eq__(self, other):
        if not isinstance(other, FrozenDict):
            try:
                other = FrozenDict(other)
            except Exception:
                return False
        return frozenset.__eq__(self, other)
    def __ne__(self, other):
        return not self.__eq__(other)
if version == 2:
    # Here are the Python2 modifications
    class Python2(FrozenDict):
        def __iter__(self):
            items = frozenset.__iter__(self)
            for i in items:
                yield i.key
        def iterkeys(self):
            items = frozenset.__iter__(self)
            for i in items:
                yield i.key
        def itervalues(self):
            items = frozenset.__iter__(self)
            for i in items:
                yield i.value
        def iteritems(self):
            items = frozenset.__iter__(self)
            for i in items:
                yield (i.key, i.value)
        def has_key(self, key):
            return key in self
        def viewkeys(self):
            return dict(self).viewkeys()
        def viewvalues(self):
            return dict(self).viewvalues()
        def viewitems(self):
            return dict(self).viewitems()

    # If this is Python2, rebuild the class
    # from scratch rather than use a subclass
    py3 = FrozenDict.__dict__
    py3 = {k: py3[k] for k in py3}
    py2 = {}
    py2.update(py3)
    dct = Python2.__dict__
    py2.update({k: dct[k] for k in dct})
    FrozenDict = type('FrozenDict', (frozenset,), py2)
You may use frozendict from utilspie package as:
>>> from utilspie.collectionsutils import frozendict
>>> my_dict = frozendict({1: 3, 4: 5})
>>> my_dict # object of `frozendict` type
frozendict({1: 3, 4: 5})
# Hashable
>>> {my_dict: 4}
{frozendict({1: 3, 4: 5}): 4}
# Immutable
>>> my_dict[1] = 5
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/mquadri/workspace/utilspie/utilspie/collectionsutils/collections_utils.py", line 44, in __setitem__
self.__setitem__.__name__, type(self).__name__))
AttributeError: You can not call '__setitem__()' for 'frozendict' object
As per the documentation:
frozendict(dict_obj): Accepts obj of dict type and returns a hashable and immutable dict
Subclassing dict
I see this pattern in the wild (GitHub) and wanted to mention it:
class FrozenDict(dict):
    def __init__(self, *args, **kwargs):
        self._hash = None
        super(FrozenDict, self).__init__(*args, **kwargs)
    def __hash__(self):
        if self._hash is None:
            self._hash = hash(tuple(sorted(self.items())))  # iteritems() on py2
        return self._hash
    def _immutable(self, *args, **kws):
        raise TypeError('cannot change object - object is immutable')
    # makes (deep)copy a lot more efficient
    def __copy__(self):
        return self
    def __deepcopy__(self, memo=None):
        if memo is not None:
            memo[id(self)] = self
        return self
    __setitem__ = _immutable
    __delitem__ = _immutable
    pop = _immutable
    popitem = _immutable
    clear = _immutable
    update = _immutable
    setdefault = _immutable
example usage:

d1 = FrozenDict({'a': 1, 'b': 2})
d2 = FrozenDict({'a': 1, 'b': 2})
d1.keys()
assert isinstance(d1, dict)
assert len(set([d1, d2])) == 1  # hashable
Pros
support for get(), keys(), items() (iteritems() on py2) and all the goodies from dict out of the box, without explicitly implementing them
uses dict internally, which means performance (dict is written in C in CPython)
elegant, simple, and no black magic
isinstance(my_frozen_dict, dict) returns True - although Python encourages duck-typing, many packages use isinstance(); this can save many tweaks and customizations
Cons
any subclass can override this or access it internally (you can't really 100% protect something in Python; you should trust your users and provide good documentation)
if you care about speed, you might want to make __hash__ a bit faster
Yes, this is my second answer, but it is a completely different approach. The first implementation was in pure python. This one is in Cython. If you know how to use and compile Cython modules, this is just as fast as a regular dictionary. Roughly .04 to .06 micro-sec to retrieve a single value.
This is the file "frozen_dict.pyx"
import cython
from collections.abc import Mapping

cdef class dict_wrapper:
    cdef object d
    cdef int h
    def __init__(self, *args, **kw):
        self.d = dict(*args, **kw)
        self.h = -1
    def __len__(self):
        return len(self.d)
    def __iter__(self):
        return iter(self.d)
    def __getitem__(self, key):
        return self.d[key]
    def __hash__(self):
        if self.h == -1:
            self.h = hash(frozenset(self.d.items()))
        return self.h

class FrozenDict(dict_wrapper, Mapping):
    def __repr__(self):
        c = type(self).__name__
        r = ', '.join('%r: %r' % (k, self[k]) for k in self)
        return '%s({%s})' % (c, r)

__all__ = ['FrozenDict']
Here's the file "setup.py"
from distutils.core import setup
from Cython.Build import cythonize

setup(
    ext_modules=cythonize('frozen_dict.pyx')
)
If you have Cython installed, save the two files above into the same directory. Move to that directory in the command line.
python setup.py build_ext --inplace
python setup.py install
And you should be done.
The main disadvantage of namedtuple is that it needs to be specified before it is used, so it's less convenient for single-use cases.
However, there is a practical workaround that can be used to handle many such cases. Let's say that you want to have an immutable equivalent of the following dict:
MY_CONSTANT = {
    'something': 123,
    'something_else': 456
}
This can be emulated like this:
from collections import namedtuple

MY_CONSTANT = namedtuple('MyConstant', 'something something_else')(123, 456)
It's even possible to write an auxiliary function to automate this:
def freeze_dict(data):
    from collections import namedtuple
    keys = sorted(data.keys())
    frozen_type = namedtuple(''.join(keys), keys)
    return frozen_type(**data)

a = {'foo': 'bar', 'x': 'y'}
fa = freeze_dict(a)
assert a['foo'] == fa.foo
Of course this works only for flat dicts, but it shouldn't be too difficult to implement a recursive version.
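A possible recursive sketch (my own illustration; it assumes every key is a valid identifier and nested values are either dicts or plain values):

def deep_freeze_dict(data):
    from collections import namedtuple
    keys = sorted(data.keys())
    frozen_type = namedtuple('FrozenDict', keys)  # fixed type name, unlike above
    return frozen_type(**{k: deep_freeze_dict(v) if isinstance(v, dict) else v
                          for k, v in data.items()})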
freeze implements frozen collections (dict, list and set) that are hashable, type-hinted and will recursively freeze the data you give them (when possible) for you.
pip install frz
Usage:
from freeze import FDict

a_mutable_dict = {
    "list": [1, 2],
    "set": {3, 4},
}
a_frozen_dict = FDict(a_mutable_dict)
print(repr(a_frozen_dict))
# FDict: {'list': FList: (1, 2), 'set': FSet: {3, 4}}
In the absence of native language support, you can either do it yourself or use an existing solution. Fortunately Python makes it dead simple to extend off of their base implementations.
class frozen_dict(dict):
    def __setitem__(self, key, value):
        raise Exception('Frozen dictionaries cannot be mutated')

frozen = frozen_dict({'foo': 'FOO'})
print(frozen['foo'])      # FOO
frozen['foo'] = 'NEWFOO'  # Exception: Frozen dictionaries cannot be mutated

# OR

from types import MappingProxyType

frozen_dict = MappingProxyType({'foo': 'FOO'})
print(frozen_dict['foo'])      # FOO
frozen_dict['foo'] = 'NEWFOO'  # TypeError: 'mappingproxy' object does not support item assignment
At one point I needed to access fixed keys for something that was a sort of global constant, and I settled on something like this:
class MyFrozenDict:
    def __getitem__(self, key):
        if key == 'mykey1':
            return 0
        if key == 'mykey2':
            return "another value"
        raise KeyError(key)
Use it like:

a = MyFrozenDict()
print(a['mykey1'])
WARNING: I don't recommend this for most use cases as it makes some pretty severe tradeoffs.

How to properly subclass dict and override __getitem__ & __setitem__

I am debugging some code and I want to find out when a particular dictionary is accessed. Well, it's actually a class that subclasses dict and implements a couple of extra features. Anyway, what I would like to do is subclass dict myself and override __getitem__ and __setitem__ to produce some debugging output. Right now, I have
class DictWatch(dict):
    def __init__(self, *args):
        dict.__init__(self, args)
    def __getitem__(self, key):
        val = dict.__getitem__(self, key)
        log.info("GET %s['%s'] = %s" % str(dict.get(self, 'name_label')), str(key), str(val)))
        return val
    def __setitem__(self, key, val):
        log.info("SET %s['%s'] = %s" % str(dict.get(self, 'name_label')), str(key), str(val)))
        dict.__setitem__(self, key, val)
'name_label' is a key which will eventually be set that I want to use to identify the output. I have then changed the class I am instrumenting to subclass DictWatch instead of dict and changed the call to the superconstructor. Still, nothing seems to be happening. I thought I was being clever, but I wonder if I should be going a different direction.
Thanks for the help!
Another issue when subclassing dict is that the built-in __init__ doesn't call update, and the built-in update doesn't call __setitem__. So, if you want all setitem operations to go through your __setitem__ function, you should make sure that it gets called yourself:
class DictWatch(dict):
    def __init__(self, *args, **kwargs):
        self.update(*args, **kwargs)
    def __getitem__(self, key):
        val = dict.__getitem__(self, key)
        print('GET', key)
        return val
    def __setitem__(self, key, val):
        print('SET', key, val)
        dict.__setitem__(self, key, val)
    def __repr__(self):
        dictrepr = dict.__repr__(self)
        return '%s(%s)' % (type(self).__name__, dictrepr)
    def update(self, *args, **kwargs):
        print('update', args, kwargs)
        for k, v in dict(*args, **kwargs).items():
            self[k] = v
What you're doing should absolutely work. I tested out your class, and aside from a missing opening parenthesis in your log statements, it works just fine. There are only two things I can think of. First, is the output of your log statement set correctly? You might need to put a logging.basicConfig(level=logging.DEBUG) at the top of your script.
Second, __getitem__ and __setitem__ are only called for [] accesses. So make sure you only access DictWatch via d[key] and d[key] = value, rather than methods like d.get(), which do not go through __getitem__.
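A quick demonstration of that pitfall, using the print-based DictWatch from the answer above:

d = DictWatch(name_label='demo')  # goes through update -> __setitem__
d['x'] = 1      # SET x 1
_ = d['x']      # GET x
_ = d.get('x')  # plain dict.get: __getitem__ is not called, nothing printed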
Consider subclassing UserDict or UserList. These classes are intended to be subclassed, whereas the normal dict and list are not: they contain C-level optimisations that bypass overridden methods.
That should not really change the result (which should work, for good logging threshold values):
your __init__ should be:

def __init__(self, *args, **kwargs):
    dict.__init__(self, *args, **kwargs)

because if you call your method with DictWatch([(1, 2), (2, 3)]) or DictWatch(a=1, b=2), it will fail.
(Or, better, don't define a constructor for this class at all.)
As Andrew Pate's answer proposed, subclassing collections.UserDict instead of dict is much less error prone.
Here is an example showing an issue when inheriting dict naively:
class MyDict(dict):
    def __setitem__(self, key, value):
        super().__setitem__(key, value * 10)

d = MyDict(a=1, b=2)  # Bad! MyDict.__setitem__ not called
d.update(c=3)         # Bad! MyDict.__setitem__ not called
d['d'] = 4            # Good!
print(d)              # {'a': 1, 'b': 2, 'c': 3, 'd': 40}
UserDict inherits from collections.abc.MutableMapping, so this works as expected:
import collections

class MyDict(collections.UserDict):
    def __setitem__(self, key, value):
        super().__setitem__(key, value * 10)

d = MyDict(a=1, b=2)  # Good: MyDict.__setitem__ correctly called
d.update(c=3)         # Good: MyDict.__setitem__ correctly called
d['d'] = 4            # Good
print(d)              # {'a': 10, 'b': 20, 'c': 30, 'd': 40}
Similarly, you only have to implement __getitem__ to automatically be compatible with key in my_dict, my_dict.get, …
Note: UserDict is not a subclass of dict, so isinstance(UserDict(), dict) will fail (but isinstance(UserDict(), collections.abc.MutableMapping) will work).
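For instance, a hypothetical logging subclass only needs __getitem__; get() is built on top of it by the mapping machinery:

import collections

class LoggingDict(collections.UserDict):
    def __getitem__(self, key):
        print('GET', key)
        return super().__getitem__(key)

d = LoggingDict(a=1)
d.get('a')  # prints "GET a" and returns 1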
All you will have to do is
class BatchCollection(dict):
    def __init__(self, inpt={}):
        super(BatchCollection, self).__init__(inpt)
A sample usage for my personal use
### EXAMPLE
import collections

class BatchCollection(dict):
    def __init__(self, inpt={}):
        super(BatchCollection, self).__init__(inpt)
    def __setitem__(self, key, item):
        if (isinstance(key, tuple) and len(key) == 2
                and isinstance(item, collections.abc.Iterable)):
            # self.__dict__[key] = item
            super(BatchCollection, self).__setitem__(key, item)
        else:
            raise Exception(
                "Valid key should be a tuple (database_name, table_name) "
                "and value should be iterable")
Note: tested only in python3
