__str__ and pretty-printing (sub)dictionaries - python

I have an object that consists primarily of a very large nested dictionary:
class my_object(object):
    def __init__(self):
        self.the_dict = {}  # Big, nested dictionary
I've modified __str__ to pretty-print the top-level dictionary by simply "printing" the object:
def __str__(self):
    pp = pprint.PrettyPrinter()
    return pp.pformat(self.the_dict)
My goal here was to make the user's life a bit easier when he/she peruses the object with IPython:
print(the_object) # Pretty-prints entire dict
This works to show the user the entire dictionary, but I would like to expand this functionality to sub-portions of the dictionary as well, allowing the user to get pretty-printed output from commands such as:
print(the_object.the_dict['level1']['level2']['level3'])
(would pretty-print only the 'level3' sub-dict)
Is there a straightforward way to use __str__ (or similar) to do this?

You could provide a custom displayhook that prints builtin dictionaries and other objects you choose according to your taste at an interactive prompt:
>>> import sys
>>> oldhook = sys.displayhook
>>> sys.displayhook = your_module.DisplayHook(oldhook)
It doesn't change the behavior of print(obj).
The idea is that your users can choose whether they'd like to use your custom formatting for dicts or not.
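For illustration, a minimal sketch of what such a hook might look like (the DisplayHook name and fallback argument mirror the snippet above; the dict check and the use of pprint are assumptions):
import builtins
import pprint
import sys

class DisplayHook(object):
    """Pretty-print dicts at the interactive prompt; defer to the
    previous hook for everything else."""
    def __init__(self, fallback):
        self.fallback = fallback
    def __call__(self, value):
        if isinstance(value, dict):
            pprint.pprint(value)
            builtins._ = value  # preserve the usual `_` convention
        else:
            self.fallback(value)

sys.displayhook = DisplayHook(sys.displayhook)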

When a user says
print(the_object.the_dict['level1']['level2']['level3'])
Python evaluates the_object.the_dict['level1']['level2']['level3'] and (let's say) finds it is a dict, and passes that on to print.
Since the_object.the_dict is a dict, the rest is out of the_object's control. As you burrow down through level1, level2, and level3, only the type of object returned by the_object.the_dict['level1']['level2']['level3'] is going to affect how print behaves. the_object's __str__ method is not going to affect anything beyond the_object itself.
Moreover, when printing nested objects, pprint.pformat uses the repr of the object, not str of the object.
So to get the behavior we want, we need the_object.the_dict['level1']['level2']['level3'] to evaluate to something like a dict but with a different __repr__...
You could make a dict-like object (e.g. Turtle) and use Turtles all the way down:
import collections.abc
import pprint

class Turtle(collections.abc.MutableMapping):
    def __init__(self, *args, **kwargs):
        self._data = dict(*args, **kwargs)
    def __getitem__(self, key):
        return self._data[key]
    def __setitem__(self, key, value):
        self._data[key] = value
    def __delitem__(self, key):
        del self._data[key]
    def __iter__(self):
        return iter(self._data)
    def __len__(self):
        return len(self._data)
    def __contains__(self, x):
        return x in self._data
    def __repr__(self):
        return pprint.pformat(self._data)

class MyObject(object):
    def __init__(self):
        self.the_dict = Turtle()
    def __repr__(self):
        return repr(self.the_dict)

the_object = MyObject()
the_object.the_dict['level1'] = Turtle()
the_object.the_dict['level1']['level2'] = Turtle()
the_object.the_dict['level1']['level2']['level3'] = Turtle({i: i for i in range(20)})
print(the_object)
print(the_object.the_dict['level1']['level2']['level3'])
To use this, you must replace all dicts in your nested dict structure with Turtles.
But really (as you can tell from my fanciful naming), I don't expect you to use Turtles. Dicts are such nice, optimized builtins that I would not want to add this intermediate object just to effect pretty printing.
If instead you can convince your users to type
from pprint import pprint
then they can just use
pprint(the_object.the_dict['level1']['level2']['level3'])
to get pretty printing.

You can convert the underlying dictionaries to "pretty printing dictionaries" ... perhaps something like this will do:
class my_object(object):
    _pp = pprint.PrettyPrinter()

    class PP_dict(dict):
        def __setitem__(self, key, value):
            if isinstance(value, dict):
                value = my_object.PP_dict(value)
            super(my_object.PP_dict, self).__setitem__(key, value)
        def __str__(self):
            return my_object._pp.pformat(self)

    @property
    def the_dict(self):
        return self.__dict__['the_dict']

    @the_dict.setter
    def the_dict(self, value):
        self.__dict__['the_dict'] = my_object.PP_dict(value)
The property is only because I don't know how you set/manipulate "the_dict".
This approach is limited -- for instance if you put dict-derivatives that are not dicts in the_dict, they will be replaced by PP_dict. Also, if you have other reference to these subdicts, they will no longer be pointing to the same objects.
Another approach would be to put a __getitem__ in my_object directly, that returns a proxy wrapper for the dictionary that pretty prints the current object in __str__, overrides __getitem__ to return proxies for subobjects, and otherwise forwards all access/manipulation to the wrapped class.
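That second approach might be sketched roughly like this (PrettyProxy is a hypothetical name; a real version would forward more of the mapping protocol):
import pprint

class PrettyProxy(object):
    """Wraps a dict: pretty-prints itself, and wraps any sub-dict
    returned from indexing in another PrettyProxy."""
    def __init__(self, wrapped):
        self._wrapped = wrapped
    def __getitem__(self, key):
        value = self._wrapped[key]
        return PrettyProxy(value) if isinstance(value, dict) else value
    def __getattr__(self, name):
        # forward everything else (keys(), items(), get(), ...) to the dict
        return getattr(self._wrapped, name)
    def __str__(self):
        return pprint.pformat(self._wrapped)
    __repr__ = __str__

class my_object(object):
    def __init__(self):
        self.the_dict = {}  # plain dicts inside, nothing replaced
    def __getitem__(self, key):
        return PrettyProxy(self.the_dict)[key]
With this, print(the_object['level1']['level2']['level3']) pretty-prints the sub-dict while the_dict itself remains an ordinary dict.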


Dangers of overriding a dict in Python

I came across this question of trying to implement a dictionary using the collections.abc MutableMapping because I was looking for something similar.
For context, I was looking to implement a dictionary that would, out of convenience, also act as a mutable object, so that if I write d = CustomDictionary({'a': 4}), then d.a returns 4.
For reference, here is the code posted by Aaron Hall for this particular problem:
from collections.abc import MutableMapping

class D(MutableMapping):
    '''
    Mapping that works like both a dict and a mutable object, i.e.
        d = D(foo='bar')
    and
        d.foo returns 'bar'
    '''
    # ``__init__`` method required to create instance from class.
    def __init__(self, *args, **kwargs):
        '''Use the object dict'''
        self.__dict__.update(*args, **kwargs)
    # The next five methods are requirements of the ABC.
    def __setitem__(self, key, value):
        self.__dict__[key] = value
    def __getitem__(self, key):
        return self.__dict__[key]
    def __delitem__(self, key):
        del self.__dict__[key]
    def __iter__(self):
        return iter(self.__dict__)
    def __len__(self):
        return len(self.__dict__)
    # The final two methods aren't required, but nice for demo purposes:
    def __str__(self):
        '''returns simple dict representation of the mapping'''
        return str(self.__dict__)
    def __repr__(self):
        '''echoes class, id, & reproducible representation in the REPL'''
        return '{}, D({})'.format(super(D, self).__repr__(), self.__dict__)
However I didn't consider the dangers of doing so. Namely, if I create this custom dictionary class, I would expect it to have methods. But what if a method name clashes with a key of the same name? For example:
def doSomething(self):
    """A method of CustomDictionary"""
    print("hey!")

d = CustomDictionary()
d['a'] = 3
d['doSomething'] = 4
d.doSomething()
would raise a TypeError: 'int' object is not callable since d.doSomething would return 4 which is not a callable function.
What do you think? How would I go about implementing methods for a custom dictionary class while avoiding this problem?
Unfortunately I couldn't comment on the post since I don't have enough reputation, but I hope this question deserves its own post.
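For what it's worth, one sketch of a way around the clash (not from the original thread): keep the mapping data in a dedicated attribute instead of __dict__, and fall back to item access only when normal attribute lookup fails, so real methods always win:
from collections.abc import MutableMapping

class CustomDictionary(MutableMapping):
    def __init__(self, *args, **kwargs):
        # keep the mapping separate from __dict__ so keys can't shadow methods
        self._data = dict(*args, **kwargs)
    def __setitem__(self, key, value):
        self._data[key] = value
    def __getitem__(self, key):
        return self._data[key]
    def __delitem__(self, key):
        del self._data[key]
    def __iter__(self):
        return iter(self._data)
    def __len__(self):
        return len(self._data)
    def __getattr__(self, name):
        # called only when normal attribute lookup fails
        data = self.__dict__.get('_data', {})
        if name in data:
            return data[name]
        raise AttributeError(name)
    def doSomething(self):
        """A method of CustomDictionary"""
        print("hey!")

d = CustomDictionary({'a': 4})
d['doSomething'] = 4
d.doSomething()          # prints "hey!" -- the method wins
print(d['doSomething'])  # the key is still reachable via item access
print(d.a)               # attribute access still falls back to keys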

Understanding __getitem__ method

I have gone through most of the documentation of __getitem__ in the Python docs, but I am still unable to grasp the meaning of it.
So all I can understand is that __getitem__ is used to implement calls like self[key]. But what is the use of it?
Let's say I have a Python class defined in this way:
class Person:
    def __init__(self, name, age):
        self.name = name
        self.age = age
    def __getitem__(self, key):
        print("Inside `__getitem__` method!")
        return getattr(self, key)

p = Person("Subhayan", 32)
print(p["age"])
This returns the results as expected. But why use __getitem__ in the first place? I have also heard that Python calls __getitem__ internally. But why does it do it?
Can someone please explain this in more detail?
Cong Ma does a good job of explaining what __getitem__ is used for - but I want to give you an example which might be useful.
Imagine a class which models a building. Within the data for the building it includes a number of attributes, including descriptions of the companies that occupy each floor.
Without using __getitem__ we would have a class like this:
class Building(object):
    def __init__(self, floors):
        self._floors = [None] * floors
    def occupy(self, floor_number, data):
        self._floors[floor_number] = data
    def get_floor_data(self, floor_number):
        return self._floors[floor_number]

building1 = Building(4)  # Construct a building with 4 floors
building1.occupy(0, 'Reception')
building1.occupy(1, 'ABC Corp')
building1.occupy(2, 'DEF Inc')
print(building1.get_floor_data(2))
We could however use __getitem__ (and its counterpart __setitem__) to make the usage of the Building class 'nicer'.
class Building(object):
    def __init__(self, floors):
        self._floors = [None] * floors
    def __setitem__(self, floor_number, data):
        self._floors[floor_number] = data
    def __getitem__(self, floor_number):
        return self._floors[floor_number]

building1 = Building(4)  # Construct a building with 4 floors
building1[0] = 'Reception'
building1[1] = 'ABC Corp'
building1[2] = 'DEF Inc'
print(building1[2])
Whether you use __setitem__ like this really depends on how you plan to abstract your data - in this case we have decided to treat a building as a container of floors (and you could also implement an iterator for the Building, and maybe even the ability to slice - i.e. get more than one floor's data at a time - it depends on what you need).
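For instance, iteration could be added with a two-line method (a sketch extending the Building class above):
def __iter__(self):
    return iter(self._floors)
And because _floors is a plain list, slicing already works through the existing __getitem__: building1[0:2] returns ['Reception', 'ABC Corp'], since the slice object is passed straight through to the list.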
The [] syntax for getting item by key or index is just syntax sugar.
When you evaluate a[i] Python calls a.__getitem__(i) (or type(a).__getitem__(a, i), but this distinction is about inheritance models and is not important here). Even if the class of a may not explicitly define this method, it is usually inherited from an ancestor class.
All the (Python 2.7) special method names and their semantics are listed here: https://docs.python.org/2.7/reference/datamodel.html#special-method-names
The magic method __getitem__ is basically used for accessing list items, dictionary entries, array elements etc. It is very useful for a quick lookup of instance attributes.
Here I am showing this with an example class Person that can be instantiated by 'name', 'age', and 'dob' (date of birth). The __getitem__ method is written in a way that one can access the indexed instance attributes, such as first or last name, day, month or year of the dob, etc.
import copy

# Constants that can be used to index date of birth's Date-Month-Year
D = 0; M = 1; Y = -1

class Person(object):
    def __init__(self, name, age, dob):
        self.name = name
        self.age = age
        self.dob = dob
    def __getitem__(self, indx):
        print("Calling __getitem__")
        p = copy.copy(self)
        p.name = p.name.split(" ")[indx]
        p.dob = p.dob[indx]  # or, p.dob = p.dob.__getitem__(indx)
        return p
Suppose one user input is as follows:
p = Person(name = 'Jonab Gutu', age = 20, dob=(1, 1, 1999))
With the help of __getitem__ method, the user can access the indexed attributes. e.g.,
print(p[0].name)  # print first (or last) name
print(p[Y].dob)   # print (Date or Month or) Year of the 'date of birth'
As a side note, the __getitem__ method also allows you to turn your object into an iterable.
Example: if used with iter(), it can generate as many int squared values as you want:
class MyIterable:
    def __getitem__(self, index):
        return index ** 2

obj = MyIterable()
obj_iter = iter(obj)
for i in range(1000):
    print(next(obj_iter))
For readability and consistency. That question is part of why operator overloading exists, since __getitem__ is one of the functions that implement it.
If you get an unknown class, written by an unknown author, and you want to add its 3rd element to its 5th element, you can very well assume that obj[3] + obj[5] will work.
What would that line look like in a language that does not support operator overloading? Probably something like obj.get(3).add(obj.get(5))? Or maybe obj.index(3).plus(obj.index(5))?
The problem with the second approach is that (1) it's much less readable and (2) you can't guess, you have to look up the documentation.
A common library that uses this technique is the 'email' module. It uses the __getitem__ method in the email.message.Message class, which in turn is inherited by MIME-related classes.
Then, in the end, all you need to do to get a valid MIME message with sane defaults is add your headers. There's a lot more going on under the hood but the usage is simple.
message = MIMEText(message_text)
message['to'] = to
message['from'] = sender
message['subject'] = subject
Django core has several interesting and nifty usages for magic methods, including __getitem__. These were my recent finds:
Django HTTP Request
When you submit GET/POST data in Django, it will be stored in Django's request object as request.GET/request.POST dict. This dict is of type QueryDict which inherits from MultiValueDict.
When you submit data, say user_id=42, QueryDict will be stored/represented as:
<QueryDict: {'user_id': ['42']}>
So, the passed data becomes
'user_id': ['42']
instead of the intuitive
'user_id': '42'
MultiValueDict's docstring explains though why it needs to auto-convert this to list format:
This class exists to solve the irritating problem raised by cgi.parse_qs, which returns a list for every key..
Given that the QueryDict values are transformed into lists, they will need to be accessed then like this (same idea with request.GET):
request.POST['user_id'][0]
request.POST['user_id'][-1]
request.POST.get('user_id')[0]
request.POST.get('user_id')[-1]
But these are horrible ways to access the data, so Django overrides __getitem__ and get in MultiValueDict. This is the simplified version:
def __getitem__(self, key):
    """
    Accesses the list value automatically
    using the `-1` list index.
    """
    list_ = super().__getitem__(key)
    return list_[-1]

def get(self, key, default=None):
    """
    Just calls the `__getitem__` above.
    """
    return self[key]
With these, you now have more intuitive accessors:
request.POST['user_id']
request.POST.get('user_id')
Django Forms
In Django, you could declare forms like this (includes ModelForm):
class ArticleForm(...):
    title = ...
These forms inherit from BaseForm, and have these overridden magic methods (simplified version):
def __iter__(self):
    for name in self.fields:
        yield self[name]

def __getitem__(self, name):
    return self.fields[name]
resulting in these convenient patterns:
# Instead of `for field in form.fields`.
# This is a common pattern in Django templates.
for field in form:
    ...

# Instead of `title = form.fields['title']`
title = form['title']
In summary, magic methods (or their overrides) increase code readability and developer experience/convenience.
The use of __getitem__ includes implementing control flow measures that for some weird reason cannot be performed lower in the execution stack:
class HeavenlyList(list):
    """don't let caller get 666th element"""
    def __getitem__(self, key):
        """return element(s)"""
        if isinstance(key, slice):
            getter = super().__getitem__
            # slice.indices() fills in the defaults for start/stop/step
            return [
                getter(i)
                for i in range(*key.indices(len(self)))
                if i != 666
            ]
        return super().__getitem__(key) if key != 666 else None
A similar, but more interesting reason is to allow slice-based access to elements in container/sequence types that ordinarily don't allow it:
class SliceDict(dict):
    """handles slices"""
    def __setitem__(self, key, value):
        """map key to value"""
        if not isinstance(key, int):
            raise TypeError("key must be an integer")
        super().__setitem__(key, value)
    def __getitem__(self, key):
        """return value(s)"""
        if not isinstance(key, slice):
            return super().__getitem__(key)
        getter = super().__getitem__
        # a dict has no length-based slice defaults, so require explicit
        # start/stop and default the step to 1
        return [
            getter(i)
            for i in range(key.start, key.stop, key.step or 1)
        ]
Another interesting use is overriding str.__getitem__ to accept str objects as well as ints and slices, such that the str input is a regular expression, and the return value is the match object iterator returned by re.finditer:
from re import finditer

class REString(str):
    """handles regular expressions"""
    re_flags = 0
    def __getitem__(self, key):
        """return some/all of string or re.finditer"""
        if isinstance(key, str):
            return finditer(key, self, flags=self.re_flags)
        return super().__getitem__(key)
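Usage might then look like this (a quick sketch):
s = REString("the cat sat on the mat")
for match in s[r"[cms]at"]:  # a str key is treated as a pattern
    print(match.group(0))    # cat, sat, mat
print(s[0:7])                # ordinary slicing still works: 'the cat'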
A real-world problem where overriding dict.__getitem__ in particular proves useful is when a program requires information that is distributed over the internet and available over HTTP. Because this information is remote, the process can employ some level of laziness, only retrieving data for items it doesn't have or that have changed. The specific example is having a dictionary instance lazily retrieve and store Python Enhancement Proposals. There are many of these documents, sometimes they are revised, and they all reside on hosts known by the domain name peps.python.org. Therefore the idea is to make an HTTP GET request for the PEP number passed into __getitem__, fetching it if the dictionary doesn't already contain it or if its HTTP ETag has changed.
from http import HTTPStatus, client

class PEPDict(dict):
    """lazy PEP container"""
    conn = client.HTTPSConnection("peps.python.org")
    def __getitem__(self, pep):
        """return pep pep"""
        # if lazy for too long
        if self.conn.sock is None:
            self.conn.connect()
        # build etag check in request header
        requestheaders = dict()
        if pep in self:
            requestheaders = {
                "if-none-match": super().__getitem__(pep)[0]
            }
        # make request and fetch response
        self.conn.request(
            "GET",
            "/%s/" % str(pep).zfill(4),
            headers=requestheaders
        )
        response = self.conn.getresponse()
        body = response.read()  # always drain so the connection can be reused
        # (re)set the pep
        if response.status == HTTPStatus.OK:
            self.__setitem__(
                pep, (
                    response.getheader("etag"),
                    body
                )
            )
        # raise if status is neither ok nor not-modified
        elif response.status != HTTPStatus.NOT_MODIFIED:
            raise Exception("something weird happened")
        return super().__getitem__(pep)[1]
A good resource for further understanding its use is to review the associated special/dunder methods in the emulating container types section of Python's data model document.
OK I'll just leave this here. OP questions the very basics of software engineering.
This is about defining a class's interface. Consistency, readability, or whatever else is secondary.
First of all this is about how different parts of the project can talk to your object.
Imagine a function which calls [] on some object. Now you are tasked to do exactly what this function does with some new type of object that you have. But your object is not a list, dict, or tuple.
Now you don't need to implement anything; you only need to define a __getitem__ for the class of your object.
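For example (a hypothetical consumer function, not from the original post):
def first_and_last(obj):
    # cares only that type(obj) defines __getitem__; lists, tuples,
    # strings, and your own classes all qualify
    return obj[0], obj[-1]

print(first_and_last([10, 20, 30]))  # (10, 30)
print(first_and_last("hello"))       # ('h', 'o')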
Interfaces create building blocks out of a bunch of internal implementations. Define them wisely.

How to define a type/class in Python dynamically?

In C, if I want to define a type from a name I could use the preprocessor. For example,
#define DEFINE_STRUCT(name) \
struct My##name##Struct \
{ \
    int integerMember##name; \
    double doubleMember##name; \
}
And then I could define a concrete struct like so
DEFINE_STRUCT(Useless);
and use the Useless struct like this
struct MyUselessStruct instance;
So my question is
Is there a way to achieve this in Python?
I have the following class
class ClassName(SQLTable):
    items = []
    def __init__(self, value):
        SQLTable.__init__(self)
        # some common code
        if value in self.items:
            return
        self.items.append(value)
For each ClassName the contents of items will be different, so I would like something like
def defineclass(ClassName):
    class <Substitute ClassName Here>(SQLTable):
        items = []
        def __init__(self, value):
            SQLTable.__init__(self)
            # some common code
            if value in self.items:
                return
            self.items.append(value)
I don't want to repeat the code over and over, I would like to generate it if possible.
You're very close:
def defineclass(ClassName):
    class C(SQLTable):
        items = []
        def __init__(self, value):
            SQLTable.__init__(self)
            # some common code
            if value in self.items:
                return
            self.items.append(value)
    C.__name__ = ClassName
    return C
As you can see, you define it using a placeholder name, then assign its __name__ attribute. After that, you return it so you can then use it as you desire in your client code. Remember, a Python class is an object just as much as any other, so you can return it, store it in a variable, put it into a dictionary, or whatever you like once you've defined it.
The __name__ attribute is a convenience, mainly so error messages make sense. You may not actually need to give each class a unique name.
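Hypothetical usage, assuming SQLTable is defined elsewhere:
Lemon = defineclass('Lemon')
Melon = defineclass('Melon')

lemon = Lemon('sour')
print(Lemon.__name__)  # 'Lemon'
print(Lemon.items)     # ['sour'] -- each generated class gets its own list
print(Melon.items)     # []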
An alternative for this particular use case might be to use subclassing:
class Base(SQLTable):
    def __init__(self, value):
        SQLTable.__init__(self)
        # some common code
        if value in self.items:
            return
        self.items.append(value)

class Thing1(Base): items = []
class Thing2(Base): items = []
By not defining items on the base class, you ensure that you must subclass it and define a per-class items to actually use the class.
kindall's answer is very clear and likely preferable, but there is a built-in function to generate classes: type. When called with one argument, it returns the type of an object. When called with three arguments it generates a new type/class. The arguments are class name, base classes, and the class dict.
def custom_init(self, value):
    SQLTable.__init__(self)
    if value in self.items:
        return
    self.items.append(value)

def defineclass(classname):
    #           __name__   __bases__     __dict__
    return type(classname, (SQLTable,), {'__init__': custom_init,
                                         'items': []})

Lazy-loading variables using overloaded decorators

I have a state object that represents a system. Properties within the state object are populated from [huge] text files. As not every property is accessed every time a state instance is created, it makes sense to load them lazily:
class State:
    def import_positions(self):
        self._positions = {}
        # Code which populates self._positions

    @property
    def positions(self):
        try:
            return self._positions
        except AttributeError:
            self.import_positions()
            return self._positions

    def import_forces(self):
        self._forces = {}
        # Code which populates self._forces

    @property
    def forces(self):
        try:
            return self._forces
        except AttributeError:
            self.import_forces()
            return self._forces
There's a lot of repetitive boilerplate code here. Moreover, sometimes an import_abc can populate a few variables (i.e. import a few variables from a small data file if it's already open).
It makes sense to overload @property such that it accepts a function to "provide" that variable, viz:
class State:
    def import_positions(self):
        self._positions = {}
        # Code which populates self._positions

    @lazyproperty(import_positions)
    def positions(self):
        pass

    def import_forces(self):
        self._forces = {}
        # Code which populates self._forces and self._strain

    @lazyproperty(import_forces)
    def forces(self):
        pass

    @lazyproperty(import_forces)
    def strain(self):
        pass
However, I cannot seem to find a way to trace exactly what methods are being called in the @property decorator. As such, I don't know how to approach overloading @property into my own @lazyproperty.
Any thoughts?
Maybe you want something like this. It's a sort of simple memoization function combined with @property.
def lazyproperty(func):
    values = {}
    def wrapper(self):
        if self not in values:
            values[self] = func(self)
        return values[self]
    wrapper.__name__ = func.__name__
    return property(wrapper)

class State:
    @lazyproperty
    def positions(self):
        print('loading positions')
        return {1, 2, 3}

s = State()
print(s.positions)
print(s.positions)
Which prints:
loading positions
{1, 2, 3}
{1, 2, 3}
Caveat: entries in the values dictionary won't be garbage collected, so it's not suitable for long-running programs. If the loaded value is immutable and shared across all instances, it can be stored on the function object itself for better speed and memory use:
try:
    return func.value
except AttributeError:
    func.value = func(self)
    return func.value
I think you can remove even more boilerplate by writing a custom descriptor class that decorates the loader method. The idea is to have the descriptor itself encode the lazy-loading logic, meaning that the only thing you define in an actual method is the loader itself (which is the only thing that, apparently, really does have to vary for different values). Here's an example:
class LazyDesc(object):
    def __init__(self, func):
        self.loader = func
        self.secretAttr = '_' + func.__name__
    def __get__(self, obj, cls):
        try:
            return getattr(obj, self.secretAttr)
        except AttributeError:
            print("Lazily loading", self.secretAttr)
            self.loader(obj)
            return getattr(obj, self.secretAttr)

class State(object):
    @LazyDesc
    def positions(self):
        self._positions = {'some': 'positions'}

    @LazyDesc
    def forces(self):
        self._forces = {'some': 'forces'}
Then:
>>> x = State()
>>> x.forces
Lazily loading _forces
{'some': 'forces'}
>>> x.forces
{'some': 'forces'}
>>> x.positions
Lazily loading _positions
{'some': 'positions'}
>>> x.positions
{'some': 'positions'}
Notice that the "lazy loading" message was printed only on the first access for each attribute. This version also auto-creates the "secret" attribute to hold the real data by prepending an underscore to the method name (i.e., data for positions is stored in _positions). In this example, there's no setter, so you can't do x.positions = blah (although you can still mutate the positions with x.positions['key'] = val), but the approach could be extended to allow setting as well, as sketched below.
The nice thing about this approach is that your lazy logic is transparently encoded in the descriptor __get__, meaning that it easily generalizes to other kinds of boilerplate that you might want to abstract away in a similar manner.
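For instance, setting could be supported by adding a __set__ to LazyDesc along these lines (a sketch; this turns it into a data descriptor, which is fine here because the value lives under the secret attribute name rather than the property name):
def __set__(self, obj, value):
    # bypass the loader and store straight into the secret
    # attribute that __get__ reads back
    setattr(obj, self.secretAttr, value)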
However, I cannot seem to find a way to trace exactly what methods are
being called in the @property decorator.
property is actually a type (whether you use it with the decorator syntax or not is orthogonal), which implements the descriptor protocol (https://docs.python.org/2/howto/descriptor.html). An overly simplified (I skipped the deleter, doc and quite a few other things...) pure-python implementation would look like this:
class property(object):
    def __init__(self, fget=None, fset=None):
        self.fget = fget
        self.fset = fset
    def setter(self, func):
        self.fset = func
        return func
    def __get__(self, obj, type=None):
        return self.fget(obj)
    def __set__(self, obj, value):
        if self.fset:
            self.fset(obj, value)
        else:
            raise AttributeError("Attribute is read-only")
Now overloading property is not necessarily the simplest solution. In fact there are quite a few existing implementations out there, including Django's "cached_property" (cf http://ericplumb.com/blog/understanding-djangos-cached_property-decorator.html for more about it) and pydanny's "cached-property" package (https://pypi.python.org/pypi/cached-property/0.1.5).
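For reference, the core trick shared by those implementations is a non-data descriptor that caches its result in the instance __dict__ on first access; a minimal sketch (not the exact Django code):
class cached_property(object):
    """Non-data descriptor: the first access stores the computed value
    in the instance __dict__, which then shadows the descriptor, so the
    getter runs at most once per instance."""
    def __init__(self, func):
        self.func = func
        self.__name__ = func.__name__
    def __get__(self, obj, cls=None):
        if obj is None:
            return self
        value = obj.__dict__[self.__name__] = self.func(obj)
        return value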

OO design: an object that can be exported to a "row", while accessing header names, without repeating myself

Sorry, badly worded title. I hope a simple example will make it clear. Here's the easiest way to do what I want to do:
import csv

class Lemon(object):
    headers = ['ripeness', 'colour', 'juiciness', 'seeds?']
    def to_row(self):
        return [self.ripeness, self.colour, self.juiciness, self.seeds > 0]

def save_lemons(lemonset):
    f = open('lemons.csv', 'w')
    out = csv.writer(f)
    out.writerow(Lemon.headers)
    for lemon in lemonset:
        out.writerow(lemon.to_row())
This works alright for this small example, but I feel like I'm "repeating myself" in the Lemon class. And in the actual code I'm trying to write (where the number of variables I'm exporting is ~50 rather than 4, and where to_row calls a number of private methods that do a bunch of weird calculations), it becomes awkward.
As I write the code to generate a row, I need to constantly refer to the "headers" variable to make sure I'm building my list in the correct order. If I want to change the variables being outputted, I need to make sure to_row and headers are being changed in parallel (exactly the kind of thing that DRY is meant to prevent, right?).
Is there a better way I could design this code? I've been playing with function decorators, but nothing has stuck. Ideally I should still be able to get at the headers without having a particular lemon instance (i.e. it should be a class variable or class method), and I don't want to have a separate method for each variable.
In this case, getattr() is your friend: it allows you to get a variable based on a string name. For example:
def to_row(self):
    return [getattr(self, head) for head in self.headers]
EDIT: to properly use the header seeds?, you would need to set the attribute seeds? on the object with setattr(self, 'seeds?', self.seeds > 0) right above the return statement, as sketched below.
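Putting the two pieces together, to_row might read (a sketch):
def to_row(self):
    # derive the awkwardly named 'seeds?' column just before building the row
    setattr(self, 'seeds?', self.seeds > 0)
    return [getattr(self, head) for head in self.headers]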
We could use some metaclass shenanigans to do this...
In Python 2, attributes are passed to the metaclass in a dict, without preserving order. We'll also want a base class to work with so we can distinguish class attributes that should be mapped into the row. In Python 3, we could dispense with just about all of this base descriptor class.
import itertools
import functools

@functools.total_ordering
class DryDescriptor(object):
    _order_gen = itertools.count()
    def __init__(self, alias=None):
        self.alias = alias
        self.order = next(self._order_gen)
    def __lt__(self, other):
        return self.order < other.order
We will want a Python descriptor for every attribute we wish to map into the row. __slots__ are a nice way to get data descriptors without much work. One caveat, though: we'll have to manually remove the helper instance to make the real slot descriptor visible.
class slot(DryDescriptor):
    def annotate(self, attr, attrs):
        del attrs[attr]
        self.attr = attr
        attrs.setdefault('__slots__', []).append(attr)
    def annotate_class(self, cls):
        if self.alias is not None:
            setattr(cls, self.alias, getattr(cls, self.attr))
For computed fields, we can memoize results. Memoizing off of the annotated instance is tricky without a memory leak; we need weakref. Alternatively, we could have arranged for another slot just to store the cached value. This also isn't quite thread safe, but pretty close.
import weakref

class memo(DryDescriptor):
    _memo = None
    def __call__(self, method):
        self.getter = method
        return self
    def annotate(self, attr, attrs):
        if self.alias is not None:
            attrs[self.alias] = self
    def annotate_class(self, cls):
        pass
    def __get__(self, instance, owner):
        if instance is None:
            return self
        if self._memo is None:
            self._memo = weakref.WeakKeyDictionary()
        try:
            return self._memo[instance]
        except KeyError:
            return self._memo.setdefault(instance, self.getter(instance))
On the metaclass, all of the descriptors we created above are found, sorted by
creation order, and instructed to annotate the new, created class. This does
not correctly treat derived classes and could use some other conveniences like
an __init__ for all the slots.
class DryMeta(type):
    def __new__(mcls, name, bases, attrs):
        descriptors = sorted((value, key)
                             for key, value
                             in attrs.iteritems()
                             if isinstance(value, DryDescriptor))
        for descriptor, attr in descriptors:
            descriptor.annotate(attr, attrs)
        cls = type.__new__(mcls, name, bases, attrs)
        for descriptor, attr in descriptors:
            descriptor.annotate_class(cls)
        cls._header_descriptors = [getattr(cls, attr)
                                   for descriptor, attr in descriptors]
        return cls
Finally, we want a base class to inherit from so that we can have a to_row method. This just invokes all of the __get__s of the respective descriptors, in order.
class DryBase(object):
    __metaclass__ = DryMeta
    def to_row(self):
        cls = type(self)
        return [desc.__get__(self, cls) for desc in cls._header_descriptors]
Assuming all of that is tucked away, out of sight, the definition of a class that uses this feature is mostly free of repetition. The only shortcoming is that, to be practical, every field needs a Python-friendly name; thus we had the alias keyword to associate 'seeds?' with has_seeds:
class ADryRow(DryBase):
    __slots__ = ['seeds']
    ripeness = slot()
    colour = slot()
    juiciness = slot()

    @memo(alias='seeds?')
    def has_seeds(self):
        print "Expensive!!!"
        return self.seeds > 0
>>> my_row = ADryRow()
>>> my_row.ripeness = "tart"
>>> my_row.colour = "#8C2"
>>> my_row.juiciness = 0.3479
>>> my_row.seeds = 19
>>>
>>> print my_row.to_row()
Expensive!!!
['tart', '#8C2', 0.3479, True]
>>> print my_row.to_row()
['tart', '#8C2', 0.3479, True]
