Optimizing a modifiable named list based on namedtuple - Python

My goal is to optimize a framework based on a stack of modifiers for CSV-sourced lists. Each modifier uses the header list so that it can address fields by name.
CSV example (including header):
date;place
13/02/2013;New York
15/04/2012;Buenos Aires
29/10/2010;Singapour
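For reference, the sample file is semicolon-delimited, so reading it with the csv module needs the delimiter passed explicitly; a minimal sketch (the filename is assumed):

import csv

# ';' is the delimiter used in the sample above
with open("sample.csv", "rb") as f:
    stream = csv.reader(f, delimiter=';')
    header = stream.next()   # ['date', 'place']
    for row in stream:
        print row            # e.g. ['13/02/2013', 'New York']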
I have written some code based on namedtuple so that I can use the lists generated by the csv module without reorganizing the data every time. The generated code is below:
class MyNamedList(object):
    __slots__ = ("__values",)
    _fields = ['date', 'ignore', 'place']

    def __init__(self, values):
        self.__values = values
        if len(self.__values) <= 151:
            for i in range(len(self.__values), 151):
                self.__values += [None,]

    @property
    def date(self):
        return self.__values[0]

    @date.setter
    def date(self, val):
        self.__values[0] = val

    @property
    def ignore(self):
        return self.__values[150]

    @ignore.setter
    def ignore(self, val):
        self.__values[150] = val

    @property
    def place(self):
        return self.__values[1]

    @place.setter
    def place(self, val):
        self.__values[1] = val
I must say I am very disappointed with the performance of this class. Calling a simple modifier function (which sets "ignore" to True 100 times; yes, I know it is useless) for each line of a 70,000-line CSV file takes 9 seconds with PyPy (5.5 with original Python), whereas the equivalent code using a plain list takes 1.1 seconds (the same with both PyPy and original Python).
Is there anything I could do to get comparable performance between the two approaches? To me, record.ignore = True could be directly inlined (or so) and therefore translated into record[150] = True. Is there any blocking point I don't see that prevents this from happening?
Note that the record I am modifying is actually (for now) not created for each line in the CSV file, meaning that growing the list happens only once, before the iteration.
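For what it's worth, a sketch of a structural alternative (not from the original post): if keeping the raw list from csv.reader is not essential, giving each field its own slot makes record.ignore = True a plain attribute write, with no Python-level property call on the way:

# A sketch, with the field set assumed from the example above.
class MyRecord(object):
    __slots__ = ("date", "place", "ignore")

    def __init__(self, row):
        self.date = row[0]
        self.place = row[1]
        self.ignore = None

record = MyRecord(["13/02/2013", "New York"])
record.ignore = True   # direct slot write, no property indirection

The trade-off is that the values no longer live in the original list, so handing a row back to code that expects a list means rebuilding it.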
Update: sample code
--> Using namedlist
import namedlist
import csv

MyNamedList = namedlist.namedlist("MyNamedList", {"a": 1, "b": 2, "ignore": 150})
test = MyNamedList([0, 1])

def foo(a):
    test.ignore = True  # this line is repeated 100 times

stream = csv.reader(open("66666.csv", "rb"))
for i in stream:
    foo(i)
--> Not using namedlist
import namedlist
import csv

MyNamedList = namedlist.namedlist("MyNamedList", {"a": 1, "b": 2, "ignore": 150})
test = MyNamedList([0, 1])

sample_data = []
for i in range(len(sample_data), 151):
    sample_data += [None,]

def foo(a):
    sample_data[150] = True  # this line is repeated 100 times

stream = csv.reader(open("66666.csv", "rb"))
for i in stream:
    foo(i)
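To measure the gap in isolation, a sketch using timeit (the iteration count is arbitrary; the class below mirrors the generated one, minus the code generation):

import timeit

setup = """
class Rec(object):
    __slots__ = ("_values",)
    def __init__(self, values):
        self._values = values
    @property
    def ignore(self):
        return self._values[150]
    @ignore.setter
    def ignore(self, val):
        self._values[150] = val

rec = Rec([None] * 151)
raw = [None] * 151
"""

# Each property write is a Python-level function call; the raw list
# write is a single item assignment.
print timeit.timeit("rec.ignore = True", setup=setup, number=1000000)
print timeit.timeit("raw[150] = True", setup=setup, number=1000000)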
Update #2: the code for namedlist.py (heavily based on namedtuple.py):
# Retrieved from http://code.activestate.com/recipes/500261/
# Licensed under the PSF license
from keyword import iskeyword as _iskeyword
import sys as _sys

def namedlist(typename, field_indices, verbose=False, rename=False):
    # Parse and validate the field names. Validation serves two purposes:
    # generating informative error messages and preventing template injection attacks.
    field_names = field_indices.keys()
    for name in [typename,] + field_names:
        if not min(c.isalnum() or c == '_' for c in name):
            raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
        if _iskeyword(name):
            raise ValueError('Type names and field names cannot be a keyword: %r' % name)
        if name[0].isdigit():
            raise ValueError('Type names and field names cannot start with a number: %r' % name)
    seen_names = set()
    for name in field_names:
        if name.startswith('_') and not rename:
            raise ValueError('Field names cannot start with an underscore: %r' % name)
        if name in seen_names:
            raise ValueError('Encountered duplicate field name: %r' % name)
        seen_names.add(name)
    # Create and fill in the class template
    numfields = len(field_names)
    argtxt = repr(field_names).replace("'", "")[1:-1]  # tuple repr without parens or quotes
    reprtxt = ', '.join('%s=%%r' % name for name in field_names)
    max_index = -1
    for name in field_names:
        index = field_indices[name]
        if max_index < index:
            max_index = index
    max_index += 1
    template = '''class %(typename)s(object):
    __slots__ = ("__values",)
    _fields = %(field_names)r
    def __init__(self, values):
        self.__values = values
        if len(self.__values) <= %(max_index)s:
            for i in range(len(self.__values), %(max_index)s):
                self.__values += [None,]''' % locals()
    for name in field_names:
        index = field_indices[name]
        template += '''
    @property
    def %s(self):
        return self.__values[%d]
    @%s.setter
    def %s(self, val):
        self.__values[%d] = val''' % (name, index, name, name, index)
    if verbose:
        print template
    # Execute the template string in a temporary namespace
    namespace = {'__name__': 'namedtuple_%s' % typename,
                 '_property': property, '_tuple': tuple}
    try:
        exec template in namespace
    except SyntaxError, e:
        raise SyntaxError(e.message + ':\n' + template)
    result = namespace[typename]
    # For pickling to work, the __module__ variable needs to be set to the frame
    # where the named tuple is created. Bypass this step in environments where
    # sys._getframe is not defined (Jython for example) or sys._getframe is not
    # defined for arguments greater than 0 (IronPython).
    try:
        result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
    except (AttributeError, ValueError):
        pass
    return result
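For the example mapping above, {"a": 1, "b": 2, "ignore": 150}, the factory builds and execs a class along these lines (an illustration of the template output; the field order follows dict.keys(), so it is arbitrary, and only the ignore pair is shown in full):

class MyNamedList(object):
    __slots__ = ("__values",)
    _fields = ['a', 'ignore', 'b']
    def __init__(self, values):
        self.__values = values
        if len(self.__values) <= 151:
            for i in range(len(self.__values), 151):
                self.__values += [None,]
    @property
    def ignore(self):
        return self.__values[150]
    @ignore.setter
    def ignore(self, val):
        self.__values[150] = val
    # ... plus an identical property/setter pair for a (index 1) and b (index 2)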

Related

Python error creating an instance of an object inside another

I'm creating a very simple container object in Python, and one of its functions requires creating a temporary, null placeholder class that does absolutely nothing except tell the program what to delete.
class __cyclepass(object):
    """Null class as a placeholder for deleting items in a cycle."""
    pass
from copy import copy  # needed for copy() below

class Cycle(object):
    def __init__(self, *args):
        self.l = list(args)
    #.........................
    def __delitem__(self, key):
        """Magic method for deleting items via indexing."""
        try:
            if isinstance(key, slice):
                s = key.start; e = key.stop; stp = key.step if key.step else 1
                slicelen = abs((s - e) // stp)
                if slicelen == 1:
                    steps = [s]
                else:
                    steps = [s + stp * i for i in range(slicelen)]
                _list = copy(self.l)
                for step in steps:
                    index = step % len(self)
                    _list[index] = __cyclepass()  # this is where the error occurs
                self.l = list(filter(lambda x: x.__class__ != __cyclepass, _list))
            else:
                index = key % len(self)
                del self.l[index]
        except:
            raise IndexError("Bad index %s" % str(key))
Everything seems fine (albeit a little messy, but that is beside the point), but upon running the program and implicitly calling the __delitem__ method I get this error:
NameError: global name '_Cycle__cyclepass' is not defined
What on earth would be causing it to look for _Cycle__cyclepass when creating the __cyclepass object?
[Moving my comment to an answer, as suggested]
This is the result of Python's name mangling of "private" members. Rename the class from a double-underscore prefix to a single underscore, and the problem is solved.
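For illustration, a minimal sketch of the rule (mark is a hypothetical method, added only to show the lookup): inside a class body, any identifier of the form __name is rewritten at compile time to _ClassName__name, so the module-level class can never be found under its original name:

class _cyclepass(object):
    """Null placeholder class; a single underscore avoids name mangling."""
    pass

class Cycle(object):
    def __init__(self, *args):
        self.l = list(args)

    def mark(self, index):
        # With a double underscore this line would compile to
        # _Cycle__cyclepass() and raise the NameError from the question.
        self.l[index] = _cyclepass()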

Python model inheritance and order of model declaration

The following code:
class ParentModel(models.Model):
    pass

class ChildA(ChildB):
    pass

class ChildB(ParentModel):
    pass
Obviously fails with the message:
NameError: name 'ChildB' is not defined
Is there any way to get around this issue without actually reordering the class definitions? (The code is auto-generated, about 45K lines, and the order of the classes is random.)
Perfectionists, look away!!
This is a workaround (hack); the real solution would be to fix the incorrect declaration order.
WARNING: This is extremely daft.
Concept:
Imagine a namespace where anything can exist. Literally anything that is asked of it. Not the smartest thing usually, but out-of-order declaration isn't smart either, so why not?
The key problem with out-of-sequence classes is that dependent classes are defined before their dependencies, the base classes. At that point of evaluation, the base classes are undefined, resulting in a NameError.
Wrapping each class in try/except statements would take as much effort as rewriting the module anyway, so that can be dismissed out of hand.
A more efficient (in terms of programmer time) means of suppressing NameError must be used. This can be achieved by making the namespace totally permissive: if a looked-up object doesn't exist, it is created on the spot, thereby avoiding a NameError. This is the obvious danger of the approach, as a lookup becomes a creation.
Implementation:
Namespaces in Python are dictionaries, I believe, and dictionary methods can be overloaded, including the lookup method __getitem__. So mr_agreeable is a dict subclass with an overloaded __getitem__ that automatically creates a blank class when a lookup key doesn't exist. An instance of mr_agreeable is passed to execfile as the namespace for the classes.py script. The objects (aside from the builtins) created by the execfile call are then merged into the globals() dict of the calling script, hack.py.
This works because Python doesn't care if a class' base classes are changed after the fact.
This may be implementation dependent, I don't know. Tested on: Python 2.7.3 64-bit on Win7 64-bit.
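To illustrate that last point, a standalone sketch (separate from the loader below): __bases__ can be reassigned after a class already exists, and attribute lookups follow the new base:

class Placeholder(object):
    pass

class Child(Placeholder):
    pass

class RealBase(object):
    name = "realbase"

Child.__bases__ = (RealBase,)   # rebase after the fact
print Child.name                # prints: realbase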
Assuming your out-of-order classes are defined in classes.py:
class ParentModel(object):
    name = "parentmodel"

class ChildC(ChildA):
    name = "childc"

class ChildA(ChildB):
    name = "childa"

class ChildB(ParentModel):
    name = "childb"
The loader script, let's call it hack.py:
from random import randint
from codecs import encode

class mr_agreeable(dict):
    sin_counter = 0
    nutty_factor = 0
    rdict = {0: (0, 9), 200: (10, 14), 500: (15, 16), 550: (17, 22)}

    def __getitem__(self, key):
        class tmp(object):
            pass
        tmp.__name__ = key
        if(not key in self.keys()):
            self.prognosis()
            print self.insanity()
        return self.setdefault(key, tmp)

    def prognosis(self):
        self.sin_counter += 1
        self.nutty_factor = max(filter(lambda x: x < self.sin_counter, self.rdict.keys()))

    def insanity(self):
        insane_strs = \
        [
            "Nofbyhgryl", "Fher, jul abg?", "Sbe fher", "Fbhaqf terng", "Qrsvangryl", "Pbhyqa'g nterr zber",
            "Jung pbhyq tb jebat?", "Bxl Qbnxl", "Lrc", "V srry gur fnzr jnl", "Zneel zl qnhtugre",
            "Znlor lbh fubhyq svk gung", "1 AnzrReebe vf bar gbb znal naq n 1000'f abg rabhtu", "V'ir qbar qvegvre guvatf",
            "Gur ebbz vf fgnegvat gb fcva", "Cebonoyl abg", "Npghnyyl, ab ..... nyevtug gura", "ZNXR VG FGBC",
            "BU TBQ AB", "CYRNFR AB", "LBH'ER OERNXVAT CLGUBA", "GUVF VF ABG PBAFRAGHNY", "V'Z GRYYVAT THVQB!!"
        ]
        return encode("ze_nterrnoyr: " + insane_strs[randint(*self.rdict[self.nutty_factor])], "rot13")

def the_act():
    ns = mr_agreeable()
    execfile("classes.py", ns)
    hostages = list(set(ns.keys()) - set(["__builtins__", "object"]))
    globals().update([(key, ns[key]) for key in hostages])

the_act()
mr_agreeable acts as the permissive namespace for the compiled classes.py. He reminds you this is bad form.
My previous answer showed a loader script that executed the out-of-order script via execfile but provided a dynamic namespace that created placeholder classes (these are typically base classes referenced before they are defined). It then loaded the changes from this namespace into the loader's global namespace.
This approach has two problems:
1) It's a hack
2) The assumed class of the placeholders is object. So when:
class ChildC(ChildA):
    name = "childc"
is evaluated, the namespace detects that ChildA is undefined and so creates a placeholder class (an object subclass). When ChildA is actually defined (in the out-of-order script), it might have a different base class than object, and so rebasing ChildC onto the new ChildA will fail if ChildA's base is not object (which is what ChildC was originally created against). See this for more info.
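A sketch of that failure mode (the exact compatibility rules are CPython internals; a real base with a different instance layout, e.g. one defining __slots__, is one way to provoke it):

class Placeholder(object):   # the stand-in the namespace handed out
    pass

class ChildC(Placeholder):
    name = "childc"

class ChildA(object):        # the real ChildA, with a different layout
    __slots__ = ("extra",)

try:
    ChildC.__bases__ = (ChildA,)
except TypeError, e:
    print e                  # the layout differs, so the rebase is refused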
So I created a new script that actually rewrites the input out-of-order script, using a similar concept to the previous hack and this script. The new script is used by calling:
python mr_agreeable.py -i out_of_order.py -o ordered.py
mr_agreeable.py:
import os
import sys
from codecs import encode
from random import randint
import getopt
import inspect
import types

__doc__ = \
'''
A python script that re-orders out-of-sequence class definitions
'''

class rebase_meta(type):
    '''
    Rebase metaclass
    Automatically rebases classes created with this metaclass upon
    modification of the classes' base classes
    '''
    org_base_classes = {}
    org_base_classes_subs = {}
    base_classes = {}
    base_classes_subs = {}
    mod_loaded = False
    mod_name = ""
    mod_name_space = {}

    def __init__(cls, cls_name, cls_bases, cls_dct):
        #print "Making class: %s" % cls_name
        super(rebase_meta, cls).__init__(cls_name, cls_bases, cls_dct)
        # Remove the old base sub class listings
        bases = rebase_meta.base_classes_subs.items()
        for (base_cls_name, sub_dict) in bases:
            sub_dict.pop(cls_name, None)
        # Add class to bases' sub class listings
        for cls_base in cls_bases:
            if(not rebase_meta.base_classes_subs.has_key(cls_base.__name__)):
                rebase_meta.base_classes_subs[cls_base.__name__] = {}
                rebase_meta.base_classes[cls_base.__name__] = cls_base
            rebase_meta.base_classes_subs[cls_base.__name__][cls_name] = cls
        # Rebase the sub classes to the new base
        if(rebase_meta.base_classes.has_key(cls_name)):  # Is class a base class
            subs = rebase_meta.base_classes_subs[cls_name]
            rebase_meta.base_classes[cls_name] = cls  # Update base class dictionary to new class
            for (sub_cls_name, sub_cls) in subs.items():
                if(cls_name == sub_cls_name):
                    continue
                sub_bases_names = [x.__name__ for x in sub_cls.__bases__]
                sub_bases = tuple([rebase_meta.base_classes[x] for x in sub_bases_names])
                try:
                    # Attempt to rebase sub class
                    sub_cls.__bases__ = sub_bases
                    #print "Rebased class: %s" % sub_cls_name
                except TypeError:
                    # The old sub class is incompatible with the new base class, so remake the sub
                    if(rebase_meta.mod_loaded):
                        new_sub_cls = rebase_meta(sub_cls_name, sub_bases, dict(sub_cls.__dict__.items() + [("__module__", rebase_meta.mod_name)]))
                        rebase_meta.mod_name_space[sub_cls_name] = new_sub_cls
                    else:
                        new_sub_cls = rebase_meta(sub_cls_name, sub_bases, dict(sub_cls.__dict__.items()))
                    subs[sub_cls_name] = new_sub_cls

    @classmethod
    def register_mod(self, imod_name, imod_name_space):
        if(not self.mod_loaded):
            self.org_base_classes = self.base_classes.copy()
            self.org_base_classes_subs = self.base_classes_subs.copy()
            self.mod_loaded = True
        else:
            self.base_classes = self.org_base_classes
            self.base_classes_subs = self.org_base_classes_subs
        self.mod_name = imod_name
        self.mod_name_space = imod_name_space

# Can't subclass these classes
forbidden_subs = \
[
    "bool",
    "buffer",
    "memoryview",
    "slice",
    "type",
    "xrange",
]

# Builtin, sub-classable classes
org_class_types = filter(lambda x: isinstance(x, type) and (not x.__name__ in forbidden_subs) and x.__module__ == "__builtin__", types.__builtins__.values())
# Builtin classes recreated with the rebasing metaclass
class_types = [(cls.__name__, rebase_meta(cls.__name__, (cls,), {})) for cls in org_class_types]
# Overwrite builtin classes
globals().update(class_types)

class mr_quiet(dict):
    '''
    A namespace class that creates placeholder classes upon
    a non-existent lookup. mr_quiet doesn't say much.
    '''
    def __getitem__(self, key):
        if(not key in self.keys()):
            if(hasattr(__builtins__, key)):
                return getattr(__builtins__, key)
            else:
                if(not key in self.keys()):
                    self.sanity_check()
                return self.setdefault(key, rebase_meta(key, (object,), {}))
        else:
            return dict.__getitem__(self, key)

    def sanity_check(self):
        pass

class mr_agreeable(mr_quiet):
    '''
    A talkative cousin of mr_quiet.
    '''
    sin_counter = 0
    nutty_factor = 0
    rdict = {0: (0, 9), 200: (10, 14), 500: (15, 16), 550: (17, 22)}

    def sanity_check(self):
        self.prognosis()
        print self.insanity()

    def prognosis(self):
        self.sin_counter += 1
        self.nutty_factor = max(filter(lambda x: x < self.sin_counter, self.rdict.keys()))

    def insanity(self):
        insane_strs = \
        [
            "Nofbyhgryl", "Fher, jul abg?", "Sbe fher", "Fbhaqf terng", "Qrsvangryl", "Pbhyqa'g nterr zber",
            "Jung pbhyq tb jebat?", "Bxl Qbnxl", "Lrc", "V srry gur fnzr jnl", "Zneel zl qnhtugre",
            "Znlor lbh fubhyq svk gung", "1 AnzrReebe vf bar gbb znal naq n 1000'f abg rabhtu", "V'ir qbar qvegvre guvatf",
            "Gur ebbz vf fgnegvat gb fcva", "Cebonoyl abg", "Npghnyyl, ab ..... nyevtug gura", "ZNXR VG FGBC",
            "BU TBQ AB", "CYRNFR AB", "LBH'ER OERNXVAT CLGUBA", "GUVF VF ABG PBAFRAGHNY", "V'Z GRYYVAT THVQB!!"
        ]
        return encode("ze_nterrnoyr: " + insane_strs[randint(*self.rdict[self.nutty_factor])], "rot13")

def coll_up(ilist, base=0, count=0):
    '''
    Recursively collapse nested lists at depth base and above
    '''
    tlist = []
    if(isinstance(ilist, __builtins__.list) or isinstance(ilist, __builtins__.tuple)):
        for q in ilist:
            tlist += coll_up(q, base, count + 1)
    else:
        if(base > count):
            tlist = ilist
        else:
            tlist = [ilist]
    return [tlist] if((count != 0) and (base > count)) else tlist

def build_base_dict(ilist):
    '''
    Creates a dictionary of class : class bases pairs
    '''
    base_dict = {}
    def build_base_dict_helper(iclass, idict):
        idict[iclass] = list(iclass.__bases__)
        for x in iclass.__bases__:
            build_base_dict_helper(x, idict)
    for cur_class in ilist:
        build_base_dict_helper(cur_class, base_dict)
    return base_dict

def transform_base_to_sub(idict):
    '''
    Transforms a base dict into a dictionary of class : sub classes pairs
    '''
    sub_dict = {}
    classes = idict.keys()
    for cur_class in idict:
        sub_dict[cur_class] = filter(lambda cls: cur_class in idict[cls], classes)
    return sub_dict

recur_class_helper = lambda idict, ilist=[]: [[key, recur_class_helper(idict, idict[key])] for key in ilist]
recur_class = lambda idict: recur_class_helper(idict, idict.keys())

class proc_func(list):
    '''
    Cmdline processing class
    '''
    def __init__(self, name="", *args, **kwargs):
        self.name = name
        super(list, self).__init__(*args, **kwargs)

    def get_args(self, *args):
        self.extend(filter(lambda x: x, args))

    def __call__(self, *args):
        print self.name
        print self

class proc_inputs(proc_func):
    def get_args(self, *args):
        self.extend(filter(os.path.isfile, args))

class proc_outputs(proc_func):
    pass

class proc_helper(proc_func):
    '''
    Help function
    Print help information
    '''
    def get_args(self, *args):
        self()

    def __call__(self, *args):
        print __file__
        print __doc__
        print "Help:\n\t%s -h -i inputfile -o outputfile" % sys.argv[0]
        print "\t\t-h or --help\tPrint this help message"
        print "\t\t-i or --input\tSpecifies the input script"
        print "\t\t-o or --output\tSpecifies the output script"
        sys.exit()

if __name__ == "__main__":
    proc_input = proc_inputs("input")
    proc_output = proc_outputs("output")
    proc_help = proc_helper("help")
    cmd_line_map = \
    {
        "-i": proc_input,
        "--input": proc_input,
        "-o": proc_output,
        "--output": proc_output,
        "-h": proc_help,
        "--help": proc_help
    }
    try:
        optlist, args = getopt.getopt(sys.argv[1:], "hi:o:", ["help", "input=", "output="])
        for (key, value) in optlist:
            cmd_line_map[key].get_args(value)
    except getopt.GetoptError:
        proc_help()
    if(len(proc_input) != len(proc_output)):
        print "Input files must have a matching output file"
        proc_help()
    elif(not proc_input):
        proc_help()
    else:
        in_out_pairs = zip(proc_input, proc_output)
        for (in_file, out_file) in in_out_pairs:
            dodgy_module_name = os.path.splitext(in_file)[0]
            sys.modules[dodgy_module_name] = types.ModuleType(dodgy_module_name)
            sys.modules[dodgy_module_name].__file__ = in_file
            # Make a fake space post haste
            name_space = mr_agreeable(
                [
                    ("__name__", dodgy_module_name),    # Needed for the created classes to identify with the fake module
                    ("__module__", dodgy_module_name),  # Needed to fool the inspect module
                ] +
                class_types
            )
            # Exclude these from returning
            exclusions = name_space.keys()
            # Associate the fake name space to the rebasing metaclass
            rebase_meta.register_mod(dodgy_module_name, name_space)
            # Run dodgy code
            execfile(in_file, name_space)
            # Bring back dodgy classes
            import_classes = [cls if(isinstance(cls, type) and not cls_name in exclusions) else None for (cls_name, cls) in name_space.items()]
            dodgy_import_classes = filter(lambda x: x, import_classes)
            # Create base and sub class dictionaries
            base_dict = build_base_dict(dodgy_import_classes)
            sub_dict = transform_base_to_sub(base_dict)
            # Create sets of base and sub classes
            base_set = reduce(lambda x, y: x | y, map(set, base_dict.values()), set([]))
            sub_set = reduce(lambda x, y: x | y, map(set, sub_dict.values()), set([]))
            kings = list(base_set - sub_set)                # A list of bases which are not subs
            kingdoms = recur_class_helper(sub_dict, kings)  # A subclass tree of lists
            lineages = coll_up(kingdoms, 2)                 # Flatten the tree branches at and below the 2nd level
            # Filter only for the classes created in the dodgy module
            inbred_lines = [filter(lambda x: x.__module__ == dodgy_module_name, lineage) for lineage in lineages]
            # Load Source
            for lineage in inbred_lines:
                for cls in lineage:
                    setattr(cls, "_source", inspect.getsource(cls))
            # Write Source
            with open(out_file, "w") as file_h:
                for lineage in inbred_lines:
                    for cls in lineage:
                        file_h.write(cls._source + "\n")

checking and fixing values appended to a node with lxml.objectify in python

I'm trying to modify PyKML, which uses lxml.objectify. When a tuple is appended to a track node, it gets turned into a string via the default str() behavior. I'd like to catch appends of lists or tuples and convert them to proper space-separated coordinate lines rather than '(xx.xxxx, yy.yyyy)'.
import datetime
from pykml.factory import KML_ElementMaker as KML
from pykml.factory import GX_ElementMaker as GX

track = GX.Track(id='track_%d' % group_num)
for pt in group:
    when = datetime.datetime.utcfromtimestamp(pt['ts'])
    track.append(KML.when(when))  # WHEN?
for pt in group:
    track.append(GX.coord((pt['x'], pt['y'])))  # <-- trouble here
Thanks,
-kurt
Create a clean ElementMaker instance with the namespace info. Then create a subclass with the node name as a method. In that method, handle all the odd cases, craft the string that should go in that node's place, and return an instance built by the clean ElementMaker with that node name.
http://code.google.com/r/schwehr-pykml/source/browse/src/pykml/factory.py?spec=svn05a10cef3fd3c430389e8aca1313a20da932e565&r=05a10cef3fd3c430389e8aca1313a20da932e565
def indexable_levels(args):
    #print 'args:', args
    levels = 0
    while True:
        if isinstance(args, str): break
        try:
            args = args[0]
            levels += 1
        except:
            break
    #print ' levels ->', levels
    return levels

# Create a factory object for the KML Google Extension namespace
_GX_ElementMakerSimple = objectify.ElementMaker(
    annotate=False,
    namespace=nsmap['gx'],
    nsmap={'gx': nsmap['gx']},
)

class _GX_ElementMaker(objectify.ElementMaker):
    'KML ElementMaker with overloads for custom text payloads like coordinates'
    def coord(self, *args):
        #print 'start coord: "%s"' % (str(args)), type(args), len(args)
        levels = indexable_levels(args)
        if levels == 1 and len(args) == 1:
            # This case is really redundant with the next
            assert isinstance(args[0], str)
            return _GX_ElementMakerSimple.coord(args[0])
        if levels == 1:
            return _GX_ElementMakerSimple.coord(' '.join([str(item) for item in args]))
        if levels == 2:
            # ((-121.583851, 37.386052),)
            assert(len(args) == 1)
            return _GX_ElementMakerSimple.coord(' '.join([str(item) for item in args[0]]))
        assert(False)

# Create a factory object for the KML Google Extension namespace
GX_ElementMaker = _GX_ElementMaker(
    annotate=False,
    namespace=nsmap['gx'],
    nsmap={'gx': nsmap['gx']},
)
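With the overload in place, the trouble line from the question goes through the levels == 2 branch; a usage sketch (track and the point values are assumed):

pt = {'x': -121.583851, 'y': 37.386052}
track.append(GX_ElementMaker.coord((pt['x'], pt['y'])))
# produces space-separated text instead of the tuple repr:
# <gx:coord>-121.583851 37.386052</gx:coord>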

Special Python dict with object IDs

I want to create a special dictionary which uses object IDs as keys, like this:
class ObjectIdDict(dict):
    def __setitem__(self, key, value):
        super(ObjectIdDict, self).__setitem__(id(key), value)

    def __getitem__(self, key):
        super(ObjectIdDict, self).__getitem__(id(key))
But if I run the following test, I get an error:
import unittest

class ObjectIdDictTest(unittest.TestCase):
    def test_get_and_set(self):
        dict_to_test = ObjectIdDict()

        class Something:
            def __init__(self):
                self.x = 1

        s = Something()
        dict_to_test[s.x] = "message"
        self.assertEqual(dict_to_test[s.x], "message")
Error message:
AssertionError: None != 'message'
What is wrong here?
Background:
The reason for creating such an exotic dict is that I want to store validation errors for each field of an object and want to avoid field names as strings: domain_object.errors[domain_object.field1]. Otherwise, field names as strings (domain_object.errors["field1"]) would be bad for refactoring and code completion.
ΤΖΩΤΖΙΟΥ:
I'm certain you don't get anything by using IDs. obj.field1 = 1; print(id(obj.field1)); obj.field1 = 2; print(id(obj.field1))
If I did not use IDs, the key would be the value of the variable, not its address. This would lead to errors if two fields had the same value:
def test_ordinary_dict(self):
    dict_to_test = {}

    class Something:
        def __init__(self):
            self.x = 1
            self.y = 1  # same value as self.x!

    s = Something()
    dict_to_test[s.x] = "message for x"
    dict_to_test[s.y] = "message for y"
    self.assertEqual(dict_to_test[s.x], "message for x")
    # fails because dict_to_test[s.x] == dict_to_test[1], which results in:
    # "message for y"
It is not critical that changing a variable's value leads to a new address, since the validation result is no longer valid after that anyway.
__getitem__ must return the result:
def __getitem__(self, key):
    return super(ObjectIdDict, self).__getitem__(id(key))
  # ^^^^^^
Without a return, the implicit return value is None, and therefore oiddict[key] is None for all keys.
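Put together, the corrected class plus a minimal check (a sketch; the key object just needs a stable id):

class ObjectIdDict(dict):
    def __setitem__(self, key, value):
        super(ObjectIdDict, self).__setitem__(id(key), value)

    def __getitem__(self, key):
        # the return is the fix
        return super(ObjectIdDict, self).__getitem__(id(key))

d = ObjectIdDict()
field = "some field value"
d[field] = "message"
assert d[field] == "message"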

Pythonic way to avoid a mountain of if...else statements?

This has come up several times recently and I'd like to deal with it better than I have been: I have a series of attributes that I'm cross-referencing between an object and a dictionary. If the value differs between them, I want to set object.attribute to the dictionary['attribute'] value. I also want to keep track of what's getting changed.
Now, my first thought is to just use an if...else statement for every attribute, but after writing a few of these it's apparent that I'm rewriting the same code again and again. There has to be a DRY way to do this, where I specify only the parts that change each time and then loop through all the attributes.
In production code there are 15 different attributes, but my example below will just use 2 for simplicity. I have some idea of how to do this in a clever way, but I'm missing the final step of actually setting the object.attribute equal to the dictionary['attribute'] value.
# Simulated data setup - not under my control IRL
class someClass:
    def __init__(self, name, version):
        self.name = name
        self.version = version

objA = someClass('Test1', '1.1')
dictA = {'name': 'Test1', 'revision': '1.2'}

# My code below

# option 1 - a series of if...else blocks
def updateAttributesSimple(obj, adict, msg):
    if obj.name == adict['name']:
        msg.append('Name is the same')
    else:
        msg.append('Name was updated from %s to %s' % (obj.name, adict['name']))
        obj.name = adict['name']
    if obj.version == adict['revision']:
        msg.append('Version is the same')
    else:
        msg.append('Version was updated from %s to %s' % (obj.version, adict['revision']))
        obj.version = adict['revision']

# option 2 - trying to be clever about this
def updateAttributesClever(obj, adict, msg):
    attributeList = (('Name', obj.name, adict['name']),
                     ('Version', obj.version, adict['revision']))
    for valTuple in attributeList:
        if valTuple[1] == valTuple[2]:
            msg.append('%s is the same' % (valTuple[0]))
        else:
            msg.append('%s was updated from %s to %s' % (valTuple[0], valTuple[1], valTuple[2]))
            # code to set valTuple[1] = valTuple[2] goes here, but what is it?
            # valTuple[1] = valTuple[2] attempts to set the desired value to a string,
            # rather than the attribute of obj itself

msg = ['Updating Attributes simple way:']
updateAttributesSimple(objA, dictA, msg)
print '\n\t'.join(msg)

# reset data
objA = someClass('Test1', '1.1')
dictA = {'name': 'Test1', 'revision': '1.2'}

msg = ['Updating Attributes clever way:']
updateAttributesClever(objA, dictA, msg)
print '\n\t'.join(msg)
The idea is that whenever I need to add another attribute, I can just update the list of attributes being inspected, and the rest of the code is already written. What's the Pythonic way to accomplish this?
setattr() is what you're looking for:
attributeList = (('Name', 'name', 'name'),
                 ('Version', 'version', 'revision'))
for title, obj_attribute, dict_key in attributeList:
    obj_value = getattr(obj, obj_attribute)
    adict_value = adict[dict_key]
    if obj_value == adict_value:
        msg.append('%s is the same' % (title,))
    else:
        msg.append('%s was updated from %s to %s' % (title, obj_value, adict_value))
        setattr(obj, obj_attribute, adict_value)
This should work for you:
class X(object):
    def __init__(self):
        self.a = 1
        self.b = 2

x = X()
d = dict()
d['a'] = 1
d['b'] = 3

def updateAttributes(obj, dic):
    def update(name):
        val = dic[name]
        if getattr(obj, name) == val:
            print name, "was equal"
        else:
            print "setting %s to %s" % (name, val)
            setattr(obj, name, val)
    for name in ['a', 'b']:
        update(name)

updateAttributes(x, d)
print x.a
print x.b
You might want to think about creating a function which can take an arbitrary object and convert the dictionary of name/value pairs into something more meaningful. It's not strictly a "Python" strategy, but something that is fairly easy to do in Python because of its support for closures and how it treats objects under the hood:
def checkUpdates(obj):
    def updated(dictionaryPrevious, msg):
        for name, value in dictionaryPrevious.items():
            if(obj.__dict__[name] == value):
                msg.append(name + ' is the same')
            else:
                msg.append(name + ' has been changed!')
                obj.__dict__[name] = value
    return updated
I am making one assumption: the names in the dictionary always correspond to the object's instance variables. If they're not the same, you'll need to make a mapping.
edit:
() => [] and object => obj. Thanks guys. Sometimes you go from one language to a few others and it all gets muddled.
A couple of answers are close, but to handle the fact that the names of the keys in the dict don't match the corresponding attribute names on the object, you'll need some way to map between them. This can easily be done by adding yet another dictionary mapping the keys of the dict to the names of the object's attributes.
class someClass:
    def __init__(self, name, version):
        self.name = name
        self.version = version

objA = someClass('Test1', '1.1')
dictA = {'name': 'Test1', 'revision': '1.2'}
keymap = {'name': 'name', 'revision': 'version'}

def updateAttributesGeneric(obj, adict, key2attr, msg):
    for key, value in adict.iteritems():
        attrname = key2attr[key]
        if getattr(obj, attrname) == value:
            msg.append('%s is the same' % attrname)
        else:
            msg.append('%s has been changed' % attrname)
            setattr(obj, attrname, adict[key])

msg = ['Updating Attributes:']
updateAttributesGeneric(objA, dictA, keymap, msg)
print '\n\t'.join(msg)

# Updating Attributes:
#     name is the same
#     version has been changed
