Say I've got this simple little Pony ORM mapping here. The built-in Enum class is new as of Python 3.4, and backported to 2.7.
from enum import Enum
from pony.orm import Database, Required
class State(Enum):
    """Enumeration of the three states: ready, running and errored."""

    ready = 0
    running = 1
    errored = 2
if __name__ == '__main__':
    # In-memory SQLite database.
    db = Database('sqlite', ':memory:', create_db=True)

    class StateTable(db.Entity):
        """Entity whose single required attribute is typed as the State enum."""

        state = Required(State)

    # NOTE: as written, running this script fails with
    # "TypeError: No database converter found for type <enum 'State'>".
    db.generate_mapping(create_tables=True)
When I run the program, an error is thrown.
TypeError: No database converter found for type <enum 'State'>
This happens because Pony doesn't support mapping the enum type. Of course, the workaround here is to just store the Enum value, and provide a getter in Class StateTable to convert the value to the Enum once again. But this is tedious and error prone. I can also just use another ORM. Maybe I will if this issue becomes too much of a headache. But I would rather stick with Pony if I can.
I would much rather create a database converter to store the enum, like the error message is hinting at. Does anyone know how to do this?
UPDATE:
Thanks to Ethan's help, I have come up with the following solution.
from enum import Enum
from pony.orm import Database, Required, db_session
from pony.orm.dbapiprovider import StrConverter
class State(Enum):
    """Enumeration of the three states: ready, running and errored."""

    ready = 0
    running = 1
    errored = 2
class EnumConverter(StrConverter):
    """Converter that stores an Enum member by its name and rebuilds it on read."""

    def validate(self, val, obj=None):
        """Return ``val`` unchanged when it is an Enum member.

        ``obj`` is accepted and ignored.
        NOTE(review): newer Pony releases appear to pass the entity instance
        as a second argument -- confirm against the installed version.

        Raises:
            ValueError: if ``val`` is not an Enum member.
        """
        if not isinstance(val, Enum):
            raise ValueError('Must be an Enum. Got {}'.format(type(val)))
        return val

    def py2sql(self, val):
        """Return the member's name, which is the value written to the database."""
        return val.name

    def sql2py(self, value):
        """Look the stored name up on the attribute's own enum class."""
        # Any enum type can be used, so py_type ensures the correct one is used to create the enum instance
        return self.py_type[value]
if __name__ == '__main__':
    db = Database('sqlite', ':memory:', create_db=True)

    # Register the type converter with the database
    db.provider.converter_classes.append((Enum, EnumConverter))

    class StateTable(db.Entity):
        """Entity whose single required attribute is typed as the State enum."""

        state = Required(State)

    db.generate_mapping(create_tables=True)

    with db_session:
        s = StateTable(state=State.ready)
        print('Got {} from db'.format(s.state))
Excerpt from some random mailing list:
2.2. CONVERTER METHODS
Each converter class should define the following methods:
class MySpecificConverter(Converter):
    """Template listing the methods a converter class should define."""

    def init(self, kwargs):
        # Override this method to process additional positional
        # and keyword arguments of the attribute
        if self.attr is not None:
            # self.attr.args can be analyzed here
            self.args = self.attr.args
        # The argument is optional, so fall back to None instead of raising
        # KeyError when the attribute was declared without it.
        self.my_optional_argument = kwargs.pop("kwarg_name", None)
        # You should take all valid options from this kwargs
        # What is left in is regarded as unrecognized option

    def validate(self, val):
        # convert value to the necessary type (e.g. from string)
        # validate all necessary constraints (e.g. min/max bounds)
        return val

    def py2sql(self, val):
        # prepare the value (if necessary) to storing in the database
        return val

    def sql2py(self, value):
        # convert value (if necessary) after the reading from the db
        # (the parameter is named ``value``; returning ``val`` was a NameError)
        return value

    def sql_type(self):
        # generate corresponding SQL type, based on attribute options
        return "SOME_SQL_TYPE_DEFINITION"
You can study the code of the existing converters to see how these methods
are implemented.
Related
Problem in a nutshell
I am having issues with the hypothesis build strategy and custom pydantic data types (no values are returned when invoking the build strategy on my custom data type).
Problem in more detail
Given the following pydantic custom type, which just validates if a value is a timezone.
import pytz
from pydantic import StrictStr
# Set of time zone names that count as valid.
TIMEZONES = pytz.common_timezones_set


class CountryTimeZone(StrictStr):
    """Validate a country timezone."""

    @classmethod
    def __get_validators__(cls):
        """Yield the parent class's validators, then the timezone check."""
        yield from super().__get_validators__()
        yield cls.validate_timezone

    @classmethod
    def validate_timezone(cls, v):
        """Return ``v`` if it is a known timezone name, else raise ValueError."""
        if v not in TIMEZONES:
            raise ValueError(f"{v} is not a valid country timezone")
        return v

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Add the known timezone names to the schema as examples."""
        field_schema.update(examples=TIMEZONES)
When I attempt to use this in some schema...
from pydantic import BaseModel
class Foo(BaseModel):
    """Model with a single field validated as a CountryTimeZone."""

    bar: CountryTimeZone
and subsequently try to build an example in a test, using the pydantic hypothesis plugin like so:
from hypothesis import given
from hypothesis import strategies as st
@given(st.builds(Foo))
def test_something_interesting(schema) -> None:
    """Receive a Foo instance built by hypothesis."""
    # Some assertions
    ...
schema.bar is always "".
Questions
Is there something missing from this implementation, meaning that values like "Asia/Krasnoyarsk" aren't being generated? From the documentation, examples like PaymentCardNumber and EmailStr build as expected.
Even when using StrictStr by itself, the resulting value is also an empty string. I tried to inherit from str but still no luck.
Came across the same problem today. It seems the wording in the hypothesis plugin docs gives the wrong impression: Pydantic has written hypothesis integrations for its own custom types; hypothesis does not support custom pydantic types out of the box.
Here is a full example of creating a custom class, assigning it a test strategy and using it in a pydantic model.
import re
from hypothesis import given, strategies as st
from pydantic import BaseModel
CAPITAL_WORD = r"^[A-Z][a-z]+"
CAPITAL_WORD_REG = re.compile(CAPITAL_WORD)


class MustBeCapitalWord(str):
    """Custom class that validates the string is a single word of only letters
    starting with a capital case letter."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators to run for this type."""
        yield cls.validate

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Add the pattern and two example values to the exported schema."""
        # optional stuff, updates the schema if you choose to export the
        # pydantic schema
        field_schema.update(
            pattern=CAPITAL_WORD,
            examples=["Hello", "World"],
        )

    @classmethod
    def validate(cls, v):
        """Return ``v`` wrapped in this class if it passes the checks.

        Raises:
            TypeError: if ``v`` is not a string.
            ValueError: if ``v`` is empty or does not match CAPITAL_WORD.
        """
        if not isinstance(v, str):
            raise TypeError("string required")
        if not v:
            raise ValueError("No capital letter found")
        elif CAPITAL_WORD_REG.match(v) is None:
            raise ValueError("Input is not a valid word starting with capital letter")
        return cls(v)

    def __repr__(self):
        return f"MustBeCapitalWord({super().__repr__()})"
# Tell hypothesis how to generate values for MustBeCapitalWord: strings
# that fully match the CAPITAL_WORD pattern.
_capital_word_strategy = st.from_regex(CAPITAL_WORD, fullmatch=True)
st.register_type_strategy(MustBeCapitalWord, _capital_word_strategy)
# use our custom type in a pydantic model
class Model(BaseModel):
    """Model with a single MustBeCapitalWord field."""

    word: MustBeCapitalWord
# test it all
@given(st.builds(Model))
def test_model(instance):
    """Check that the generated word starts with an upper-case letter."""
    assert instance.word[0].isupper()
I'm facing some difficulties unit testing my project, mainly due to the fact that the controllers reference a singleton produced by a factory.
A simple demonstration of this problem would be:
databasefactory.py
class DataBaseFactory(object):
    """Hands out the single database client shared by the whole application."""

    # Lets imagine we support a number of databases. The client implementation all gives us a similar interfaces to use
    # This is a singleton through the whole application
    _database_client = None

    @classmethod
    def get_database_client(cls):
        # type: () -> DataBaseClientInterFace
        """Return the cached client, creating a DataBaseClient on first use."""
        # Compare against None explicitly: an injected client that happens to
        # be falsy must not be silently replaced by a real one.
        if cls._database_client is None:
            cls._database_client = DataBaseClient()
        return cls._database_client
class DataBaseClientInterFace(object):
    """Interface of a database client: ``get`` and ``set`` by key."""

    def get(self, key):
        # type: (any) -> any
        """Return the value stored under ``key``; subclasses must override."""
        raise NotImplementedError()

    def set(self, key, value):
        # type: (any, any) -> any
        """Store ``value`` under ``key``; subclasses must override."""
        raise NotImplementedError()
class DataBaseClient(DataBaseClientInterFace):
    """Dictionary-backed client standing in for a real database."""

    # Mock some real world database - The unittest mocking should be providing another client
    # Class-level dict: every instance reads and writes the same storage.
    _real_world_data = {}

    def get(self, key):
        """Return the value stored under ``key``; KeyError if absent."""
        return self._real_world_data[key]

    def set(self, key, value):
        """Store ``value`` under ``key`` and return ``value``."""
        self._real_world_data[key] = value
        return value
model.py
from .databasefactory import DataBaseFactory
class DataModel(object):
    """Model that saves and loads its attributes through the database client."""

    # The DataBase type never changes so its a constant
    # Evaluated once, when this class body runs.
    DATA_BASE_CLIENT = DataBaseFactory.get_database_client()

    def __init__(self, model_name):
        self.model_name = model_name

    def save(self):
        # type: () -> None
        """
        Save the current model into the database
        """
        key = self.get_model_key()
        # Store a snapshot rather than the live __dict__, so later attribute
        # changes on this instance do not silently alter the stored record.
        data = dict(vars(self))
        self.DATA_BASE_CLIENT.set(key, data)

    @classmethod
    def load(cls):
        # type: () -> DataModel
        """
        Load the model
        """
        key = cls.get_model_key()
        data = cls.DATA_BASE_CLIENT.get(key)
        return cls(**data)

    @staticmethod
    def get_model_key():
        """Return the key under which the model is stored."""
        return 'model_test'
datacontroller.py
from .databasefactory import DataBaseFactory
from .model import DataModel
class DataBaseController(object):
    """
    Does some stuff with the database
    """

    # Also needs the database client. This is the same instance as on DataModel
    DATA_BASE_CLIENT = DataBaseFactory.get_database_client()
    _special_key = 'not_model_key'

    @staticmethod
    def save_a_model():
        """Create a DataModel named 'test' and save it."""
        a_model = DataModel('test')
        a_model.save()

    @staticmethod
    def load_a_model():
        """Return the model loaded through DataModel.load()."""
        a_model = DataModel.load()
        return a_model

    @classmethod
    def get_some_special_key(cls):
        """Return the value stored under the special key."""
        return cls.DATA_BASE_CLIENT.get(cls._special_key)

    @classmethod
    def set_some_special_key(cls):
        """Store 1 under the special key and return the client's result."""
        return cls.DATA_BASE_CLIENT.set(cls._special_key, 1)
And finally the unittest itself:
test_simple.py
import unittest
from .databasefactory import DataBaseClientInterFace
from .datacontroller import DataBaseController
from .model import DataModel
class MockedDataBaseClient(DataBaseClientInterFace):
    """Test double pre-loaded with the keys the controller and model read."""

    _mocked_data = {DataBaseController._special_key: 2,
                    DataModel.get_model_key(): {'model_name': 'mocked_test'}}

    def get(self, key):
        """Return the mocked value stored under ``key``."""
        return self._mocked_data[key]

    def set(self, key, value):
        """Store ``value`` under ``key`` and return ``value``."""
        self._mocked_data[key] = value
        return value
class SimpleOne(unittest.TestCase):
    def test_controller(self):
        """
        I want to mock the singleton instance referenced in both DataBaseController and DataModel
        As DataBaseController imports DataModel, both classes have the DATA_BASE_CLIENT attribute instantiated with the factory result
        """
        # Initially it'll throw a KeyError
        with self.assertRaises(KeyError):
            DataBaseController.get_some_special_key()

        # It's impossible to just change the DATA_BASE_CLIENT in the DataBaseController as DataModel still points towards the real implementation
        # Should not be done as it won't change anything for DataModel
        DataBaseController.DATA_BASE_CLIENT = MockedDataBaseClient()
        self.assertEqual(DataBaseController.get_some_special_key(), 2)
        # Will fail as the DataModel still uses the real implementation
        # I'd like to mock DATA_BASE_CLIENT for both classes without explicitly inserting a new class
        # The project I'm working on has a number of these constants that make it a real hassle to inject a new one
        # There has to be a better way to tackle this issue
        model = DataBaseController.load_a_model()
The moment the unittest imports the DataBaseController, DataModel is imported through the DataBaseController module.
This means that both DATA_BASE_CLIENT class variables are instantiated.
If my factory were to catch it running inside a unittest, it still would not matter as the import happens outside the unittest.
My question is: is there a way to mock this singleton and replace across the whole application at once?
Replacing the cached instance on the factory is not an option as the references in the classes point to the old object.
It might be a design flaw to put these singleton instances as class variables in the first place. But I'd rather retrieve a class variable than calling the factory each time for the singleton.
In your use case, a single module is in charge of providing the singleton to the whole application. So I would try to inject the mock in that module before it is used by anything else. The problem is that the mock cannot be fully constructed before the other classes are declared. A possible way is to construct the singleton in two passes: the first pass does not depend on anything, then that minimal object is used to construct the classes, and then its internal dictionary is populated. Code could be:
import unittest
from .databasefactory import DataBaseClientInterFace
class MockedDataBaseClient(DataBaseClientInterFace):
    """Test double that starts empty and is populated after the other imports."""

    _mocked_data = {}  # no dependency outside databasefactory

    def get(self, key):
        """Return the mocked value stored under ``key``."""
        return self._mocked_data[key]

    def set(self, key, value):
        """Store ``value`` under ``key`` and return ``value``."""
        self._mocked_data[key] = value
        return value
# Step 1: inject the mock into DataBaseFactory by overwriting its cached
# client, before the modules below are imported.
from .databasefactory import DataBaseFactory
DataBaseFactory._database_client = MockedDataBaseClient()
# Step 2: import the other classes now, so that they are constructed
# against the (still empty) mock.
from .datacontroller import DataBaseController
from .model import DataModel
# Step 3: populate the mock, now that the keys defined on those classes
# are available.
DataBaseFactory._database_client._mocked_data.update(
{DataBaseController._special_key: 2,
DataModel.get_model_key(): {'model_name': 'mocked_test'}})
class SimpleOne(unittest.TestCase):
    def test_controller(self):
        """
        I want to mock the singleton instance referenced in both DataBaseController and DataModel
        As DataBaseController imports DataModel, both classes have the DATA_BASE_CLIENT attribute instantiated with the factory result
        """
        self.assertEqual(DataBaseController.get_some_special_key(), 2)
        model = DataBaseController.load_a_model()
        self.assertEqual('mocked_test', model.model_name)
But beware: this assumes that the test procedure does not load model.py or datacontroller.py before test_simple.py
I have a project where every table has some common fields, e.g., status, and I'd like to alias all of them. Is it possible to do this without manually adding the alias to each class? E.g., here's what I have now:
from core import foo_table, bar_table, Status
Base = declarative_base()
class CustomBase(object):
    """Mixin exposing ``_status`` wrapped in a ``Status`` object."""

    @property
    def status(self):
        """Return ``Status(self._status)``."""
        return Status(self._status)

    ...
class Foo(Base, CustomBase):
    """Class mapped onto ``foo_table``."""

    __table__ = foo_table
    # Alias for the table's ``status`` column; read by CustomBase.status.
    _status = foo_table.c.status
    ...
class Bar(Base, CustomBase):
    """Class mapped onto ``bar_table``."""

    __table__ = bar_table
    # Alias for the table's ``status`` column; read by CustomBase.status.
    _status = bar_table.c.status
    ...
Ideally, I'd like to be able to set up my _status alias on CustomBase instead of in Foo and Bar, or set up my project so that the alias is added whenever a class extending CustomBase is loaded. Is this possible or am I trying to accomplish this in the wrong way? I know I can make it work if I rename the status field in my db or rename the status property in the CustomBase, but I'd prefer to avoid this if possible since they're both representations of the same thing, and there's no need to directly access the enum value through the code.
Thanks!
Your best bet is probably to create a custom Column type that adapts Enum to translate to and from your own Status class. See here for a full reference. Below is a draft for your core module, the precise code depends a bit on your situation.
# core module
import sqlalchemy.types as types
class DBStatus(types.TypeDecorator):
    """Column type that translates between ``Status`` objects and an Enum column."""

    impl = types.Enum

    # what should happen with Status objects on the way into the table
    def process_bind_param(self, value, dialect):
        """Return ``str(value)``, passing None through unchanged."""
        if value is None:
            return value
        return str(value)  # if Status has a __str__ or __repr__ method

    # what should happen with Enum objects on the way out of the table
    def process_result_value(self, value, dialect):
        """Return ``Status(value)``, passing None through unchanged."""
        if value is None:
            return value
        return Status(value)
# The 'status' column is declared with the DBStatus type, constructed here
# with the labels 'OK' and 'Error'.
foo_table = Table(
'foo',
MetaData(),
Column('status', DBStatus('OK', 'Error')),
# ...
)
After this you don't have to do anything special anymore in the module with the mappings:
# module with the mappings
Base = declarative_base()
class Foo(Base):
    """Class mapped onto ``foo_table``."""

    __table__ = foo_table
    # ...
In fact it's so straightforward you might just as well use full declarative mapping, as far as the Status columns are concerned.
# everything in one module
class DBStatus(types.TypeDecorator):
    # same as above
    ...
Base = declarative_base()
class Foo(Base):
    """Declarative sketch: the status column is declared directly with DBStatus."""

    status = Column(DBStatus('OK', 'Error'))
    # ...
Using the SQLAlchemy ORM, I want to make sure values are the right type for their columns.
For example, say I have an Integer column. I try to insert the value “hello”, which is not a valid integer. SQLAlchemy will allow me to do this. Only later, when I execute session.commit(), does it raise an exception: sqlalchemy.exc.DataError: (DataError) invalid input syntax integer: "hello"….
I am adding batches of records, and I don’t want to commit after every single add(…), for performance reasons.
So how can I:
Raise the exception as soon as I do session.add(…)
Or, make sure the value I am inserting can be converted to the target Column datatype, before adding it to the batch?
Or any other way to prevent one bad record from spoiling an entire commit().
SQLAlchemy doesn't build this in as it defers to the DBAPI/database as the best and most efficient source of validation and coercion of values.
To build your own validation, usually TypeDecorator or ORM-level validation is used. TypeDecorator has the advantage that it operates at the core and can be pretty transparent, though it only occurs when SQL is actually emitted.
To validate and coerce values sooner, do it at the ORM level.
Validation can be ad-hoc, at the ORM layer, via @validates:
http://docs.sqlalchemy.org/en/latest/orm/mapped_attributes.html#simple-validators
The event system that @validates uses is also available directly. You can write a generalized solution that links validators of your choosing to the types being mapped:
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import event
import datetime
Base= declarative_base()
def validate_int(value):
    """Return ``value`` as an int, converting it first if it is a string.

    Raises:
        ValueError: if a string cannot be parsed by ``int()``.
        AssertionError: if ``value`` is neither a string nor an int.
    """
    if isinstance(value, str):
        return int(value)
    # Raised explicitly (same exception type as the original ``assert``) so
    # the check is not stripped when Python runs with -O.
    if not isinstance(value, int):
        raise AssertionError("expected an int, got %r" % (value,))
    return value
def validate_string(value):
    """Return ``value`` unchanged if it is a string.

    Raises:
        AssertionError: if ``value`` is not a string.
    """
    # Raised explicitly (same exception type as the original ``assert``) so
    # the check is not stripped when Python runs with -O.
    if not isinstance(value, str):
        raise AssertionError("expected a string, got %r" % (value,))
    return value
def validate_datetime(value):
    """Return ``value`` unchanged if it is a ``datetime.datetime``.

    Raises:
        AssertionError: if ``value`` is not a ``datetime.datetime``.
    """
    # Raised explicitly (same exception type as the original ``assert``) so
    # the check is not stripped when Python runs with -O.
    if not isinstance(value, datetime.datetime):
        raise AssertionError("expected a datetime, got %r" % (value,))
    return value
# Maps a column type class to the validator function applied to values
# assigned to attributes of that type.
validators = {
Integer:validate_int,
String:validate_string,
DateTime:validate_datetime,
}
# this event is called whenever an attribute
# on a class is instrumented
@event.listens_for(Base, 'attribute_instrument')
def configure_listener(class_, key, inst):
    """Attach a validating "set" listener to a newly instrumented attribute."""
    # Skip attributes whose property has no ``columns``.
    if not hasattr(inst.property, 'columns'):
        return

    # this event is called whenever a "set"
    # occurs on that instrumented attribute
    @event.listens_for(inst, "set", retval=True)
    def set_(instance, value, oldvalue, initiator):
        """Return the validated value, or the value as-is if no validator matches."""
        # Looked up by the exact class of the first column's type.
        validator = validators.get(inst.property.columns[0].type.__class__)
        if validator:
            return validator(value)
        else:
            return value
class MyObject(Base):
    """Example mapped class with one column per validated type."""

    __tablename__ = 'mytable'

    id = Column(Integer, primary_key=True)
    svalue = Column(String)
    ivalue = Column(Integer)
    dvalue = Column(DateTime)
# Exercise the validators through plain attribute assignment.
m = MyObject()
m.svalue = "ASdf"
# validate_int converts a string with int().
m.ivalue = "45"
# Not a datetime.datetime, so validate_datetime's assertion fails here.
m.dvalue = "not a date"
Validation and coercion can also be built at the type level using TypeDecorator, though this is only when SQL is being emitted, such as this example which coerces utf-8 strings to unicode:
http://docs.sqlalchemy.org/en/latest/core/custom_types.html#coercing-encoded-strings-to-unicode
Improving on the answer of @zzzeek, I suggest the following solution:
from sqlalchemy import String
from sqlalchemy.ext.declarative import declarative_base
# The decorator is ``listens_for``; ``listen_for`` does not exist.
from sqlalchemy.event import listens_for

Base = declarative_base()


@listens_for(Base, 'attribute_instrument')
def configure_listener(table_cls, attr, col_inst):
    """Attach a "set" listener only to columns whose type defines ``validator``."""
    if not hasattr(col_inst.property, 'columns'):
        return
    validator = getattr(col_inst.property.columns[0].type, 'validator', None)
    if validator:
        # Only decorate columns, that need to be decorated
        @listens_for(col_inst, "set", retval=True)
        def set_(instance, value, oldvalue, initiator):
            """Return the value produced by the column type's validator."""
            return validator(value)
That lets you do things like:
class Name(String):
    """String column type whose values are upper-cased when set."""

    def validator(self, name):
        """Return ``name`` upper-cased; raise TypeError if it is not a string."""
        if isinstance(name, str):
            return name.upper()
        raise TypeError("name must be a string")
This has two benefits: Firstly, there is only an event triggered, when there actually is a validator attached to the data field object. It does not waste precious CPU cycles on set events for objects, that have no function for validation defined. Secondly, it allows you to define your own field types and just add a validator method there, so not all things that you want to store as Integer etc run through the same checks, just the ones derived from your new field type.
I'm using Django and want to be able to store classes in a database for things like forms and models so that I can easily make them creatable through a user interface since they are just stored in the database as opposed to a regular file. I don't really know a whole lot about this and am not sure if this is a situation where I need to use exec in python or if there is some other way. My searches on this aren't turning up much of anything.
Basically, it would just be where I do a database call and get the contents of a class, then I want to instantiate it. Any advice is appreciated on how to best do this sort of thing.
EDIT: In response to the idea of a malicious __init__ in the class, these are only for things like forms or models where it is tightly controlled through validation what goes in the class, there would never be an __init__ in the class and it would be basically impossible, since I would validate everything server side, to put anything malicious in the class.
Do not store code in the database!!!
Imagine a class with a malicious __init__ method finding its way into your "class repository" in the database. This means whoever has write access to those database tables has the ability to read any file from your web server and even nuke its file system, since they have the ability to execute any python code on it.
Don't store the class itself, store the import path as a string in the database (e.g. 'django.forms.CharField')
I started doing this same thing for another project, and saved off the code in my local repository. To address the security concerns I was going to add an argument to the field constructor of allowed base classes. If you do implement this, let me know, I'd love to have it.
helpers.py
def get_class_from_concrete_classpath(class_path):
    """Import and return the object named by a dotted path.

    class_path - Dotted path such as 'package.module.ClassName'.

    Raises a `ValueError` if the path has no module part, `ImportError` if
    the module cannot be imported and `AttributeError` if the module has no
    attribute with that name.
    """
    import importlib

    # Unicode will throw errors in the __import__ (at least in 2.6)
    class_path = str(class_path)
    module_path, _, class_name = class_path.rpartition('.')
    if not module_path:
        # Same exception type the empty module name used to trigger, but
        # with a message that names the offending path.
        raise ValueError(
            'Expected a dotted path like "module.ClassName". Got %r' % class_path)
    base_mod = importlib.import_module(module_path)
    return getattr(base_mod, class_name)
def get_concrete_name_of_class(klass):
    """Given a class return the concrete name of the class.

    klass - The reference to the class we're interested in.

    Raises a `TypeError` if klass is not a class.
    """
    import inspect

    # inspect.isclass replaces the (type, ClassType) check, which relied on
    # a name that is not defined here.
    if not inspect.isclass(klass):
        raise TypeError('The klass argument must be a class. Got type %s; %s' %
                        (type(klass), klass))
    return '%s.%s' % (klass.__module__, klass.__name__)
fields.py
class ClassFormField(forms.Field):
    """Form field whose cleaned value is the dotted name of a class."""

    def to_python(self, value):
        """Return the dotted name of ``value`` via get_concrete_name_of_class."""
        return get_concrete_name_of_class(value)
class ClassField(models.CharField):
    """Field used for storing a class as a string for later retrieval"""

    # NOTE(review): the original declared ``__metaclass__ = models.SubfieldBase``.
    # From memory, SubfieldBase was removed in Django 1.10 and
    # ``from_db_value`` (below) is its replacement -- confirm against the
    # Django version in use.
    MAX_LENGTH = 255
    default_error_messages = {
        'invalid': _(u'Enter a valid class variable.'),
    }

    def __init__(self, *args, **kwargs):
        """Default ``max_length`` to MAX_LENGTH unless the caller sets it."""
        kwargs.setdefault('max_length', ClassField.MAX_LENGTH)
        super(ClassField, self).__init__(*args, **kwargs)

    def get_prep_value(self, value):
        """Return the dotted name to store; strings and None pass through."""
        if value is None or isinstance(value, str):
            return value
        return get_concrete_name_of_class(value)

    def from_db_value(self, value, expression, connection, *args):
        """Convert a value loaded from the database the same way as to_python."""
        # *args tolerates an extra positional argument, which some Django
        # releases are believed to pass -- confirm.
        return self.to_python(value)

    def to_python(self, value):
        """Return the class named by a dotted-path string; other values pass through."""
        # NOTE(review): this imports whatever dotted path is stored. If the
        # column can hold untrusted data, restrict it to an allow-list.
        if isinstance(value, str):
            return get_class_from_concrete_classpath(value)
        return value

    def formfield(self, **kwargs):
        """Use ClassFormField as the form class unless the caller overrides it."""
        defaults = {'form_class': ClassFormField}
        defaults.update(kwargs)
        return super(ClassField, self).formfield(**defaults)