SQLAlchemy Classical Relationship with No Foreign Key

I'm trying to relate two tables that have multiple 'secondary' tables. I need to use the classical mapping style rather than the declarative syntax. Here is a simplified schema:
class Apple:
    def __init__(self, id=None, name=None):
        # ...

class Recipe:
    def __init__(self, id=None, appleId=None, name=None):
        # ...

class Blog:
    def __init__(self, id=None, name=None, recipeId=None, bloggerId=None):
        # ...

class Blogger:
    def __init__(self, name):
        # ...
appleTable = Table('Apple', metadata,
                   Column('id', Integer, primary_key=True),
                   Column('name', String(256)))

recipeTable = Table('Recipe', metadata,
                    Column('id', Integer, primary_key=True),
                    Column('name', String(256)),
                    Column('appleId', Integer, ForeignKey('Apple.id')))

blogTable = Table('Blog', metadata,
                  Column('id', Integer, primary_key=True),
                  Column('name', String(256)),
                  Column('recipeId', Integer, ForeignKey('Recipe.id')),
                  Column('bloggerId', Integer, ForeignKey('Blogger.id')))

bloggerTable = Table('Blogger', metadata,
                     Column('id', Integer, primary_key=True),
                     Column('name', String(256)))
# call mapper on all tables/classes
# ... #
# Relate 'Apple' to 'Blogger' using 'Recipe' and 'Blog' as intermediates
Apple.appleBloggers = relationship(Blogger, secondary=..., primaryjoin=..., secondaryjoin=...)
What relationship would I need to place into the appleBloggers attribute of Apple in order to retrieve all bloggers who've blogged about apple recipes?
Edit: Solution
An alternative to @univerio's solution is posted below. The difference is the usage of the mapped classes instead of the table variables. I've also added a viewonly parameter, which prevents writes to the attribute.
mapper(Apple, appleTable, properties={
    "appleBloggers": relationship(Blogger,
                                  secondary=join(Recipe, Blog, Recipe.id == Blog.recipeId),
                                  secondaryjoin=lambda: and_(Apple.id == Recipe.appleId,
                                                             Blogger.id == Blog.bloggerId),
                                  viewonly=True)
})
Original Attempt
Here is what I've tried:
Apple.appleBloggers = relationship(Blogger,
                                   secondary=join(Recipe, Blog, Recipe.id == Blog.recipeId),
                                   primaryjoin=Apple.id == Recipe.appleId,
                                   secondaryjoin=Blog.bloggerId == Blogger.id)
But whenever I do the following:
apple = Apple(name="RedDelicious")
session.add(apple)
session.commit()
print(apple.appleBloggers)
I get the error:
File ".../.pyenv/versions/2.7.10/lib/python2.7/site-packages/sqlalchemy/orm/relationships.py", line 1425, in __str__
return str(self.parent.class_.__name__) + "." + self.key
File ".../.pyenv/versions/2.7.10/lib/python2.7/site-packages/sqlalchemy/util/langhelpers.py", line 840, in __getattr__
return self._fallback_getattr(key)
File ".../.pyenv/versions/2.7.10/lib/python2.7/site-packages/sqlalchemy/util/langhelpers.py", line 818, in _fallback_getattr
raise AttributeError(key)
AttributeError: parent

You're mixing declarative and classical mappings. Assigning a relationship like that only works with declarative. The proper way to do this in a classical mapping is:
mapper(Apple, appleTable, properties={
    "appleBloggers": relationship(Blogger,
                                  secondary=recipeTable.join(blogTable, recipeTable.c.id == blogTable.c.recipeId),
                                  primaryjoin=appleTable.c.id == recipeTable.c.appleId,
                                  secondaryjoin=blogTable.c.bloggerId == bloggerTable.c.id)
})
Alternative Solution (with mapped objects):
mapper(Apple, appleTable, properties={
    "appleBloggers": relationship(Blogger,
                                  secondary=join(Recipe, Blog, Recipe.id == Blog.recipeId),
                                  secondaryjoin=lambda: and_(Apple.id == Recipe.appleId,
                                                             Blogger.id == Blog.bloggerId),
                                  viewonly=True)
})
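With either mapping in place, the attribute reads like any other relationship (a minimal usage sketch, assuming a configured Session):

apple = session.query(Apple).filter_by(name="RedDelicious").one()
print(apple.appleBloggers)  # list of Blogger instances who blogged apple recipes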

SQLAlchemy CRUD operation is inconsistent with or without outcome assignment

I have an app where I want the user to be able to bookmark/un-bookmark a blog, but upon un-bookmarking, I don't want to remove that bookmark record. So I have an is_bookmarked property on my Bookmark model to determine whether a bookmark is active/inactive.
In my test file, I have
def test_unbookmark_a_blog_do_assign(session):
    blog = create_blog(session)
    bookmark = toggle_bookmark(session, blog_id=blog.id)
    assert len(blog.bookmarks) == 1

    toggle_bookmark(session, blog_id=blog.id)
    assert len(blog.bookmarks) == 0
This test passes. However, the following one doesn't. (The only difference is that I don't assign the result of toggle_bookmark to a variable.)
def test_unbookmark_a_blog_no_assign(session):
    blog = create_blog(session)
    toggle_bookmark(session, blog_id=blog.id)
    assert len(blog.bookmarks) == 1

    toggle_bookmark(session, blog_id=blog.id)
    assert len(blog.bookmarks) == 0
It fails at the second assertion, assert len(blog.bookmarks) == 0. The reason is that blog._bookmarks[0].is_bookmarked does not get updated outside the toggle_bookmark function and is still True, so the bookmark still shows up in blog.bookmarks. (Definitions attached below.)
For context, I am using classic mapping:
@dataclass
class Bookmark:
    is_bookmarked: bool = True
    blog_id: Optional[int] = None

@dataclass
class Blog:
    _bookmarks: List[Bookmark] = field(default_factory=list)

    def add_bookmark(self, bookmark):
        self._bookmarks.append(bookmark)

    @property
    def bookmarks(self):
        return [bookmark for bookmark in self._bookmarks if bookmark.is_bookmarked]
...
blog_table = Table(
    "blog",
    metadata,
    Column("id", Integer, primary_key=True, index=True))
bookmark_table = Table(
    "bookmark",
    metadata,
    Column("id", Integer, primary_key=True, index=True),
    Column("is_bookmarked", Boolean, default=True),
    Column("blog_id", ForeignKey("blog.id"), nullable=True),
)
...
mapper(
    Blog,
    blog_table,
    properties={
        "_bookmarks": relationship(Bookmark, back_populates="blog"),
    },
)
mapper(
    Bookmark,
    bookmark_table,
    properties={
        "blog": relationship(Blog, back_populates="_bookmarks"),
    },
)
The toggle_bookmark function:
def toggle_bookmark(db_session, *, blog_id):
    blog = db_session.query(Blog).get(blog_id)
    bookmark = db_session.query(Bookmark).filter(
        Bookmark.blog_id == blog_id
    ).one_or_none()
    if bookmark is None:
        bookmark = Bookmark()
        blog.add_bookmark(bookmark)
        db_session.add(blog)
        db_session.commit()
        return bookmark

    bookmark.is_bookmarked = not bookmark.is_bookmarked
    db_session.add(bookmark)
    db_session.commit()
    return bookmark
I am really confused... My gut tells me that it has something to do with when the query gets evaluated, but I haven't managed to find any evidence to support that. Any help is appreciated. Thanks in advance!
A full example:
from dataclasses import dataclass, field
from typing import Optional, List

from sqlalchemy import (
    create_engine, MetaData, Table, Column, Integer, Boolean, ForeignKey)
from sqlalchemy.orm import mapper, relationship, sessionmaker

@dataclass
class Bookmark:
    is_bookmarked: bool = True
    blog_id: Optional[int] = None

@dataclass
class Blog:
    _bookmarks: List[Bookmark] = field(default_factory=list)

    def add_bookmark(self, bookmark):
        self._bookmarks.append(bookmark)

    @property
    def bookmarks(self):
        return [bookmark for bookmark in self._bookmarks if bookmark.is_bookmarked]

engine = create_engine("sqlite:///")
metadata = MetaData(bind=engine)

blog_table = Table(
    "blog",
    metadata,
    Column("id", Integer, primary_key=True, index=True))
bookmark_table = Table(
    "bookmark",
    metadata,
    Column("id", Integer, primary_key=True, index=True),
    Column("is_bookmarked", Boolean, default=True),
    Column("blog_id", ForeignKey("blog.id"), nullable=True),
)
metadata.create_all()

mapper(
    Blog,
    blog_table,
    properties={
        "_bookmarks": relationship(Bookmark, back_populates="blog"),
    },
)
mapper(
    Bookmark,
    bookmark_table,
    properties={
        "blog": relationship(Blog, back_populates="_bookmarks"),
    },
)

def toggle_bookmark(db_session, *, blog_id):
    blog = db_session.query(Blog).get(blog_id)
    bookmark = db_session.query(Bookmark).filter(
        Bookmark.blog_id == blog_id
    ).one_or_none()
    if bookmark is None:
        bookmark = Bookmark()
        blog.add_bookmark(bookmark)
        db_session.add(blog)
        db_session.commit()
        return bookmark

    bookmark.is_bookmarked = not bookmark.is_bookmarked
    db_session.add(bookmark)
    db_session.commit()
    return bookmark

def create_blog(session):
    blog = Blog()
    session.add(blog)
    session.commit()
    return blog

def test_unbookmark_a_blog_do_assign(session):
    blog = create_blog(session)
    bookmark = toggle_bookmark(session, blog_id=blog.id)
    assert len(blog.bookmarks) == 1

    toggle_bookmark(session, blog_id=blog.id)
    assert len(blog.bookmarks) == 0

def test_unbookmark_a_blog_no_assign(session):
    blog = create_blog(session)
    toggle_bookmark(session, blog_id=blog.id)
    assert len(blog.bookmarks) == 1

    toggle_bookmark(session, blog_id=blog.id)
    assert len(blog.bookmarks) == 0

Session = sessionmaker()

test_unbookmark_a_blog_do_assign(Session())
test_unbookmark_a_blog_no_assign(Session())
The core problem is this:
class Bookmark:
    is_bookmarked: bool = True  # <-- This here
Classical mapping does not install instrumentation over an existing class attribute, so changes to an instance's is_bookmarked are not persisted. It follows that without the assignment, the test reads the state from the database, where the column still holds its default value True. With the assignment, the instance is kept alive in the scope of the test and so in the Session's identity map, and the later queries return the existing, modified instance.
You would run into similar issues with SQLAlchemy and dataclasses when using field() with default=:
>>> from dataclasses import dataclass, field
>>> @dataclass
... class C:
...     f: bool = field(default=True)
...
>>> C.f
True
The solution is to use field() with default_factory= for is_bookmarked as well:
@dataclass
class Bookmark:
    is_bookmarked: bool = field(default_factory=lambda: True)
    ...
since in recent enough Python versions the field() is then not visible in the class as an attribute, and the mapping can install its instrumentation.
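A quick way to see the difference (a sketch; the class names are illustrative):

from dataclasses import dataclass, field

@dataclass
class WithDefault:
    f: bool = field(default=True)

@dataclass
class WithFactory:
    f: bool = field(default_factory=lambda: True)

# with default=, the value is left on the class, where it would shadow
# SQLAlchemy's instrumentation; with default_factory= it is not
print("f" in vars(WithDefault))   # True
print("f" in vars(WithFactory))   # False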

sqlalchemy generic foreign key (like in django ORM)

Does SQLAlchemy have something like Django's GenericForeignKey? And is it a good idea to use generic foreign keys at all?
My problem is: I have several models (for example, Post, Project, Vacancy, nothing special there) and I want to add comments to each of them, using only one Comment model. Is that worth doing? Or should I use PostComment, ProjectComment, etc.? What are the pros/cons of both ways?
Thanks!
The simplest pattern, which I use most often, is that you actually have separate Comment tables for each relationship. This may seem frightening at first, but it doesn't incur any additional code versus the other approaches - the tables are created automatically, and the models are referred to using the pattern Post.Comment, Project.Comment, etc. The definition of Comment is maintained in one place. From a referential point of view, this approach is the simplest and most efficient, as well as the most DBA-friendly, since the different kinds of Comments are kept in their own tables, which can be sized individually.
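A minimal sketch of this first pattern, modeled on the table_per_related example in the SQLAlchemy distribution (the Comment/HasComments/Post names are illustrative):

from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy.orm import relationship

Base = declarative_base()

class Comment(object):
    """Columns shared by every generated Comment table."""
    id = Column(Integer, primary_key=True)
    text = Column(String(500))

class HasComments(object):
    """Mixin which creates a distinct Comment class/table per parent."""
    @declared_attr
    def comments(cls):
        cls.Comment = type(
            "%sComment" % cls.__name__,
            (Comment, Base),
            dict(
                __tablename__="%s_comment" % cls.__tablename__,
                parent_id=Column(Integer, ForeignKey("%s.id" % cls.__tablename__)),
                parent=relationship(cls),
            ),
        )
        return relationship(cls.Comment)

class Post(HasComments, Base):
    __tablename__ = 'post'
    id = Column(Integer, primary_key=True)

# usage: Post.Comment is a distinct mapped class, e.g.
#   post.comments.append(Post.Comment(text="nice post"))

Each class that mixes in HasComments gets its own <Name>Comment table, while the column definitions live in one place.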
Another pattern is a single Comment table with distinct association tables. This pattern supports the use case where you might want a Comment linked to more than one kind of object at a time (like a Post and a Project simultaneously). This pattern is still reasonably efficient.
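A sketch of this second pattern (again with illustrative names; each parent kind gets its own association table pointing at the single comment table):

from sqlalchemy import Column, ForeignKey, Integer, String, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship

Base = declarative_base()

class Comment(Base):
    __tablename__ = 'comment'
    id = Column(Integer, primary_key=True)
    text = Column(String(500))

post_comment = Table(
    'post_comment', Base.metadata,
    Column('post_id', ForeignKey('post.id'), primary_key=True),
    Column('comment_id', ForeignKey('comment.id'), primary_key=True),
)

class Post(Base):
    __tablename__ = 'post'
    id = Column(Integer, primary_key=True)
    comments = relationship(Comment, secondary=post_comment)

# a Project class would get its own project_comment association table
# in the same way, while sharing the single comment table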
Thirdly, there's the polymorphic association table. This pattern uses a fixed number of tables to represent the collections and the related class without sacrificing referential integrity. It tries to come closest to the Django-style "generic foreign key" while still maintaining referential integrity, though it's not as simple as the previous two approaches.
Imitating the pattern used by ROR/Django, where there are no real foreign keys used and rows are matched using application logic, is also possible.
The first three patterns are illustrated in modern form in the SQLAlchemy distribution under examples/generic_associations/.
Since the ROR/Django pattern gets asked about so often, I will also add it to the SQLAlchemy examples, even though I don't like it much. The approach I'm using is not exactly the same as Django's, since they seem to use a "contenttypes" table to keep track of types, which seems superfluous to me, but the general idea of an integer column that points to any number of tables based on a discriminator column is present. Here it is:
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy import create_engine, Integer, Column, \
    String, and_
from sqlalchemy.orm import Session, relationship, foreign, remote, backref
from sqlalchemy import event

class Base(object):
    """Base class which provides automated table name
    and surrogate primary key column.
    """
    @declared_attr
    def __tablename__(cls):
        return cls.__name__.lower()
    id = Column(Integer, primary_key=True)

Base = declarative_base(cls=Base)

class Address(Base):
    """The Address class.

    This represents all address records in a
    single table.
    """
    street = Column(String)
    city = Column(String)
    zip = Column(String)

    discriminator = Column(String)
    """Refers to the type of parent."""

    parent_id = Column(Integer)
    """Refers to the primary key of the parent.

    This could refer to any table.
    """

    @property
    def parent(self):
        """Provides in-Python access to the "parent" by choosing
        the appropriate relationship.
        """
        return getattr(self, "parent_%s" % self.discriminator)

    def __repr__(self):
        return "%s(street=%r, city=%r, zip=%r)" % \
            (self.__class__.__name__, self.street,
             self.city, self.zip)

class HasAddresses(object):
    """HasAddresses mixin, creates a relationship to
    the address_association table for each parent.
    """

@event.listens_for(HasAddresses, "mapper_configured", propagate=True)
def setup_listener(mapper, class_):
    name = class_.__name__
    discriminator = name.lower()
    class_.addresses = relationship(Address,
                                    primaryjoin=and_(
                                        class_.id == foreign(remote(Address.parent_id)),
                                        Address.discriminator == discriminator
                                    ),
                                    backref=backref(
                                        "parent_%s" % discriminator,
                                        primaryjoin=remote(class_.id) == foreign(Address.parent_id)
                                    )
                                    )

    @event.listens_for(class_.addresses, "append")
    def append_address(target, value, initiator):
        value.discriminator = discriminator

class Customer(HasAddresses, Base):
    name = Column(String)

class Supplier(HasAddresses, Base):
    company_name = Column(String)

engine = create_engine('sqlite://', echo=True)
Base.metadata.create_all(engine)

session = Session(engine)

session.add_all([
    Customer(
        name='customer 1',
        addresses=[
            Address(
                street='123 anywhere street',
                city="New York",
                zip="10110"),
            Address(
                street='40 main street',
                city="San Francisco",
                zip="95732")
        ]
    ),
    Supplier(
        company_name="Ace Hammers",
        addresses=[
            Address(
                street='2569 west elm',
                city="Detroit",
                zip="56785")
        ]
    ),
])

session.commit()

for customer in session.query(Customer):
    for address in customer.addresses:
        print(address)
        print(address.parent)
I know this is probably a terrible way to do this, but it was a quick fix for me.
# (imports assumed by this snippet; AbstractBase and data_types are
# application-specific)
import sqlalchemy as sql
from sqlalchemy.orm import composite, object_session

class GenericRelation(object):
    def __init__(self, object_id, object_type):
        self.object_id = object_id
        self.object_type = object_type

    def __composite_values__(self):
        return (self.object_id, self.object_type)

class Permission(AbstractBase):
    # __abstract__ = True

    _object = None

    _generic = composite(
        GenericRelation,
        sql.Column('object_id', data_types.UUID, nullable=False),
        sql.Column('object_type', sql.String, nullable=False),
    )

    permission_type = sql.Column(sql.Integer)

    @property
    def object(self):
        session = object_session(self)
        if self._object or not session:
            return self._object
        else:
            # eval() maps the stored class name back to a class (fragile)
            object_class = eval(self.object_type)
            self._object = session.query(object_class).filter(
                object_class.id == self.object_id).first()
            return self._object

    @object.setter
    def object(self, value):
        self._object = value
        self.object_type = value.__class__.__name__
        self.object_id = value.id

How can I 'index' SQLAlchemy model attributes that are primary keys and relationships

Say I have some classes X, Y and Z using the SQLAlchemy declarative syntax to define some simple columns and relationships.

Requirements:

1. At the class level, (X|Y|Z).primary_keys should return a collection of the respective class's primary key attributes (InstrumentedAttribute objects). I also want (X|Y|Z).relations to reference the class's relationships in the same way.
2. At the instance level, I would like the same attributes to reference those attributes' instantiated values, whether they've been populated by my own constructors, individual attribute setters, or whatever SQLAlchemy does when it retrieves rows from the db.
So far I have the following.
import collections
import sqlalchemy
import sqlalchemy.ext.declarative
from sqlalchemy import MetaData, Column, Table, ForeignKey, Integer, String, Date, Text
from sqlalchemy.orm import relationship, backref

class IndexedMeta(sqlalchemy.ext.declarative.DeclarativeMeta):
    """Metaclass to initialize some class-level collections on models"""
    def __new__(cls, name, bases, defaultdict):
        cls.pk_columns = set()
        cls.relations = collections.namedtuple('RelationshipItem', 'one many')(set(), set())
        return super().__new__(cls, name, bases, defaultdict)

Base = sqlalchemy.ext.declarative.declarative_base(metaclass=IndexedMeta)

def build_class_lens(cls, key, inst):
    """Populates the 'indexes' of primary key and relationship attributes
    with the attributes' names. Additionally, separates "x to many"
    relationships from "x to one" relationships and associates "x to one"
    relationships with the local-side foreign key column."""
    if isinstance(inst.property, sqlalchemy.orm.properties.ColumnProperty):
        if inst.property.columns[0].primary_key:
            cls.pk_columns.add(inst.key)
    elif isinstance(inst.property, sqlalchemy.orm.properties.RelationshipProperty):
        if inst.property.direction.name in ('MANYTOONE', 'ONETOONE'):
            local_column = cls.__mapper__.get_property_by_column(inst.property.local_side[0]).key
            cls.relations.one.add((local_column, inst.key))
        else:
            cls.relations.many.add(inst.key)

sqlalchemy.event.listen(Base, 'attribute_instrument', build_class_lens)
class Meeting(Base):
    __tablename__ = 'meetings'

    def __init__(self, memo):
        self.memo = memo

    id = Column(Integer, primary_key=True)
    date = Column(Date)
    memo = Column('note', String(60), nullable=True)
    category_name = Column('category', String(60), ForeignKey('categories.name'))
    category = relationship("Category", backref=backref('meetings'))
    topics = relationship("Topic",
                          secondary=meetings_topics,
                          backref="meetings")
...
...
OK, so that gets me by on the class level, though I feel like I am doing silly things with metaclasses, and I get some strange intermittent errors where the sqlalchemy module allegedly isn't recognized in build_class_lens and evaluates to NoneType.
I am not quite sure how I should proceed at the instance level.
I've looked into the events interface. I see the ORM event init, but it seems to run prior to the __init__ function defined on my models, meaning the instance attributes haven't yet been populated at that time, so I can't build my 'lens' on them.
I also wonder if the Attribute event set might be of help. That is my next try, though I still wonder if it is the most appropriate way.
All in all I really wonder if I am missing some really elegant way to approach this problem.
I think the metaclass thing with declarative goes by the old XML saying, "if you have a problem, and use XML, now you have two problems". The metaclass in Python is useful pretty much as a hook to detect the construction of new classes, and that's about it. We now have enough events that there shouldn't be any need to use a metaclass beyond what declarative already does.
In this case I'd go a little further and say that the approach of trying to actively build up these collections is not really worth it - it's much easier to generate them lazily, as below:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
import collections
from sqlalchemy.orm.properties import RelationshipProperty

class memoized_classproperty(object):
    """A decorator that evaluates once at the class level,
    assigns the new value to the class.
    """
    def __init__(self, fget, doc=None):
        self.fget = fget
        self.__doc__ = doc or fget.__doc__
        self.__name__ = fget.__name__

    def __get__(desc, self, cls):
        result = desc.fget(cls)
        setattr(cls, desc.__name__, result)
        return result

class Lens(object):
    @memoized_classproperty
    def pk_columns(cls):
        return class_mapper(cls).primary_key

    @memoized_classproperty
    def relations(cls):
        props = collections.namedtuple('RelationshipItem', 'one many')(set(), set())
        # 0.8 will have "inspect(cls).relationships" here
        mapper = class_mapper(cls)
        for item in mapper.iterate_properties:
            if isinstance(item, RelationshipProperty):
                if item.direction.name in ('MANYTOONE', 'ONETOONE'):
                    local_column = mapper.get_property_by_column(item.local_side[0]).key
                    props.one.add((local_column, item.key))
                else:
                    props.many.add(item.key)
        return props

Base = declarative_base(cls=Lens)
meetings_topics = Table("meetings_topics", Base.metadata,
    Column('topic_id', Integer, ForeignKey('topic.id')),
    Column('meetings_id', Integer, ForeignKey('meetings.id')),
)

class Meeting(Base):
    __tablename__ = 'meetings'

    def __init__(self, memo):
        self.memo = memo

    id = Column(Integer, primary_key=True)
    date = Column(Date)
    memo = Column('note', String(60), nullable=True)
    category_name = Column('category', String(60), ForeignKey('categories.name'))
    category = relationship("Category", backref=backref('meetings'))
    topics = relationship("Topic",
                          secondary=meetings_topics,
                          backref="meetings")

class Category(Base):
    __tablename__ = 'categories'
    name = Column(String(50), primary_key=True)

class Topic(Base):
    __tablename__ = 'topic'
    id = Column(Integer, primary_key=True)

print Meeting.pk_columns
print Meeting.relations.one

# assignment is OK, since prop is memoized
Meeting.relations.one.add("FOO")
print Meeting.relations.one

SQLAlchemy - How to make "django choices" using SQLAlchemy?

In Django we can use very simple "choices" e.g.:
GENDER_CHOICES = (
('M', 'Male'),
('F', 'Female'),
)
class Foo(models.Model):
gender = models.CharField(max_length=1, choices=GENDER_CHOICES)
How to make something like this using SQLAlchemy?
I would probably go for sqlalchemy_utils.
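Its ChoiceType covers the Django-style use case directly; a minimal sketch based on the sqlalchemy_utils documentation (names are illustrative):

from sqlalchemy import Column, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy_utils import ChoiceType

Base = declarative_base()

class User(Base):
    __tablename__ = 'user'

    GENDER_CHOICES = [
        ('M', 'Male'),
        ('F', 'Female'),
    ]

    id = Column(Integer, primary_key=True)
    gender = Column(ChoiceType(GENDER_CHOICES))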
Use custom types.
Example:
import sqlalchemy.types as types

class ChoiceType(types.TypeDecorator):

    impl = types.String

    def __init__(self, choices, **kw):
        self.choices = dict(choices)
        super(ChoiceType, self).__init__(**kw)

    def process_bind_param(self, value, dialect):
        return [k for k, v in self.choices.iteritems() if v == value][0]

    def process_result_value(self, value, dialect):
        return self.choices[value]
The use of it would look like:
class Entity(Base):
    __tablename__ = "entity"

    height = Column(
        ChoiceType({"short": "short", "medium": "medium", "tall": "tall"}),
        nullable=False,
    )
If you are using Python 3, you have to change iteritems() to items().
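Applied to the question's gender example (a sketch assuming the usual declarative Base, Column, and Integer imports), usage might look like this; note that with this implementation you assign the label, and the matching key is what gets stored:

GENDER_CHOICES = (
    ('M', 'Male'),
    ('F', 'Female'),
)

class Foo(Base):
    __tablename__ = 'foo'
    id = Column(Integer, primary_key=True)
    gender = Column(ChoiceType(GENDER_CHOICES))

foo = Foo(gender='Male')  # process_bind_param stores 'M' in the database
# after a round trip, process_result_value returns 'Male' again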
SQLAlchemy has an Enum type, which functions like Django's "choices" field.
From the docs:
'The Enum type will make use of the backend’s native “ENUM” type if one is available; otherwise, it uses a VARCHAR datatype and produces a CHECK constraint.'
import enum

class MyEnum(enum.Enum):
    one = 1
    two = 2
    three = 3

t = Table(
    'data', MetaData(),
    Column('value', Enum(MyEnum))
)

connection.execute(t.insert(), {"value": MyEnum.two})
assert connection.scalar(t.select()) is MyEnum.two
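For the gender case specifically, Enum can also be given plain strings, which stays close to the Django original (a sketch, assuming a declarative Base; name= is the database-side type name):

class Foo(Base):
    __tablename__ = 'foo'
    id = Column(Integer, primary_key=True)
    gender = Column(Enum('M', 'F', name='gender_types'))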

Reverse mapping from a table to a model in SQLAlchemy

To provide an activity log in my SQLAlchemy-based app, I have a model like this:
class ActivityLog(Base):
    __tablename__ = 'activitylog'
    id = Column(Integer, primary_key=True)
    activity_by_id = Column(Integer, ForeignKey('users.id'), nullable=False)
    activity_by = relation(User, primaryjoin=activity_by_id == User.id)
    activity_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    activity_type = Column(SmallInteger, nullable=False)
    target_table = Column(Unicode(20), nullable=False)
    target_id = Column(Integer, nullable=False)
    target_title = Column(Unicode(255), nullable=False)
The log contains entries for multiple tables, so I can't use ForeignKey relations. Log entries are made like this:
doc = Document(name=u'mydoc', title=u'My Test Document',
               created_by=user, edited_by=user)
session.add(doc)
session.flush()  # See note below

log = ActivityLog(activity_by=user, activity_type=ACTIVITY_ADD,
                  target_table=Document.__table__.name, target_id=doc.id,
                  target_title=doc.title)
session.add(log)
This leaves me with three problems:

1. I have to flush the session before my doc object gets an id. If I had used a ForeignKey column and a relation mapper, I could have simply called ActivityLog(target=doc) and let SQLAlchemy do the work. Is there any way to work around needing to flush by hand?
2. The target_table parameter is too verbose. I suppose I could solve this with a target property setter in ActivityLog that automatically retrieves the table name and id from a given instance (see the sketch after this list).
3. Biggest of all, I'm not sure how to retrieve a model instance from the database. Given an ActivityLog instance log, calling self.session.query(log.target_table).get(log.target_id) does not work, as query() expects a model as parameter.
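For problems 2 and 3, a property on ActivityLog along these lines might work (a rough, untested sketch: model_for_table and the scan over Base._decl_class_registry are assumptions about mapping a table name back to its class, not an established API):

from sqlalchemy.orm import object_session

def model_for_table(table_name):
    # assumption: scan the declarative registry for the mapped class
    for cls in Base._decl_class_registry.values():
        if getattr(cls, '__tablename__', None) == table_name:
            return cls

class ActivityLog(Base):
    # ... columns as above ...

    @property
    def target(self):
        cls = model_for_table(self.target_table)
        return object_session(self).query(cls).get(self.target_id)

    @target.setter
    def target(self, obj):
        self.target_table = obj.__table__.name
        self.target_id = obj.id
        self.target_title = obj.title

This would still leave problem 1, since obj.id is only populated after a flush.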
One workaround appears to be to use polymorphism and derive all my models from a base model which ActivityLog recognises. Something like this:
class Entity(Base):
    __tablename__ = 'entities'
    id = Column(Integer, primary_key=True)
    title = Column(Unicode(255), nullable=False)
    edited_at = Column(DateTime, onupdate=datetime.utcnow, nullable=False)
    entity_type = Column(Unicode(20), nullable=False)
    __mapper_args__ = {'polymorphic_on': entity_type}

class Document(Entity):
    __tablename__ = 'documents'
    __mapper_args__ = {'polymorphic_identity': 'document'}
    # joined-table inheritance needs its own pk/fk column:
    id = Column(Integer, ForeignKey('entities.id'), primary_key=True)
    body = Column(UnicodeText, nullable=False)

class ActivityLog(Base):
    __tablename__ = 'activitylog'
    id = Column(Integer, primary_key=True)
    ...
    target_id = Column(Integer, ForeignKey('entities.id'), nullable=False)
    target = relation(Entity)
If I do this, ActivityLog(...).target will give me a Document instance when it refers to a Document, but I'm not sure it's worth the overhead of having two tables for everything. Should I go ahead and do it this way?
One way to solve this is to use polymorphic associations. They should solve all three of your issues and also make database foreign key constraints work. See the polymorphic association example in the SQLAlchemy source. Mike Bayer has an old blog post that discusses this in greater detail.
Definitely go through the blog post and examples Ants linked to. I did not find the explanation confusing, but it does assume some more experience on the topic.
A few things I can suggest:

ForeignKeys: in general I agree they are a good thing to have, but I am not sure it is conceptually important in your case: you seem to be using this ActivityLog as an orthogonal cross-cutting concern (AOP); a version with foreign keys would effectively make your business objects aware of the ActivityLog. Another problem with having an FK for audit purposes with the schema setup you have is that if you allow object deletion, the FK requirement would delete all the ActivityLog entries for that object.

Automatic logging: you are doing all this logging manually whenever you create/modify(/delete) the object. With SA you could implement a SessionExtension with before_commit which would do the job for you automatically.
This way you can completely avoid writing parts like this:
log = ActivityLog(activity_by=user, activity_type=ACTIVITY_ADD,
                  target_table=Document.__table__.name, target_id=doc.id,
                  target_title=doc.title)
session.add(log)
EDIT-1: complete sample code added
The code is based on the first non-FK version from http://techspot.zzzeek.org/?p=13. The choice not to use an FK is based on the fact that, for audit purposes, deleting the main object should not cascade to delete all of its audit log entries. Also, this keeps auditable objects unaware of the fact that they are being audited.

The implementation uses an SA one-to-many relationship. It is possible that some objects are modified many times, which will result in many audit log entries. By default SA will load the relationship objects when adding a new entry to the list. Assuming that during "normal" usage we only want to add new audit log entries, we use the lazy='noload' flag so that the relation from the main object will never be loaded. It is loaded when navigated from the other side though, and can also be loaded from the main object using a custom query, which is shown in the example as well using the activitylogs_readonly read-only property.
Code (runnable with some tests):
from datetime import datetime
from sqlalchemy import create_engine, Column, Integer, SmallInteger, String, DateTime, ForeignKey, Table, UnicodeText, Unicode, and_
from sqlalchemy.orm import relationship, dynamic_loader, scoped_session, sessionmaker, class_mapper, backref
from sqlalchemy.orm.session import Session
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.interfaces import SessionExtension
import logging
logging.basicConfig(level=logging.INFO)
_logger = logging.getLogger()
ACTIVITY_ADD = 1
ACTIVITY_MOD = 2
ACTIVITY_DEL = 3
class ActivityLogSessionExtension(SessionExtension):
    _logger = logging.getLogger('ActivityLogSessionExtension')

    def before_commit(self, session):
        self._logger.debug("before_commit: %s", session)
        for d in session.new:
            self._logger.info("before_commit >> add: %s", d)
            if hasattr(d, 'create_activitylog'):
                log = d.create_activitylog(ACTIVITY_ADD)
        for d in session.dirty:
            self._logger.info("before_commit >> mod: %s", d)
            if hasattr(d, 'create_activitylog'):
                log = d.create_activitylog(ACTIVITY_MOD)
        for d in session.deleted:
            self._logger.info("before_commit >> del: %s", d)
            if hasattr(d, 'create_activitylog'):
                log = d.create_activitylog(ACTIVITY_DEL)
# Configure test data SA
engine = create_engine('sqlite:///:memory:', echo=False)
session = scoped_session(sessionmaker(bind=engine, autoflush=False, extension=ActivityLogSessionExtension()))
Base = declarative_base()
Base.query = session.query_property()
class _BaseMixin(object):
    """ Just a helper mixin class to set properties on object creation.

    Also provides a convenient default __repr__() function, but be aware that
    also relationships are printed, which might result in loading relations.
    """
    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        return "<%s(%s)>" % (self.__class__.__name__,
            ', '.join('%s=%r' % (k, self.__dict__[k])
                      for k in sorted(self.__dict__) if '_sa_' != k[:4] and '_backref_' != k[:9])
        )
class User(Base, _BaseMixin):
    __tablename__ = u'users'
    id = Column(Integer, primary_key=True)
    name = Column(String)

class Document(Base, _BaseMixin):
    __tablename__ = u'documents'
    id = Column(Integer, primary_key=True)
    title = Column(Unicode(255), nullable=False)
    body = Column(UnicodeText, nullable=False)

class Folder(Base, _BaseMixin):
    __tablename__ = u'folders'
    id = Column(Integer, primary_key=True)
    title = Column(Unicode(255), nullable=False)
    comment = Column(UnicodeText, nullable=False)

class ActivityLog(Base, _BaseMixin):
    __tablename__ = u'activitylog'
    id = Column(Integer, primary_key=True)
    activity_by_id = Column(Integer, ForeignKey('users.id'), nullable=False)
    activity_by = relationship(User)  # @note: no need to specify the primaryjoin
    activity_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    activity_type = Column(SmallInteger, nullable=False)
    target_table = Column(Unicode(20), nullable=False)
    target_id = Column(Integer, nullable=False)
    target_title = Column(Unicode(255), nullable=False)

    # backref relation for auditable
    target = property(lambda self: getattr(self, '_backref_%s' % self.target_table))
def _get_user():
    """ This method returns the User object for the current user.

    @todo: proper implementation required
    @hack: currently returns the 'user2'
    """
    return session.query(User).filter_by(name='user2').one()
# auditable support function
# based on first non-FK version from http://techspot.zzzeek.org/?p=13
def auditable(cls, name):
    def create_activitylog(self, activity_type):
        log = ActivityLog(activity_by=_get_user(),
                          activity_type=activity_type,
                          target_table=table.name,
                          target_title=self.title,
                          )
        getattr(self, name).append(log)
        return log

    mapper = class_mapper(cls)
    table = mapper.local_table
    cls.create_activitylog = create_activitylog

    def _get_activitylog(self):
        return Session.object_session(self).query(ActivityLog).with_parent(self).all()
    setattr(cls, '%s_readonly' % (name,), property(_get_activitylog))

    # no constraints, therefore define constraints in an ad-hoc fashion.
    primaryjoin = and_(
        list(table.primary_key)[0] == ActivityLog.__table__.c.target_id,
        ActivityLog.__table__.c.target_table == table.name
    )
    foreign_keys = [ActivityLog.__table__.c.target_id]

    mapper.add_property(name,
        # @note: because we use the relationship, by default all previous
        # ActivityLog items will be loaded for an object when a new one is
        # added. To avoid this, use either dynamic_loader (http://www.sqlalchemy.org/docs/reference/orm/mapping.html#sqlalchemy.orm.dynamic_loader)
        # or lazy='noload'. This is the trade-off decision to be made.
        # An additional benefit of using lazy='noload' is that one can also
        # record DEL operations in the same way as ADD, MOD
        relationship(
            ActivityLog,
            lazy='noload',  # important for relationship
            primaryjoin=primaryjoin,
            foreign_keys=foreign_keys,
            backref=backref('_backref_%s' % table.name,
                            primaryjoin=list(table.primary_key)[0] == ActivityLog.__table__.c.target_id,
                            foreign_keys=foreign_keys)
        )
    )
# this will define which classes support the ActivityLog interface
auditable(Document, 'activitylogs')
auditable(Folder, 'activitylogs')
# create db schema
Base.metadata.create_all(engine)
## >>>>> TESTS >>>>>>
# create some basic data first
u1 = User(name='user1')
u2 = User(name='user2')
session.add(u1)
session.add(u2)
session.commit()
session.expunge_all()
# --check--
assert not(_get_user() is None)
##############################
## ADD
##############################
_logger.info('-' * 80)
d1 = Document(title=u'Document-1', body=u'Doc1 some body skipped the body')
# when not using SessionExtension for any reason, this can be called manually
#d1.create_activitylog(ACTIVITY_ADD)
session.add(d1)
session.commit()
f1 = Folder(title=u'Folder-1', comment=u'This folder is empty')
# when not using SessionExtension for any reason, this can be called manually
#f1.create_activitylog(ACTIVITY_ADD)
session.add(f1)
session.commit()
# --check--
session.expunge_all()
logs = session.query(ActivityLog).all()
_logger.debug(logs)
assert len(logs) == 2
assert logs[0].activity_type == ACTIVITY_ADD
assert logs[0].target.title == u'Document-1'
assert logs[0].target.title == logs[0].target_title
assert logs[1].activity_type == ACTIVITY_ADD
assert logs[1].target.title == u'Folder-1'
assert logs[1].target.title == logs[1].target_title
##############################
## MOD(ify)
##############################
_logger.info('-' * 80)
session.expunge_all()
d1 = session.query(Document).filter_by(id=1).one()
assert d1.title == u'Document-1'
assert d1.body == u'Doc1 some body skipped the body'
assert d1.activitylogs == []
d1.title = u'Modified: Document-1'
d1.body = u'Modified: body'
# when not using SessionExtension (or it does not work), this can be called manually
#d1.create_activitylog(ACTIVITY_MOD)
session.commit()
_logger.debug(d1.activitylogs_readonly)
# --check--
session.expunge_all()
logs = session.query(ActivityLog).all()
assert len(logs)==3
assert logs[2].activity_type == ACTIVITY_MOD
assert logs[2].target.title == u'Modified: Document-1'
assert logs[2].target.title == logs[2].target_title
##############################
## DEL(ete)
##############################
_logger.info('-' * 80)
session.expunge_all()
d1 = session.query(Document).filter_by(id=1).one()
# when not using SessionExtension for any reason, this can be called manually,
#d1.create_activitylog(ACTIVITY_DEL)
session.delete(d1)
session.commit()
session.expunge_all()
# --check--
session.expunge_all()
logs = session.query(ActivityLog).all()
assert len(logs)==4
assert logs[0].target is None
assert logs[2].target is None
assert logs[3].activity_type == ACTIVITY_DEL
assert logs[3].target is None
##############################
## print all activity logs
##############################
_logger.info('=' * 80)
logs = session.query(ActivityLog).all()
for log in logs:
    _ = log.target
    _logger.info("%s -> %s", log, log.target)
##############################
## navigate from main object
##############################
_logger.info('=' * 80)
session.expunge_all()
f1 = session.query(Folder).filter_by(id=1).one()
_logger.info(f1.activitylogs_readonly)
