I have the following table structure (I have simplified it as much as possible, narrowed down the child/inheriting tables [there are additional] and removed all irrelevant columns from the provided tables):
## Base is my declarative_base
class AbstractQuestion(Base):
questionTypeId: Column = Column(
Integer, ForeignKey("luQuestionTypes.id"), index=True, nullable=False
)
__mapper_args__ = {
"polymorphic_identity": 0,
"polymorphic_on": questionTypeId,
}
class MultiChoiceQuestion(AbstractQuestion):
id: Column = Column(Integer, ForeignKey(AbstractQuestion.id), primary_key=True)
__mapper_args__ = {"polymorphic_identity": 1}
class AbstractSurveyQuestion(AbstractQuestion):
id: Column = Column(Integer, ForeignKey(AbstractQuestion.id), primary_key=True)
surveyQuestionTypeId: Column = Column(
Integer, ForeignKey("luSurveyQuestionTypes.id"), index=True, nullable=False
)
__mapper_args__ = {"polymorphic_identity": 2}
class RatingQuestion(AbstractSurveyQuestion):
id: Column = Column(
Integer, ForeignKey(AbstractSurveyQuestion.id), primary_key=True
)
The challenge I'm facing is, that I'm trying to make AbstractSurveyQuestion have two types of polymorphic mappings - one as a child of AbstractQuestion with a polymorphic_identity that matches the questionTypeId, but I also need it to have a separate polymorphic_on mapper for its own child table, which is RatingQuestion.
The closest thing I could find was this question, but it doesn't seem to be aiming at exactly what I'm looking for.
I also looked at the official docs about inheritance, but again couldn't find an accurate example to what I'm trying to achieve.
Can anyone please help me with this?
Thanks!
I posted the same question on SQLAlchemy's GitHub repo. Got this answer from the maintainer:
https://github.com/sqlalchemy/sqlalchemy/discussions/8089#discussioncomment-2878725
I'll paste the contents below as well:
It sounds like you are looking for multi-level polymorphic_on. We don't support that right now without workarounds; that's #2555, which is a feature we're unlikely to implement, or if we did, it would be a long time from now.
It looks like you are using joined inheritance, so there are two ways. The more SQL-efficient one is to have an extra "supplementary" column on your base table that can discriminate for AbstractSurveyQuestion, because if you query for all the AbstractQuestion objects, by default it's just going to query that one table, and it needs to know from each row whether that row is in fact a RatingQuestion.
the more convoluted way is to use mapper-configured with_polymorphic so that all queries for AbstractQuestion include all the tables (or a subset of tables, can be configured, but at minimum you'd need to join out to AbstractSurveyQuestion) using a LEFT OUTER JOIN (or if you really wanted to go crazy it can be a UNION ALL).
the workarounds are a little ugly since it's not very easy to get a "composed" value out of two columns in SQL, but they are contained to the base classes. Below examples work on SQLite and might need tweaking for other databases.
Here's the discriminator on base table demo, a query here looks like:
SELECT aq.id AS aq_id, aq.d1 AS aq_d1, aq.d2 AS aq_d2, CAST(aq.d1 AS VARCHAR) || ? || CAST(coalesce(aq.d2, ?) AS VARCHAR) AS _sa_polymorphic_on
FROM aq
from typing import Tuple, Optional
from sqlalchemy import cast
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import event
from sqlalchemy import ForeignKey
from sqlalchemy import inspect
from sqlalchemy import Integer, func
from sqlalchemy import String
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import Session
Base = declarative_base()
class ident_(str):
    """Polymorphic identity made of a primary and an optional secondary part.

    The instance *is* the string ``"<d1>, <d2>"`` (a falsy ``d2`` renders as
    an empty string), which makes it directly comparable with a discriminator
    assembled as a string on the SQL side.
    """

    _tup: Tuple[int, Optional[int]]

    def __new__(cls, d1, d2=None):
        rendered = "{}, {}".format(d1, d2 or "")
        obj = str.__new__(cls, rendered)
        # Keep the raw parts so they can be recovered without parsing.
        obj._tup = (d1, d2)
        return obj

    def _as_tuple(self):
        """Return the ``(d1, d2)`` pair exactly as given to the constructor."""
        return self._tup
class AbstractQuestion(Base):
    """Root of the hierarchy, mapped to table ``aq``.

    Both discriminator columns live on this table; the mapper discriminates
    on a string composed from them.
    """

    __tablename__ = "aq"

    id = Column(Integer, primary_key=True)
    # this can be your FK to the other table etc.
    d1 = Column(Integer, nullable=False)
    # this is a "supplementary" discrim column
    d2 = Column(Integer, nullable=True)

    __mapper_args__ = {
        "polymorphic_identity": ident_(0),
        # SQL string "<d1>, <d2>"; a NULL d2 is coalesced to "" first.
        "polymorphic_on": (
            cast(d1, String) + ", " + cast(func.coalesce(d2, ""), String)
        ),
    }
@event.listens_for(AbstractQuestion, "init", propagate=True)
def _setup_poly(target, args, kw):
    """Receive new AbstractQuestion objects when they are constructed and
    set the polymorphic identity.

    Registered as an ``init`` listener on ``AbstractQuestion`` with
    ``propagate=True``. Without the decorator the function is never called
    and the discriminator columns stay unset.

    :param target: the instance being constructed.
    :param args: positional constructor arguments (unused here).
    :param kw: keyword constructor arguments; mutated in place so that
        ``d1`` (and ``d2`` when truthy) are passed on to the constructor.
    """
    # this is the ident_() object
    ident = inspect(target).mapper.polymorphic_identity
    d1, d2 = ident._as_tuple()
    kw["d1"] = d1
    # A falsy d2 means "no supplementary discriminator": leave the column unset.
    if d2:
        kw["d2"] = d2
class MultiChoiceQuestion(AbstractQuestion):
    """Subclass mapped to table ``mcq`` with identity ``ident_(1)``."""

    __tablename__ = "mcq"

    # Primary key that is also the foreign key to the parent row.
    id: Column = Column(
        Integer,
        ForeignKey(AbstractQuestion.id),
        primary_key=True,
    )

    __mapper_args__ = {"polymorphic_identity": ident_(1)}
class AbstractSurveyQuestion(AbstractQuestion):
    """Intermediate subclass mapped to table ``acq`` with identity ``ident_(2)``."""

    __tablename__ = "acq"

    # Primary key that is also the foreign key to the parent row.
    id: Column = Column(
        Integer,
        ForeignKey(AbstractQuestion.id),
        primary_key=True,
    )

    __mapper_args__ = {"polymorphic_identity": ident_(2)}
class RatingQuestion(AbstractSurveyQuestion):
    """Second-level subclass mapped to table ``rq``.

    Its identity ``ident_(2, 1)`` carries both discriminator parts.
    """

    __tablename__ = "rq"

    # Primary key that is also the foreign key to the AbstractSurveyQuestion row.
    id: Column = Column(
        Integer,
        ForeignKey(AbstractSurveyQuestion.id),
        primary_key=True,
    )

    __mapper_args__ = {"polymorphic_identity": ident_(2, 1)}
# Create the schema on a SQLite engine with SQL echo turned on.
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

# Store one MultiChoiceQuestion and one RatingQuestion.
s = Session(e)
s.add_all([MultiChoiceQuestion(), RatingQuestion()])
s.commit()
s.close()

# Query through the base class and print whatever comes back.
for q in s.query(AbstractQuestion):
    print(q)
then there's the one that maintains your schema fully, a query here looks like:
SELECT aq.id AS aq_id, aq.d1 AS aq_d1, CAST(aq.d1 AS VARCHAR) || ? || CAST(coalesce(acq.d2, ?) AS VARCHAR) AS _sa_polymorphic_on, acq.id AS acq_id, acq.d2 AS acq_d2
FROM aq LEFT OUTER JOIN acq ON aq.id = acq.id
from typing import Tuple, Optional
from sqlalchemy import cast
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import event
from sqlalchemy import ForeignKey
from sqlalchemy import func
from sqlalchemy import inspect
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import Session
Base = declarative_base()
class ident_(str):
    """Polymorphic identity made of a primary and an optional secondary part.

    The instance *is* the string ``"<d1>, <d2>"`` (a falsy ``d2`` renders as
    an empty string), which makes it directly comparable with a discriminator
    assembled as a string on the SQL side.
    """

    _tup: Tuple[int, Optional[int]]

    def __new__(cls, d1, d2=None):
        rendered = "{}, {}".format(d1, d2 or "")
        obj = str.__new__(cls, rendered)
        # Keep the raw parts so they can be recovered without parsing.
        obj._tup = (d1, d2)
        return obj

    def _as_tuple(self):
        """Return the ``(d1, d2)`` pair exactly as given to the constructor."""
        return self._tup
class AbstractQuestion(Base):
    """Root of the hierarchy, mapped to table ``aq``.

    No ``polymorphic_on`` is given in ``__mapper_args__`` here; only the
    identity ``ident_(0)`` is declared.
    """

    __tablename__ = "aq"

    id = Column(Integer, primary_key=True)
    # this can be your FK to the other table etc.
    d1 = Column(Integer, nullable=False)

    __mapper_args__ = {"polymorphic_identity": ident_(0)}
@event.listens_for(AbstractQuestion, "init", propagate=True)
def _setup_poly(target, args, kw):
    """Receive new AbstractQuestion objects when they are constructed and
    set the polymorphic identity.

    Registered as an ``init`` listener on ``AbstractQuestion`` with
    ``propagate=True``. Without the decorator the function is never called
    and the discriminator columns stay unset.

    :param target: the instance being constructed.
    :param args: positional constructor arguments (unused here).
    :param kw: keyword constructor arguments; mutated in place so that
        ``d1`` (and ``d2`` when truthy) are passed on to the constructor.
    """
    # this is the ident_() object
    ident = inspect(target).mapper.polymorphic_identity
    d1, d2 = ident._as_tuple()
    kw["d1"] = d1
    # A falsy d2 means "no supplementary discriminator": leave the column unset.
    if d2:
        kw["d2"] = d2
class MultiChoiceQuestion(AbstractQuestion):
    """Subclass mapped to table ``mcq`` with identity ``ident_(1)``."""

    __tablename__ = "mcq"

    # Primary key that is also the foreign key to the parent row.
    id: Column = Column(
        Integer,
        ForeignKey(AbstractQuestion.id),
        primary_key=True,
    )

    __mapper_args__ = {"polymorphic_identity": ident_(1)}
class AbstractSurveyQuestion(AbstractQuestion):
    """Intermediate subclass mapped to table ``acq``.

    It owns the supplementary discriminator column ``d2`` and is loaded
    inline with its parent.
    """

    __tablename__ = "acq"

    # Primary key that is also the foreign key to the parent row.
    id: Column = Column(
        Integer,
        ForeignKey(AbstractQuestion.id),
        primary_key=True,
    )
    d2 = Column(Integer, nullable=False)

    __mapper_args__ = {
        "polymorphic_identity": ident_(2),
        # adds ASQ to all AQ queries
        "polymorphic_load": "inline",
    }
# The discriminator needs a column of AbstractSurveyQuestion, so it can only
# be installed on the base mapper once that subclass has been set up.
_composed_discriminator = (
    cast(AbstractQuestion.d1, String)
    + ", "
    + cast(func.coalesce(AbstractSurveyQuestion.d2, ""), String)
)
inspect(AbstractQuestion)._set_polymorphic_on(_composed_discriminator)
class RatingQuestion(AbstractSurveyQuestion):
    """Second-level subclass mapped to table ``rq``.

    Its identity ``ident_(2, 1)`` carries both discriminator parts.
    """

    __tablename__ = "rq"

    # Primary key that is also the foreign key to the AbstractSurveyQuestion row.
    id: Column = Column(
        Integer,
        ForeignKey(AbstractSurveyQuestion.id),
        primary_key=True,
    )

    __mapper_args__ = {"polymorphic_identity": ident_(2, 1)}
# Create the schema on a SQLite engine with SQL echo turned on.
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

# Store one MultiChoiceQuestion and one RatingQuestion.
s = Session(e)
s.add_all([MultiChoiceQuestion(), RatingQuestion()])
s.commit()
s.close()

# Query through the base class and print whatever comes back.
for q in s.query(AbstractQuestion):
    print(q)
Related
I am interacting with a database with a star schema. I have a fact table that depends on a dimension table in two different ways.
When writing SQL by hand, I join the dimension table twice under different names. I want to do the same thing in SQLAlchemy.
After reading the docs and this thread, what I have is:
from sqlalchemy import create_engine, Column, Integer, String, TIMESTAMP, Float, ForeignKey, Boolean
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relation, sessionmaker, synonym, aliased
Base = declarative_base()
class RBase(Base):
    """Base mapping for the ``regions`` dimension table."""

    __tablename__ = "regions"

    # id
    id = Column("id", Integer, primary_key=True, nullable=True)

    # Dimensions
    region = Column("region", String, nullable=True)

    _all_dimensions = tuple()
class Regions(RBase):
    """``RBase`` subclass declaring the polymorphic identity ``'Regions'``."""

    # Dimensions
    __mapper_args__ = {"polymorphic_identity": "Regions"}
class OtherRegions(RBase):
    """``RBase`` subclass exposing ``region`` under the name ``otherregion``."""

    # Dimensions
    otherregion = synonym("region")

    __mapper_args__ = {"polymorphic_identity": "OtherRegions"}
class FactTable(Base):
    """Fact table ``somefact``; it references ``regions`` twice."""

    __tablename__ = "somefact"

    # Dimension ids
    sourceregionid = Column(
        "sourceregionid",
        Integer,
        ForeignKey("regions.id"),
        primary_key=True,
        nullable=True,
    )
    targetregionid = Column(
        "targetregionid",
        Integer,
        ForeignKey("regions.id"),
        primary_key=True,
        nullable=True,
    )

    # Facts
    examplefact = Column("examplefact", Float, nullable=True)

    # Relations: one per foreign key, each pinned to its own column.
    region = relation(
        Regions,
        innerjoin=True,
        foreign_keys=[sourceregionid],
    )
    otherregion = relation(
        OtherRegions,
        innerjoin=True,
        foreign_keys=[targetregionid],
    )
The statement I'm getting from this is
SELECT regions.region, somefact.examplefact
FROM somefact
JOIN regions ON regions.id = somefact.sourceregionid
JOIN regions ON regions.id = somefact.targetregionid
WHERE regions.region = %(region_1)s
which causes an error. What I want is
SELECT regions.region, otherregions.region as otherregion, somefact.examplefact
FROM somefact
JOIN regions ON regions.id = somefact.sourceregionid
JOIN regions as otherregions ON otherregions.id = somefact.targetregionid
WHERE regions.region = %(region_1)s
I have spent a while going in circles about this, so any help would be much appreciated.
I need to create a PostgreSQL Full Text Search index in Python with SQLAlchemy. Here's what I want in SQL:
CREATE TABLE person ( id INTEGER PRIMARY KEY, name TEXT );
CREATE INDEX person_idx ON person USING GIN (to_tsvector('simple', name));
Now how do I do the second part with SQLAlchemy when using the ORM:
class Person(db.Model):
    """Model with an integer primary key and a string ``name`` column."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String)
You could create index using Index in __table_args__. Also I use a function to create ts_vector to make it more tidy and reusable if more than one field is required. Something like below:
from sqlalchemy.dialects import postgresql
def create_tsvector(*args):
    """Build ``to_tsvector('english', ...)`` over the given expressions.

    The arguments are joined left to right with a single-space separator
    before being handed to ``to_tsvector``. At least one argument is
    required; indexing ``args[0]`` fails otherwise.
    """
    document = args[0]
    for part in args[1:]:
        document += ' ' + part
    return func.to_tsvector('english', document)
class Person(db.Model):
    """Model with a GIN full-text index over ``name``."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String)

    # Expression shared by the index definition and by queries; a NULL name
    # is coalesced to '' before being cast to TEXT.
    __ts_vector__ = create_tsvector(
        cast(func.coalesce(name, ''), postgresql.TEXT)
    )

    # The trailing comma is required: __table_args__ must be a tuple
    # (or a dict / None), and "(Index(...))" without it is just the Index.
    __table_args__ = (
        Index(
            'idx_person_fts',
            __ts_vector__,
            postgresql_using='gin',
        ),
    )
Update:
A sample query using index (corrected based on comments):
people = Person.query.filter(Person.__ts_vector__.match(expressions, postgresql_regconfig='english')).all()
The answer from @sharez is really useful (especially if you need to concatenate columns in your index). For anyone looking to create a tsvector GIN index on a single column, you can simplify the original answer's approach with something like:
from sqlalchemy import Column, Index, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func
Base = declarative_base()
class Example(Base):
    """Table ``examples`` with a GIN index on ``to_tsvector('english', textsearch)``."""

    __tablename__ = 'examples'

    id = Column(Integer, primary_key=True)
    textsearch = Column(String)

    # Single-element tuple: keep the trailing comma.
    __table_args__ = (
        Index(
            'ix_examples_tsv',
            func.to_tsvector('english', textsearch),
            postgresql_using='gin',
        ),
    )
Note that the comma following Index(...) in __table_args__ is not a style choice, the value of __table_args__ must be a tuple, dictionary, or None.
If you do need to create a tsvector GIN index on multiple columns, here is another way to get there using text().
from sqlalchemy import Column, Index, Integer, String, text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func
Base = declarative_base()
def to_tsvector_ix(*columns):
    """Return ``to_tsvector('english', <columns joined by a space>)``.

    ``columns`` are column names given as strings. They are spliced into a
    raw SQL fragment with ``|| ' ' ||`` between them, so they must be
    trusted identifiers.
    """
    concatenated = " || ' ' || ".join(columns)
    return func.to_tsvector('english', text(concatenated))
class Example(Base):
    """Table ``examples`` with one GIN tsvector index over ``atext`` and ``btext``."""

    __tablename__ = 'examples'

    id = Column(Integer, primary_key=True)
    atext = Column(String)
    btext = Column(String)

    # Single-element tuple: keep the trailing comma.
    __table_args__ = (
        Index(
            'ix_examples_tsv',
            to_tsvector_ix('atext', 'btext'),
            postgresql_using='gin',
        ),
    )
Thanks for this question and answers.
I'd like to add a bit more for people who use Alembic to manage schema versions with autogenerate:
autogenerate does not seem to detect the creation of this kind of index.
We may therefore end up writing our own migration script, which looks like this.
"""add fts idx
Revision ID: e3ce1ce23d7a
Revises: 079c4455d54d
Create Date:
"""
# revision identifiers, used by Alembic.
revision = 'e3ce1ce23d7a'
down_revision = '079c4455d54d'
from alembic import op
import sqlalchemy as sa
def upgrade():
    """Create the GIN index ``idx_content_fts`` on ``table_name``."""
    tsvector_expr = sa.text("to_tsvector('english', content)")
    op.create_index(
        'idx_content_fts',
        'table_name',
        [tsvector_expr],
        postgresql_using='gin',
    )
def downgrade():
    """Drop the index created by ``upgrade``."""
    # Name the owning table as well, mirroring create_index() in upgrade();
    # some backends need it to drop an index.
    op.drop_index('idx_content_fts', table_name='table_name')
It has been answered already by @sharez and @benvc. I needed to make it work with weights, though. This is how I did it, based on their answers:
from sqlalchemy import Column, func, Index, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql.operators import op
CONFIG = 'english'
Base = declarative_base()
def create_tsvector(*args):
    """Combine weighted tsvectors with the ``||`` operator.

    Each argument is a ``(field, weight)`` pair; every field is wrapped as
    ``setweight(to_tsvector(CONFIG, field), weight)`` and the results are
    chained left to right. At least one pair is required.
    """

    def weighted(column, label):
        return func.setweight(func.to_tsvector(CONFIG, column), label)

    first_column, first_label = args[0]
    combined = weighted(first_column, first_label)
    for column, label in args[1:]:
        combined = op(combined, '||', weighted(column, label))
    return combined
class Example(Base):
    """Table ``example`` with a weighted full-text GIN index over ``foo`` and ``bar``."""

    __tablename__ = 'example'

    # A mapped class needs a primary key; the original snippet had none,
    # which prevents the mapper from being configured.
    id = Column(Integer, primary_key=True)
    foo = Column(String)
    bar = Column(String)

    # foo is weighted 'A' and bar 'B'; the same expression backs the index.
    __ts_vector__ = create_tsvector(
        (foo, 'A'),
        (bar, 'B')
    )

    __table_args__ = (
        Index('my_index', __ts_vector__, postgresql_using='gin'),
    )
Previous answers here were helpful for pointing in the right direction.
Below, a distilled & simplified approach using ORM approach & TSVectorType helper from sqlalchemy-utils (that is quite basic and can be simply copy/pasted to avoid external dependencies if needed https://sqlalchemy-utils.readthedocs.io/en/latest/_modules/sqlalchemy_utils/types/ts_vector.html):
Defining a TSVECTOR column (TSVectorType) in your ORM model (declarative) populated automatically from the source text field(s)
import sqlalchemy as sa
from sqlalchemy_utils.types.ts_vector import TSVectorType
# ^-- https://sqlalchemy-utils.readthedocs.io/en/latest/_modules/sqlalchemy_utils/types/ts_vector.html
class MyModel(Base):
    """Table ``mymodel`` with a stored, GIN-indexed tsvector of ``content``."""

    __tablename__ = 'mymodel'

    id = sa.Column(sa.Integer, primary_key=True)
    content = sa.Column(sa.String, nullable=False)

    # Equivalent SQL:
    #   COLUMN content_tsv TSVECTOR
    #   GENERATED ALWAYS AS (to_tsvector('english', "content")) STORED;
    content_tsv = sa.Column(
        TSVectorType("content", regconfig="english"),
        sa.Computed("to_tsvector('english', \"content\")", persisted=True),
    )

    __table_args__ = (
        # Indexing the TSVector column
        sa.Index(
            "idx_mymodel_content_tsv",
            content_tsv,
            postgresql_using="gin",
        ),
    )
For additional details on querying using ORM, see https://stackoverflow.com/a/73999486/11750716 (there is an important difference between SQLAlchemy 1.4 and SQLAlchemy 2.0).
I tried to use SQLAlchemy joined table inheritance and ran into some strange behaviour.
class CommonObject(Base):
__tablename__ = "objects"
id = Column("objid", Integer, primary_key=True)
objname = Column(String(32))
...
class GoodsPlacement(Container, Loadable, Dumpable):
__tablename__ = "goods_placements"
id = Column("objid", Integer, ForeignKey("containers.objid"), primary_key=True)
...
class Departure(CommonObject):
__tablename__ = "departures"
id = Column(Integer, ForeignKey("objects.objid"), primary_key=True)
content_id = Column(Integer, ForeignKey("goods_placements.objid"))
content = relationship("GoodsPlacement",
primaryjoin="Departure.content_id==GoodsPlacement.id",
foreign_keys=[content_id],
lazy='joined',
backref="departures")
...
When I write query:
session.query(GoodsPlacement).filter(~GoodsPlacement.departures.any(Departure.status_id < 2))
it generates me something like this:
SELECT
objects.objid AS objects_objid,
goods_placements.objid AS goods_placements_objid,
objects.objname AS objects_objname
FROM objects
JOIN goods_placements ON objects.objid = goods_placements.objid
WHERE NOT (EXISTS (
SELECT 1
FROM (
SELECT
objects.objid AS objects_objid,
objects.objname AS objects_objname,
departures.id AS departures_id,
departures.content_id AS departures_content_id,
departures.status_id AS departures_status_id
FROM objects
JOIN departures ON objects.objid = departures.id)
AS anon_1, objects
WHERE anon_1.departures_content_id = objects.objid
AND anon_1.departures_status_id < :status_id_1)
)
And this doesn't work because objects in exist clause overrides outer objects.
As workaround I used exists from sqlexpression directly,
session.query(GoodsPlacement).filter(~exists([1],
and_("departures.status_id<2",
"departures.content_id=goods_placements.objid"),
from_obj="departures"))
but it strongly depends from column and table names.
How I can specify alias for object table in exists statement?
Debian wheezy, python-2.7.3rc2, sqlaclhemy 0.7.7-1
there's a bug involving the declarative system in how it sets up columns. The "objid" name you're giving the columns, distinct from the "id" attribute name, is the source of the issue here. The below test case approximates your above system and shows a workaround until the bug is fixed:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base= declarative_base()
class CommonObject(Base):
    """Root class mapped to ``objects``.

    The ``id`` attribute is stored in the column named ``objid``.
    """

    __tablename__ = "objects"

    id = Column("objid", Integer, primary_key=True)
    objname = Column(String(32))
class Container(CommonObject):
    """Subclass mapped to ``containers``; its ``objid`` references ``objects.objid``."""

    __tablename__ = 'containers'

    id = Column(
        "objid",
        Integer,
        ForeignKey("objects.objid"),
        primary_key=True,
    )
class GoodsPlacement(Container):
    """Subclass mapped to ``goods_placements``; its ``objid`` references ``containers.objid``."""

    __tablename__ = "goods_placements"

    id = Column(
        "objid",
        Integer,
        ForeignKey("containers.objid"),
        primary_key=True,
    )
class Departure(CommonObject):
    """Subclass mapped to ``departures`` that points at a ``GoodsPlacement``."""

    __tablename__ = "departures"

    id = Column(Integer, ForeignKey("objects.objid"), primary_key=True)
    content_id = Column(Integer, ForeignKey("goods_placements.objid"))
    status_id = Column(Integer)

    # Workaround: spell the join against the Table columns directly rather
    # than through the mapped "id" attributes.
    content = relationship(
        "GoodsPlacement",
        primaryjoin=lambda: (
            Departure.__table__.c.content_id
            == GoodsPlacement.__table__.c.objid
        ),
        backref="departures",
    )
session = Session()
print session.query(GoodsPlacement).filter(~GoodsPlacement.departures.any(Departure.status_id < 2))
output:
SELECT objects.objid AS objects_objid, containers.objid AS containers_objid, goods_placements.objid AS goods_placements_objid, objects.objname AS objects_objname
FROM objects JOIN containers ON objects.objid = containers.objid JOIN goods_placements ON containers.objid = goods_placements.objid
WHERE NOT (EXISTS (SELECT 1
FROM (SELECT objects.objid AS objects_objid, objects.objname AS objects_objname, departures.id AS departures_id, departures.content_id AS departures_content_id, departures.status_id AS departures_status_id
FROM objects JOIN departures ON objects.objid = departures.id) AS anon_1
WHERE anon_1.departures_content_id = goods_placements.objid AND anon_1.departures_status_id < :status_id_1))
Suppose we have a PostgreSQL database with two tables A, B.
table A columns: id, name
table B columns: id, name, array_a
The column array_a in table B contains a variable length array of ids from table A. In SQLAlchemy we have two classes that model those tables, say class A and B.
The following works fine to get all the objects A that are referenced in an object B:
session.query(A).join(B, A.id == func.any(B.array_a)).filter(B.id == <id>).all()
How can we create a relationship in B referencing the objects A corresponding to the array? Tried column comparators using the func.any above but it complains that ANY(array_a) is not a column in the model. Specifying the primaryjoin conditions as above doesn't seem to cut it either.
This anti-pattern is called "Jaywalking"; and PostgreSQL's powerful type system makes it very tempting. you should be using another table:
-- Entity A: surrogate key plus a name.
CREATE TABLE table_a (
    id   SERIAL PRIMARY KEY,
    name VARCHAR
);
-- Entity B: surrogate key plus a name.
CREATE TABLE table_b (
    id   SERIAL PRIMARY KEY,
    name VARCHAR
);
-- Association table between table_a and table_b.
-- A table can have only one primary key, so the pair of columns is declared
-- as a single composite key instead of two column-level PRIMARY KEYs.
CREATE TABLE a_b (
    a_id INTEGER REFERENCES table_a(id),
    b_id INTEGER REFERENCES table_b(id),
    PRIMARY KEY (a_id, b_id)
);
Which is mapped:
from sqlalchemy import *
from sqlalchemy.dialects import postgresql
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *
Base = declarative_base()
# Association table: both foreign keys together form the primary key.
a_b_table = Table(
    "a_b",
    Base.metadata,
    Column("a_id", Integer, ForeignKey("table_a.id"), primary_key=True),
    Column("b_id", Integer, ForeignKey("table_b.id"), primary_key=True),
)
class A(Base):
    """Mapped to ``table_a``: an integer id and a name."""

    __tablename__ = "table_a"

    id = Column(Integer, primary_key=True)
    name = Column(String)
class B(Base):
    """Mapped to ``table_b``; linked to ``A`` through ``a_b_table``."""

    __tablename__ = "table_b"

    id = Column(Integer, primary_key=True)
    name = Column(String)

    # Many-to-many; the backref adds the reverse collection A.b_set.
    a_set = relationship(
        A,
        secondary=a_b_table,
        backref="b_set",
    )
example:
>>> print Query(A).filter(A.b_set.any(B.name == "foo"))
SELECT table_a.id AS table_a_id, table_a.name AS table_a_name
FROM table_a
WHERE EXISTS (SELECT 1
FROM a_b, table_b
WHERE table_a.id = a_b.a_id AND table_b.id = a_b.b_id AND table_b.name = :name_1)
If you are stuck with the ARRAY column, your best bet is to use an alternate selectable that "looks" like a proper association table.
from sqlalchemy import *
from sqlalchemy.dialects import postgresql
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *
Base = declarative_base()
class A(Base):
    """Mapped to ``table_a``: an integer id and a name."""

    __tablename__ = "table_a"

    id = Column(Integer, primary_key=True)
    name = Column(String)
class B(Base):
    """Mapped to ``table_b``; ``array_a`` is a PostgreSQL integer array."""

    __tablename__ = "table_b"

    id = Column(Integer, primary_key=True)
    name = Column(String)
    array_a = Column(postgresql.ARRAY(Integer))
# A selectable shaped like an association table: one (a_id, b_id) row per
# element unnested from B.array_a.
a_b_selectable = select(
    [
        func.unnest(B.array_a).label("a_id"),
        B.id.label("b_id"),
    ]
).alias()
# Read-only A -> B collection routed through the unnest selectable.
A.b_set = relationship(
    B,
    secondary=a_b_selectable,
    primaryjoin=A.id == a_b_selectable.c.a_id,
    secondaryjoin=a_b_selectable.c.b_id == B.id,
    viewonly=True,
)
# Read-only B -> A collection routed through the unnest selectable.
# This relationship is declared on B, so primaryjoin must link B to the
# secondary and secondaryjoin must link the secondary to A. The original
# reused the A.b_set conditions unchanged.
B.a_set = relationship(
    A,
    secondary=a_b_selectable,
    primaryjoin=B.id == a_b_selectable.c.b_id,
    secondaryjoin=a_b_selectable.c.a_id == A.id,
    viewonly=True,
)
which gives you:
>>> print Query(A).filter(A.b_set.any(B.name == "foo"))
SELECT table_a.id AS table_a_id, table_a.name AS table_a_name
FROM table_a
WHERE EXISTS (SELECT 1
FROM (SELECT unnest(table_b.array_a) AS a_id, table_b.id AS b_id
FROM table_b) AS anon_1, table_b
WHERE table_a.id = anon_1.a_id AND anon_1.b_id = table_b.id AND table_b.name = :name_1)
And obviously, since there's no real table there, viewonly=True is necessary, and you can't get the nice, dynamic objecty goodness you would have if you had avoided jaywalking.
Or else simply you can join explicitly like below:
class A(Base):
    """Mapped to ``table_a``: an integer id and a name."""

    __tablename__ = "table_a"

    id = Column(Integer, primary_key=True)
    name = Column(String)
class B(Base):
    """Mapped to ``table_b``; ``a_ids`` joins to ``A`` via ``ANY(array_a)``."""

    __tablename__ = "table_b"

    id = Column(Integer, primary_key=True)
    name = Column(String)
    array_a = Column(postgresql.ARRAY(Integer))

    # Explicit join condition with array_a marked as the foreign side.
    # NOTE(review): there is no real foreign key behind this join; confirm
    # whether viewonly=True is wanted here as in the selectable variant.
    a_ids = relationship(
        'A',
        primaryjoin='A.id == any_(foreign(B.array_a))',
        uselist=True,
    )
I'm new to SQLAlchemy and relational databases, and I'm trying to set up a model for an annotated lexicon. I want to support an arbitrary number of key-value annotations for the words which can be added or removed at runtime. Since there will be a lot of repetition in the names of the keys, I don't want to use this solution directly, although the code is similar.
My design has word objects and property objects. The words and properties are stored in separate tables with a property_values table that links the two. Here's the code:
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import MetaData, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
engine = create_engine('sqlite:///test.db', echo=True)
meta = MetaData(bind=engine)
# Link table: one value per (word, property) pair.
property_values = Table(
    'property_values',
    meta,
    Column('word_id', Integer, ForeignKey('words.id')),
    Column('property_id', Integer, ForeignKey('properties.id')),
    Column('value', String(20)),
)
# Lexicon entries: the word itself and its frequency.
words = Table(
    'words',
    meta,
    Column('id', Integer, primary_key=True),
    Column('name', String(20)),
    Column('freq', Integer),
)
# Property names; each name is required and unique.
properties = Table(
    'properties',
    meta,
    Column('id', Integer, primary_key=True),
    Column('name', String(20), nullable=False, unique=True),
)
meta.create_all()
class Word(object):
    """Plain holder for a word and its frequency (``freq`` defaults to 1)."""

    def __init__(self, name, freq=1):
        self.name, self.freq = name, freq
class Property(object):
    """Plain holder for a property name."""

    def __init__(self, name):
        setattr(self, 'name', name)
mapper(Property, properties)
Now I'd like to be able to do the following:
Session = sessionmaker(bind=engine)
s = Session()
word = Word('foo', 42)
word['bar'] = 'yes' # or word.bar = 'yes' ?
s.add(word)
s.commit()
Ideally this should add 1|foo|42 to the words table, add 1|bar to the properties table, and add 1|1|yes to the property_values table. However, I don't have the right mappings and relations in place to make this happen. I get the sense from reading the documentation at http://www.sqlalchemy.org/docs/05/mappers.html#association-pattern that I want to use an association proxy or something of that sort here, but the syntax is unclear to me. I experimented with this:
mapper(Word, words, properties={
'properties': relation(Property, secondary=property_values)
})
but this mapper only fills in the foreign key values, and I need to fill in the other value as well. Any assistance would be greatly appreciated.
Simply use the Dictionary-Based Collections mapping, an out-of-the-box solution to your question. Extract from the link:
from sqlalchemy.orm.collections import column_mapped_collection, attribute_mapped_collection, mapped_collection
mapper(Item, items_table, properties={
# key by column
'notes': relation(Note, collection_class=column_mapped_collection(notes_table.c.keyword)),
# or named attribute
'notes2': relation(Note, collection_class=attribute_mapped_collection('keyword')),
# or any callable
'notes3': relation(Note, collection_class=mapped_collection(lambda entity: entity.a + entity.b))
})
# ...
item = Item()
item.notes['color'] = Note('color', 'blue')
print item.notes['color']
Or try the solution for Inserting data in Many to Many relationship in SQLAlchemy. Obviously you have to replace the list logic with the dict one.
Ask the question's author to post his final code using associationproxy, which he mentioned he ended up using.
There is very similar question with slight interface difference. But it's easy to fix it by defining __getitem__, __setitem__ and __delitem__ methods.
Comment for Brent, above:
You can use session.flush() instead of commit() to get an id on your model instances. flush() will execute the necessary SQL, but will not commit, so you can rollback later if needed.
I ended up combining Denis and van's posts together to form the solution:
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import MetaData, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
meta = MetaData()
Base = declarative_base(metadata=meta, name='Base')
class PropertyValue(Base):
    """Association row in ``property_values``: the value of one property for one word."""

    __tablename__ = 'property_values'

    # The two foreign keys together form the primary key.
    WordID = Column(Integer, ForeignKey('words.id'), primary_key=True)
    PropID = Column(Integer, ForeignKey('properties.id'), primary_key=True)
    Value = Column(String(20))
def _property_for_name(prop_name):
    """Return the first ``Property`` named ``prop_name``, or ``None``.

    Uses the module-level session ``s``.
    """
    matches = s.query(Property).filter_by(name=prop_name)
    return matches.first()
def _create_propval(prop_name, prop_val):
    """Creator for the ``Word.props`` association proxy.

    Looks up the ``Property`` named ``prop_name``, creating it when it does
    not exist yet, and returns a new ``PropertyValue`` that points at it.

    :param prop_name: name of the property (the key used on the proxy).
    :param prop_val: value to store for that property.
    :return: an unattached ``PropertyValue`` with ``PropID`` and ``Value`` set.
    """
    p = _property_for_name(prop_name)
    if p is None:
        p = Property(prop_name)
        s.add(p)
        # flush() emits the INSERT so p.id gets populated, but leaves the
        # transaction open: assigning a property must not commit (or make
        # impossible to roll back) whatever else the caller has pending.
        s.flush()
    return PropertyValue(PropID=p.id, Value=prop_val)
class Word(Base):
    """Lexicon entry with dictionary-style access to named property values.

    ``word[name]`` reads, ``word[name] = value`` writes and
    ``del word[name]`` removes the value of the property called ``name``.

    NOTE(review): entries written through the ``props`` proxy appear to be
    stored under the key that was passed in (the property name) until the
    collection is reloaded, after which
    ``attribute_mapped_collection('PropID')`` keys them by ``PropID``. The
    item methods below therefore accept either key form; the original read
    by ``PropID`` but wrote and deleted by name.
    """

    __tablename__ = 'words'

    id = Column(Integer, primary_key=True)
    string = Column(String(20), nullable=False)
    freq = Column(Integer)

    _props = relation(
        PropertyValue,
        collection_class=attribute_mapped_collection('PropID'),
        cascade='all, delete-orphan',
    )
    props = association_proxy('_props', 'Value', creator=_create_propval)

    def __init__(self, string, freq=1):
        self.string = string
        self.freq = freq

    def __getitem__(self, prop):
        """Return the value of property ``prop``.

        Returns ``None`` when no property with that name exists at all;
        raises ``KeyError`` when it exists but is not set on this word.
        """
        # Not yet reloaded: the entry is still keyed by name.
        if prop in self.props:
            return self.props[prop]
        p = _property_for_name(prop)
        if p is None:
            return None
        return self.props[p.id]

    def __setitem__(self, prop, val):
        """Set property ``prop`` to ``val``, creating the property if needed."""
        if prop not in self.props:
            p = _property_for_name(prop)
            # Reloaded collection: update the existing row under its PropID
            # instead of creating a second row for the same property.
            if p is not None and p.id in self.props:
                self.props[p.id] = val
                return
        self.props[prop] = val

    def __delitem__(self, prop):
        """Remove property ``prop`` from this word.

        Does nothing when no property with that name exists; raises
        ``KeyError`` when it exists but is not set on this word.
        """
        if prop in self.props:
            del self.props[prop]
            return
        p = _property_for_name(prop)
        if p is not None:
            del self.props[p.id]
class Property(Base):
    """Row in ``properties``: a required, unique property name."""

    __tablename__ = 'properties'

    id = Column(Integer, primary_key=True)
    name = Column(String(20), nullable=False, unique=True)

    def __init__(self, name):
        setattr(self, 'name', name)
# File-backed SQLite database, SQL echo off.
engine = create_engine('sqlite:///test.db', echo=False)
meta.create_all(engine)

# Module-level session used by the helper functions above.
Session = sessionmaker(bind=engine)
s = Session()
The test code is as follows:
word = Word('foo', 42)
word['bar'] = "yes"
word['baz'] = "certainly"
s.add(word)
word2 = Word('quux', 20)
word2['bar'] = "nope"
word2['groink'] = "nope"
s.add(word2)
word2['groink'] = "uh-uh"
del word2['bar']
s.commit()
word = s.query(Word).filter_by(string="foo").first()
print word.freq, word['baz']
# prints 42 certainly
The contents of the databases are:
$ sqlite3 test.db "select * from property_values"
1|2|certainly
1|1|yes
2|3|uh-uh
$ sqlite3 test.db "select * from words"
1|foo|42
2|quux|20
$ sqlite3 test.db "select * from properties"
1|bar
2|baz
3|groink