SQLAlchemy ORM Collection Dict of Lists - python

I'm attempting to create an ORM model to store a dictionary of lists in SQLAlchemy. I've had a little bit of progress based on https://gist.github.com/onecrayon/646da61accf54674d4f5098376a2c5df, but I'm stuck using the code below:
import operator
from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm.collections import MappedCollection, collection, _instrument_class
from sqlalchemy.ext.declarative import declarative_base
connect_args = {}
connect_args["check_same_thread"] = False
engine = create_engine("sqlite:///test_orm.sqlite", connect_args=connect_args)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
db = SessionLocal()
Base = declarative_base()
class KeyedListCollection(MappedCollection):
def __init__(self, key):
super().__init__(operator.attrgetter(key))
#collection.internally_instrumented
def __setitem__(self, key, value, _sa_initiator=None):
if not super().get(key):
super().__setitem__(key, [], _sa_initiator)
super().__getitem__(key).append(value)
_instrument_class(KeyedListCollection)
class Prop(Base):
__tablename__ = "props"
id = Column(Integer, primary_key=True, index=True)
item_id = Column(Integer, ForeignKey("items.id"))
key = Column(String)
value = Column(String)
item = relationship("Item", back_populates="props")
class Item(Base):
__tablename__ = "items"
id = Column(Integer, primary_key=True, index=True)
props = relationship(
"Prop",
collection_class=lambda: KeyedListCollection("key"),
cascade="all, delete-orphan",
back_populates="item",
)
#property
def props_p(self):
out = {}
for k, vs in self.props.items():
out[k] = [v.value for v in vs]
return out
Base.metadata.create_all(bind=engine)
dat = {
"props": {
"p1": [
"a",
"b",
"c",
],
"p2": [
"d",
"e",
"f",
],
},
}
item = Item()
db.add(item)
db.commit()
db.refresh(item)
props = []
for k, vs in dat["props"].items():
props.extend([Prop(key=k, value=v, item=item) for v in vs])
[db.add(p) for p in props]
db.commit()
item = db.query(Item).order_by(Item.id.desc()).first()
print(item.props_p)
db.delete(item)
db.commit()
db.close()
The db.delete(item) line raises AttributeError: 'list' object has no attribute '_sa_instance_state'. I assume I have to add some sort of delete method to my custom mapper, but I don't know how. I tried overriding __delitem__, but that didn't seem to even get called.
The output from print(item.props_p) is what I'm looking for, but I don't think using a function in the model is the right way to do it because I can't use it the other way, i.e. feed it data with that schema (like the dictionary dat in the example) and have it stored properly.
I know I should be using an association_proxy in some way because I've done that combined with an attribute_mapped_collection to make a dictionary of strings, but I can't figure out how to make it work for a dictionary of lists.
Anyone got ideas for me?

After more fiddling around, I changed Item to:
class Item(Base):
__tablename__ = "items"
id = Column(Integer, primary_key=True, index=True)
_props = relationship(
"Prop",
cascade="all, delete-orphan",
back_populates="item",
)
#property
def props(self):
out = {}
for p in self._props:
if p.key not in out:
out[p.key] = []
out[p.key].append(p.value)
return out
#props.setter
def props(self, props):
ps = []
for k, vs in props.items():
ps.extend([Prop(key=k, value=v) for v in vs])
self._props = ps
And that seems to do exactly what I want. I'm not sure that's the "right way", but it gets the job done.

Related

SQLAlchemy method to get ORM object as dict?

Take the following code:
from sqlalchemy import create_engine
from sqlalchemy.orm import declarative_base
from sqlalchemy import Column, Integer, String
engine = create_engine('postgresql://postgres:password#localhost:5432/db', echo=True, echo_pool='debug')
Base = declarative_base()
class Item(Base):
__tablename__ = 'items'
id = Column(Integer, primary_key=True)
name = Column(String)
def __repr__(self):
return "<Item(id=%s, name='%s')>" % (self.id, self.name)
item = Item(name="computer")
Is there a way to get a python dict of the Item with all its fields? For example, I would like to get the following returned:
item.to_dict()
{"id": None, "name": "computer"}
Or do I have to write my own method to do that?
Here would be one way to do it:
class MyBase(Base):
__abstract__ = True
def to_dict(self):
return {field.name:getattr(self, field.name) for field in self.__table__.c}
class Item(MyBase):
# as before
item = Item(name="computer")
item.to_dict()
# {'id': None, 'name': 'computer'}
Also, a lot of these usability simplifications can be found in: https://github.com/absent1706/sqlalchemy-mixins.

SQLAlchemy Complex Polymorphic Mapping And Deep Table Inheritance

I have the following table structure (I have simplified it as much as possible, narrowed down the child/inheriting tables [there are additional] and removed all irrelevant columns from the provided tables):
## Base is my declarative_base
class AbstractQuestion(Base):
questionTypeId: Column = Column(
Integer, ForeignKey("luQuestionTypes.id"), index=True, nullable=False
)
__mapper_args__ = {
"polymorphic_identity": 0,
"polymorphic_on": questionTypeId,
}
class MultiChoiceQuestion(AbstractQuestion):
id: Column = Column(Integer, ForeignKey(AbstractQuestion.id), primary_key=True)
__mapper_args__ = {"polymorphic_identity": 1}
class AbstractSurveyQuestion(AbstractQuestion):
id: Column = Column(Integer, ForeignKey(AbstractQuestion.id), primary_key=True)
surveyQuestionTypeId: Column = Column(
Integer, ForeignKey("luSurveyQuestionTypes.id"), index=True, nullable=False
)
__mapper_args__ = {"polymorphic_identity": 2}
class RatingQuestion(AbstractSurveyQuestion):
id: Column = Column(
Integer, ForeignKey(AbstractSurveyQuestion.id), primary_key=True
)
The challenge I'm facing is, that I'm trying to make AbstractSurveyQuestion have two types of polymorphic mappings - one as a child of AbstractQuestion with a polymorphic_identity that matches the questionTypeId, but I also need it to have a separate polymorphic_on mapper for its own child table, which is RatingQuestion.
The closest thing I could find was this question, but it doesn't seem to be aiming at exactly what I'm looking for.
I also looked at the official docs about inheritance, but again couldn't find an accurate example to what I'm trying to achieve.
Can anyone please help me with this?
Thanks!
I posted the same question on SQLAlchemy's GitHub repo. Got this answer from the maintainer:
https://github.com/sqlalchemy/sqlalchemy/discussions/8089#discussioncomment-2878725
I'll paste the contents below as well:
it sounds like you are looking for mult-level polymorphic_on. We don't support that right now without workarounds, and that's #2555 which is a feature we're unlikely to implement, or if we did it would be a long time from now.
It looks like you are using joined inheritance....so...two ways. The more SQL efficient one is to have an extra "supplemetary" column on your base table that can discriminate for AbstractSurveyQuestion...because if you query for all the AbstractQuestion objects, by default it's just going to query that one table, and needs to know from each row if that row is in fact a RatingQuestion.
the more convoluted way is to use mapper-configured with_polymorphic so that all queries for AbstractQuestion include all the tables (or a subset of tables, can be configured, but at minimum you'd need to join out to AbstractSurveyQuestion) using a LEFT OUTER JOIN (or if you really wanted to go crazy it can be a UNION ALL).
the workarounds are a little ugly since it's not very easy to get a "composed" value out of two columns in SQL, but they are contained to the base classes. Below examples work on SQLite and might need tweaking for other databases.
Here's the discriminator on base table demo, a query here looks like:
SELECT aq.id AS aq_id, aq.d1 AS aq_d1, aq.d2 AS aq_d2, CAST(aq.d1 AS VARCHAR) || ? || CAST(coalesce(aq.d2, ?) AS VARCHAR) AS _sa_polymorphic_on
FROM aq
from typing import Tuple, Optional
from sqlalchemy import cast
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import event
from sqlalchemy import ForeignKey
from sqlalchemy import inspect
from sqlalchemy import Integer, func
from sqlalchemy import String
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import Session
Base = declarative_base()
class ident_(str):
"""describe a composed identity.
Using a string for easy conversion to a string SQL composition.
"""
_tup: Tuple[int, Optional[int]]
def __new__(cls, d1, d2=None):
self = super().__new__(cls, f"{d1}, {d2 or ''}")
self._tup = d1, d2
return self
def _as_tuple(self):
return self._tup
class AbstractQuestion(Base):
__tablename__ = "aq"
id = Column(Integer, primary_key=True)
d1 = Column(
Integer, nullable=False
) # this can be your FK to the other table etc.
d2 = Column(
Integer, nullable=True
) # this is a "supplementary" discrim column
__mapper_args__ = {
"polymorphic_identity": ident_(0),
"polymorphic_on": cast(d1, String)
+ ", "
+ cast(func.coalesce(d2, ""), String),
}
#event.listens_for(AbstractQuestion, "init", propagate=True)
def _setup_poly(target, args, kw):
"""receive new AbstractQuestion objects when they are constructed and
set polymorphic identity"""
# this is the ident_() object
ident = inspect(target).mapper.polymorphic_identity
d1, d2 = ident._as_tuple()
kw["d1"] = d1
if d2:
kw["d2"] = d2
class MultiChoiceQuestion(AbstractQuestion):
__tablename__ = "mcq"
id: Column = Column(
Integer, ForeignKey(AbstractQuestion.id), primary_key=True
)
__mapper_args__ = {"polymorphic_identity": ident_(1)}
class AbstractSurveyQuestion(AbstractQuestion):
__tablename__ = "acq"
id: Column = Column(
Integer, ForeignKey(AbstractQuestion.id), primary_key=True
)
__mapper_args__ = {"polymorphic_identity": ident_(2)}
class RatingQuestion(AbstractSurveyQuestion):
__tablename__ = "rq"
id: Column = Column(
Integer, ForeignKey(AbstractSurveyQuestion.id), primary_key=True
)
__mapper_args__ = {"polymorphic_identity": ident_(2, 1)}
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
s = Session(e)
s.add(MultiChoiceQuestion())
s.add(RatingQuestion())
s.commit()
s.close()
for q in s.query(AbstractQuestion):
print(q)
then there's the one that maintains your schema fully, a query here looks like:
SELECT aq.id AS aq_id, aq.d1 AS aq_d1, CAST(aq.d1 AS VARCHAR) || ? || CAST(coalesce(acq.d2, ?) AS VARCHAR) AS _sa_polymorphic_on, acq.id AS acq_id, acq.d2 AS acq_d2
FROM aq LEFT OUTER JOIN acq ON aq.id = acq.id
from typing import Tuple, Optional
from sqlalchemy import cast
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import event
from sqlalchemy import ForeignKey
from sqlalchemy import func
from sqlalchemy import inspect
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import Session
Base = declarative_base()
class ident_(str):
"""describe a composed identity.
Using a string for easy conversion to a string SQL composition.
"""
_tup: Tuple[int, Optional[int]]
def __new__(cls, d1, d2=None):
self = super().__new__(cls, f"{d1}, {d2 or ''}")
self._tup = d1, d2
return self
def _as_tuple(self):
return self._tup
class AbstractQuestion(Base):
__tablename__ = "aq"
id = Column(Integer, primary_key=True)
d1 = Column(
Integer, nullable=False
) # this can be your FK to the other table etc.
__mapper_args__ = {
"polymorphic_identity": ident_(0),
}
#event.listens_for(AbstractQuestion, "init", propagate=True)
def _setup_poly(target, args, kw):
"""receive new AbstractQuestion objects when they are constructed and
set polymorphic identity"""
# this is the ident_() object
ident = inspect(target).mapper.polymorphic_identity
d1, d2 = ident._as_tuple()
kw["d1"] = d1
if d2:
kw["d2"] = d2
class MultiChoiceQuestion(AbstractQuestion):
__tablename__ = "mcq"
id: Column = Column(
Integer, ForeignKey(AbstractQuestion.id), primary_key=True
)
__mapper_args__ = {"polymorphic_identity": ident_(1)}
class AbstractSurveyQuestion(AbstractQuestion):
__tablename__ = "acq"
id: Column = Column(
Integer, ForeignKey(AbstractQuestion.id), primary_key=True
)
d2 = Column(Integer, nullable=False)
__mapper_args__ = {
"polymorphic_identity": ident_(2),
"polymorphic_load": "inline", # adds ASQ to all AQ queries
}
# after ASQ is set up, set the discriminator on the base class
# that includes ASQ column
inspect(AbstractQuestion)._set_polymorphic_on(
cast(AbstractQuestion.d1, String)
+ ", "
+ cast(func.coalesce(AbstractSurveyQuestion.d2, ""), String)
)
class RatingQuestion(AbstractSurveyQuestion):
__tablename__ = "rq"
id: Column = Column(
Integer, ForeignKey(AbstractSurveyQuestion.id), primary_key=True
)
__mapper_args__ = {"polymorphic_identity": ident_(2, 1)}
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
s = Session(e)
s.add(MultiChoiceQuestion())
s.add(RatingQuestion())
s.commit()
s.close()
for q in s.query(AbstractQuestion):
print(q)

Creating a query with few related tables in Pyramid with SQLAlchemy

I have defined few tables in Pyramid like this:
# coding: utf-8
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Integer, Float, DateTime, ForeignKey, ForeignKeyConstraint, String, Column
from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref,
from zope.sqlalchemy import ZopeTransactionExtension
DBSession = scoped_session(sessionmaker(extension=ZopeTransactionExtension()))
Base = declarative_base()
class Codes(Base):
__tablename__ = 'Code'
__table_args__ = {u'schema': 'Locations'}
id = Column(Integer, nullable=False)
code_str = Column(String(9), primary_key=True)
name = Column(String(100))
incoming = relationship(u'Voyages', primaryjoin='Voyage.call == Codes.code_str', backref=backref('Code'))
class Locations(Base):
__tablename__ = 'Location'
__table_args__ = {u'schema': 'Locations'}
unit_id = Column(ForeignKey(u'Structure.Definition.unit_id', ondelete=u'RESTRICT', onupdate=u'CASCADE'), primary_key=True, nullable=False)
timestamp = Column(DateTime, primary_key=True, nullable=False)
longitude = Column(Float)
latitude = Column(Float)
class Voyages(Base):
__tablename__ = 'Voyage'
__table_args__ = (ForeignKeyConstraint(['unit_id', 'Voyage_id'], [u'Locations.Voyages.unit_id', u'Locations.Voyages.voyage_id'], ondelete=u'RESTRICT', onupdate=u'CASCADE'), {u'schema': 'Locations'}
)
uid = Column(Integer, primary_key=True)
unit_id = Column(Integer)
voyage_id = Column(Integer)
departure = Column(ForeignKey(u'Locations.Code.code_str', ondelete=u'RESTRICT', onupdate=u'CASCADE'))
call = Column(ForeignKey(u'Locations.Code.code_str', ondelete=u'RESTRICT', onupdate=u'CASCADE'))
departure_date = Column(DateTime)
voyage_departure = relationship(u'Codes', primaryjoin='Voyage.departure == Codes.code_str')
voyage_call = relationship(u'Codes', primaryjoin='Voyage.call == Codes.code_str')
class Definitions(Base):
__tablename__ = 'Definition'
__table_args__ = {u'schema': 'Structure'}
unit_id = Column(Integer, primary_key=True)
name = Column(String(90))
type = Column(ForeignKey(u'Structure.Type.id', ondelete=u'RESTRICT', onupdate=u'CASCADE'))
locations = relationship(u'Locations', backref=backref('Definition'))
dimensions = relationship(u'Dimensions', backref=backref('Definition'))
types = relationship(u'Types', backref=backref('Definition'))
voyages = relationship(u'Voyages', backref=backref('Definition'))
class Dimensions(Base):
__tablename__ = 'Dimension'
__table_args__ = {u'schema': 'Structure'}
unit_id = Column(ForeignKey(u'Structure.Definition.unit_id', ondelete=u'RESTRICT', onupdate=u'CASCADE'), primary_key=True, nullable=False)
length = Column(Float)
class Types(Base):
__tablename__ = 'Type'
__table_args__ = {u'schema': 'Structure'}
id = Column(SmallInteger, primary_key=True)
type_name = Column(String(255))
type_description = Column(String(255))
What I am trying to do here is to find a specific row from Codes table (filter it by code_str) and get all related tables in return, but under the condition that Location table returns only the last row by timestamp, Voyage table must return only the last row by departure, and it must have all information from Definitions table.
I started to create a query from the scratch and came across something like this:
string_to_search = request.matchdict.get('code')
sub_dest = DBSession.query(func.max(Voyage.departure).label('latest_voyage_timestamp'), Voyage.unit_id, Voyage.call.label('destination_call')).\
filter(Voyage.call== string_to_search).\
group_by(Voyage.unit_id, Voyage.call).\
subquery()
query = DBSession.query(Codes, Voyage).\
join(sub_dest, sub_dest.c.destination_call == Codes.code_str).\
outerjoin(Voyage, sub_dest.c.latest_voyage_timestamp == Voyage.departure_date)
but I have notice that when I iterate through my results (like for code, voyage in query) I am actually iterating every Voyage I get in return. In theory it is not a big problem for me but I am trying to construct some json response with basic information from Codes table which would include all possible Voyages (if there is any at all).
For example:
code_data = {}
all_units = []
for code, voyage in query:
if code_data is not {}:
code_data = {
'code_id': code.id,
'code_str': code.code_str,
'code_name': code.name,
}
single_unit = {
'unit_id': voyage.unit_id,
'unit_departure': str(voyage.departure_date) if voyage.departure_date else None,
}
all_units.append(single_unit)
return {
'code_data': exception.message if exception else code_data,
'voyages': exception.message if exception else all_units,
}
Now, this seems a bit wrong because I don't like rewriting this code_data in each loop, so I put if code_data is not {} line here, but I suppose it would be much better (logical) to iterate in a way similar to this:
for code in query:
code_data = {
'code_id': code.id,
'code_str': code.code_str,
'code_name': code.name,
}
for voyage in code.voyages:
single_unit = {
'unit_id': voyage.unit_id,
'unit_departure': str(voyage.departure) if voyage.departure else None,
}
all_units.append(single_unit)
return {
'code_data': exception.message if exception else code_data,
}
So, to get only single Code in return (since I queried the db for that specific Code) which would then have all Voyages related to it as a nested value, and of course, in each Voyage all other information related to Definition of the particular Unit...
Is my approach good at all in the first place, and how could I construct my query in order to iterate it in this second way?
I'm using Python 2.7.6, SQLAlchemy 0.9.7 and Pyramid 1.5.1 with Postgres database.
Thanks!
Try changing the outer query like so:
query = DBSession.query(Codes).options(contains_eager('incoming')).\
join(sub_dest, sub_dest.c.destination_call == Codes.code_str).\
outerjoin(Voyage, sub_dest.c.latest_voyage_timestamp == Voyage.departure_date)
In case of problems, try calling the options(...) part like so:
(...) .options(contains_eager(Codes.incoming)). (...)
This should result in a single Codes instance being returned with Voyages objects accessible via the relationship you've defined (incoming), so you could proceed with:
results = query.all()
for code in results:
print code
# do something with code.incoming
# actually, you should get only one code so if it proves to work, you should
# use query.one() so that in case something else than a single Code is returned,
# an exception is thrown
of course you need an import, e.g.: from sqlalchemy.orm import contains_eager

Can the same #property present both scalar and collection behavior in SQLAlchemy?

I'm converting a library to use SQLAlchemy as the datastore. I like the flexibility of the PickleType column, but it doesn't seem to work well when pickling SA objects (table rows). Even if I overload setstate and getstate to do a query + session merge when unpickling, there's no referential integrity across that pickle boundary. That means that I can't query collections of objects.
class Bar(Base):
id = Column(Integer, primary_key=True)
__tablename__ = 'bars'
foo_id = Column(Integer, ForeignKey('foos.id'), primary_key=True)
class Foo(Base):
__tablename__ = 'foos'
values = Column(PickleType)
#values = relationship(Bar) # list interface (one->many), but can't assign a scalar or use a dictionary
def __init__(self):
self.values = [Bar(), Bar()]
# only allowed with PickleType column
#self.values = Bar()
#self.values = {'one' : Bar()}
#self.values = [ [Bar(), Bar()], [Bar(), Bar()]]
# get all Foo's with a Bar whose id=1
session.query(Foo).filter(Foo.values.any(Bar.id == 1)).all()
One workaround would be to implement my own mutable object type as is done here. I'm imagining having some kind of flattening scheme which traverses the collections and appends them to a simpler one->many relationship. Perhaps the flattened list might have to be weakrefs to the pickled collection's objects?
Tracking changes and references sounds like no fun and I can't find any examples of people pickling SA rows anywhere else (perhaps indicative of bad design on my part?). Any advice?
EDIT 1:
After some discussion I've simplified the request. I'm looking for a single property that can behave as either a scalar or a collection. Here is my (failing) attempt:
from sqlalchemy import MetaData, Column, Integer, PickleType, String, ForeignKey, create_engine
from sqlalchemy.orm import relationship, Session
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.collections import attribute_mapped_collection
# from http://www.sqlalchemy.org/trac/browser/examples/vertical
from sqlalchemy_examples.vertical import dictlike_polymorphic as dictlike
metadata = MetaData()
Base = declarative_base()
engine = create_engine('sqlite://', echo=True)
Base.metadata.bind = engine
session = Session(engine)
class AnimalFact(dictlike.PolymorphicVerticalProperty, Base):
"""key/value attribute whose value can be one of several types"""
__tablename__ = 'animalfacts'
type_map = {#str: ('string', 'str_value'),
list: ('list', 'list_value'),
tuple: ('tuple', 'tuple_value')}
id = Column(Integer, primary_key=True)
animal_id = Column(Integer, ForeignKey('animal.id'), primary_key=True)
key = Column(String, primary_key=True)
type = Column(String)
#str_value = Column(String)
list_value = relationship('StringEntry')
tuple_value = relationship('StringEntry2')
class Animal(Base, dictlike.VerticalPropertyDictMixin):
__tablename__ = 'animal'
_property_type = AnimalFact
_property_mapping = 'facts'
id = Column(Integer, primary_key=True)
name = Column(String)
facts = relationship(AnimalFact, backref='animal',
collection_class=attribute_mapped_collection('key'))
def __init__(self, name):
self.name = name
class StringEntry(Base):
__tablename__ = 'stringentry'
id = Column(Integer, primary_key=True)
animalfacts_id = Column(Integer, ForeignKey('animalfacts.id'))
value = Column(String)
def __init__(self, value):
self.value = value
class StringEntry2(Base):
__tablename__ = 'stringentry2'
id = Column(Integer, primary_key=True)
animalfacts_id = Column(Integer, ForeignKey('animalfacts.id'))
value = Column(String)
def __init__(self, value):
self.value = value
Base.metadata.create_all()
a = Animal('aardvark')
a['eyes'] = [StringEntry('left side'), StringEntry('right side')] # works great
a['eyes'] = (StringEntry2('left side'), StringEntry2('right side')) # works great
#a['cute'] = 'sort of' # failure
The PickleType is really a hacky way around edge cases where you have some arbitrary object you'd just like to shove away. It's a given that when you use PickleType, you're giving up any relational advantages, including being able to filter/query on them, etc.
So putting an ORM mapped object in a Pickle is basically a terrible idea.
If you want a collection of scalar values, use traditional mappings and relationship() in combination with association_proxy. See http://docs.sqlalchemy.org/en/rel_0_7/orm/extensions/associationproxy.html#simplifying-scalar-collections .
"or dictionaries". Use attribute_mapped_collection: http://docs.sqlalchemy.org/en/rel_0_7/orm/collections.html#dictionary-collections
"dictionaries plus scalars": combine both attribute_mapped_collection and association_proxy: http://docs.sqlalchemy.org/en/rel_0_7/orm/extensions/associationproxy.html#proxying-to-dictionary-based-collections
Edit 1:
Well, you dug into a really esoteric and complex example there. association_proxy is a much easier way to get around these cases where you want an object to act like a scalar, so here's that, without all that crazy boilerplate of the "vertical" example, which I'd avoid as it is really too complex. Your example seemed undecided about primary key style so I went with the composite version. Surrogate + composite can't be mixed in a single table (well it can, but its relationally incorrect. The key should be the smallest unit that identifies a row - http://en.wikipedia.org/wiki/Unique_key is a good top level read into various subjects regarding this).
from sqlalchemy import Integer, String, Column, create_engine, ForeignKey, ForeignKeyConstraint
from sqlalchemy.orm import relationship, Session
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.associationproxy import association_proxy
Base = declarative_base()
class AnimalFact(Base):
"""key/value attribute whose value can be either a string or a list of strings"""
__tablename__ = 'animalfacts'
# use either surrogate PK id, or the composite animal_id/key - but
# not both. id/animal_id/key all together is not a proper key.
# Personally I'd go for "id" here, but here's the composite version.
animal_id = Column(Integer, ForeignKey('animal.id'), primary_key=True)
key = Column(String, primary_key=True)
# data
str_value = Column(String)
_list_value = relationship('StringEntry')
# proxy list strings
list_proxy = association_proxy('_list_value', 'value')
def __init__(self, key, value):
self.key = key
self.value = value
#property
def value(self):
if self.str_value is not None:
return self.str_value
else:
return self.list_proxy
#value.setter
def value(self, value):
if isinstance(value, basestring):
self.str_value = value
elif isinstance(value, list):
self.list_proxy = value
else:
assert False
class Animal(Base):
__tablename__ = 'animal'
id = Column(Integer, primary_key=True)
name = Column(String)
_facts = relationship(AnimalFact, backref='animal',
collection_class=attribute_mapped_collection('key'))
facts = association_proxy('_facts', 'value')
def __init__(self, name):
self.name = name
# dictionary interface around "facts".
# I'd just use "animal.facts" here, but here's how to skip that.
def __getitem__(self, key):
return self.facts.__getitem__(key)
def __setitem__(self, key, value):
self.facts.__setitem__(key, value)
def __delitem__(self, key):
self.facts.__delitem__(key)
def __contains__(self, key):
return self.facts.__contains__(key)
def keys(self):
return self.facts.keys()
class StringEntry(Base):
__tablename__ = 'myvalue'
id = Column(Integer, primary_key=True)
animal_id = Column(Integer)
key = Column(Integer)
value = Column(String)
# because AnimalFact has a composite PK, we need
# a composite FK.
__table_args__ = (ForeignKeyConstraint(
['key', 'animal_id'],
['animalfacts.key', 'animalfacts.animal_id']),
)
def __init__(self, value):
self.value = value
engine = create_engine('sqlite://', echo=True)
Base.metadata.create_all(engine)
session = Session(engine)
# create a new animal
a = Animal('aardvark')
a['eyes'] = ['left side', 'right side']
a['cute'] = 'sort of'
session.add(a)
session.commit()
session.close()
for animal in session.query(Animal):
print animal.name, ",".join(["%s" % animal[key] for key in animal.keys()])

Setting up relations/mappings for a SQLAlchemy many-to-many database

I'm new to SQLAlchemy and relational databases, and I'm trying to set up a model for an annotated lexicon. I want to support an arbitrary number of key-value annotations for the words which can be added or removed at runtime. Since there will be a lot of repetition in the names of the keys, I don't want to use this solution directly, although the code is similar.
My design has word objects and property objects. The words and properties are stored in separate tables with a property_values table that links the two. Here's the code:
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import MetaData, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
engine = create_engine('sqlite:///test.db', echo=True)
meta = MetaData(bind=engine)
property_values = Table('property_values', meta,
Column('word_id', Integer, ForeignKey('words.id')),
Column('property_id', Integer, ForeignKey('properties.id')),
Column('value', String(20))
)
words = Table('words', meta,
Column('id', Integer, primary_key=True),
Column('name', String(20)),
Column('freq', Integer)
)
properties = Table('properties', meta,
Column('id', Integer, primary_key=True),
Column('name', String(20), nullable=False, unique=True)
)
meta.create_all()
class Word(object):
def __init__(self, name, freq=1):
self.name = name
self.freq = freq
class Property(object):
def __init__(self, name):
self.name = name
mapper(Property, properties)
Now I'd like to be able to do the following:
Session = sessionmaker(bind=engine)
s = Session()
word = Word('foo', 42)
word['bar'] = 'yes' # or word.bar = 'yes' ?
s.add(word)
s.commit()
Ideally this should add 1|foo|42 to the words table, add 1|bar to the properties table, and add 1|1|yes to the property_values table. However, I don't have the right mappings and relations in place to make this happen. I get the sense from reading the documentation at http://www.sqlalchemy.org/docs/05/mappers.html#association-pattern that I want to use an association proxy or something of that sort here, but the syntax is unclear to me. I experimented with this:
mapper(Word, words, properties={
'properties': relation(Property, secondary=property_values)
})
but this mapper only fills in the foreign key values, and I need to fill in the other value as well. Any assistance would be greatly appreciated.
Simply use Dictionary-Based Collections mapping mapping - out of the box solution to your question. Extract from the link:
from sqlalchemy.orm.collections import column_mapped_collection, attribute_mapped_collection, mapped_collection
mapper(Item, items_table, properties={
# key by column
'notes': relation(Note, collection_class=column_mapped_collection(notes_table.c.keyword)),
# or named attribute
'notes2': relation(Note, collection_class=attribute_mapped_collection('keyword')),
# or any callable
'notes3': relation(Note, collection_class=mapped_collection(lambda entity: entity.a + entity.b))
})
# ...
item = Item()
item.notes['color'] = Note('color', 'blue')
print item.notes['color']
Or try the solution for Inserting data in Many to Many relationship in SQLAlchemy. Obviously you have to replace the list logic with the dict one.
Ask question author to post hist final code with associationproxy, which he mentioned he used in the end.
There is very similar question with slight interface difference. But it's easy to fix it by defining __getitem__, __setitem__ and __delitem__ methods.
Comment for Brent, above:
You can use session.flush() instead of commit() to get an id on your model instances. flush() will execute the necessary SQL, but will not commit, so you can rollback later if needed.
I ended up combining Denis and van's posts together to form the solution:
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import MetaData, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
meta = MetaData()
Base = declarative_base(metadata=meta, name='Base')
class PropertyValue(Base):
__tablename__ = 'property_values'
WordID = Column(Integer, ForeignKey('words.id'), primary_key=True)
PropID = Column(Integer, ForeignKey('properties.id'), primary_key=True)
Value = Column(String(20))
def _property_for_name(prop_name):
return s.query(Property).filter_by(name=prop_name).first()
def _create_propval(prop_name, prop_val):
p = _property_for_name(prop_name)
if not p:
p = Property(prop_name)
s.add(p)
s.commit()
return PropertyValue(PropID=p.id, Value=prop_val)
class Word(Base):
__tablename__ = 'words'
id = Column(Integer, primary_key=True)
string = Column(String(20), nullable=False)
freq = Column(Integer)
_props = relation(PropertyValue, collection_class=attribute_mapped_collection('PropID'), cascade='all, delete-orphan')
props = association_proxy('_props', 'Value', creator=_create_propval)
def __init__(self, string, freq=1):
self.string = string
self.freq = freq
def __getitem__(self, prop):
p = _property_for_name(prop)
if p:
return self.props[p.id]
else:
return None
def __setitem__(self, prop, val):
self.props[prop] = val
def __delitem__(self, prop):
p = _property_for_name(prop)
if p:
del self.props[prop]
class Property(Base):
__tablename__ = 'properties'
id = Column(Integer, primary_key=True)
name = Column(String(20), nullable=False, unique=True)
def __init__(self, name):
self.name = name
engine = create_engine('sqlite:///test.db', echo=False)
Session = sessionmaker(bind=engine)
s = Session()
meta.create_all(engine)
The test code is as follows:
word = Word('foo', 42)
word['bar'] = "yes"
word['baz'] = "certainly"
s.add(word)
word2 = Word('quux', 20)
word2['bar'] = "nope"
word2['groink'] = "nope"
s.add(word2)
word2['groink'] = "uh-uh"
del word2['bar']
s.commit()
word = s.query(Word).filter_by(string="foo").first()
print word.freq, word['baz']
# prints 42 certainly
The contents of the databases are:
$ sqlite3 test.db "select * from property_values"
1|2|certainly
1|1|yes
2|3|uh-uh
$ sqlite3 test.db "select * from words"
1|foo|42
2|quux|20
$ sqlite3 test.db "select * from properties"
1|bar
2|baz
3|groink

Categories

Resources