Related
Here's my code / what I've tried. How can I query a json key containing a list?
import sqlalchemy
from sqlalchemy import Column, Integer, JSON
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# Unbound session factory; an engine is attached later via Session.configure().
Session = sessionmaker()
# Declarative base class that the mapped model inherits from.
Base = declarative_base()
class Track(Base):  # noqa: WPS230
    """Track row whose ``fields`` column stores an arbitrary JSON document."""

    __tablename__ = "track"

    id = Column(Integer, primary_key=True)
    # A callable default yields a fresh empty JSON object per row; the string
    # "{}" would be serialised as a JSON *string*, not as an empty object.
    fields = Column(JSON(none_as_null=True), default=dict)

    def __init__(self, id):
        """Create a track with primary key ``id`` and an empty ``fields`` dict."""
        self.id = id
        self.fields = {}
# In-memory SQLite database used for the reproduction.
engine = sqlalchemy.create_engine("sqlite:///:memory:")
Session.configure(bind=engine)
Base.metadata.create_all(engine) # creates tables
session = Session()
track1 = Track(id=1)
track2 = Track(id=2)
track1.fields["list"] = ["wow"]
track2.fields["list"] = ["wow", "more", "items"]
# NOTE(review): only track1 is added to the session; track2 is never added,
# so it is presumably not persisted by the commit below -- confirm intent.
session.add(track1)
session.commit()
# Query attempts: compare the "list" key as a string, then as JSON.
session.query(Track).filter(Track.fields["list"].as_string() == "wow").one()
session.query(Track).filter(Track.fields["list"].as_string() == "[wow]").one()
session.query(Track).filter(
Track.fields["list"].as_json() == ["wow", "more", "items"]
).one()
I've also tried contains() instead of ==, but that seems to match substrings of elements as well, which I don't want.
I managed to get the behavior I was after by utilizing json_each. To filter against the entire list, I just need to create a new json_each function for each element I want to test against.
#!/usr/bin/env python3
import sqlalchemy
from sqlalchemy import func
from sqlalchemy import Column, Integer, JSON
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# Unbound session factory; an engine is attached later via Session.configure().
Session = sessionmaker()
# Declarative base class that the mapped model inherits from.
Base = declarative_base()
class Track(Base):
    """Track row whose ``fields`` column stores an arbitrary JSON document."""

    __tablename__ = "track"

    id = Column(Integer, primary_key=True)
    # A callable default yields a fresh empty JSON object per row; the string
    # "{}" would be serialised as a JSON *string*, not as an empty object.
    fields = Column(JSON, default=dict)

    def __init__(self, id):
        """Create a track with primary key ``id`` and an empty ``fields`` dict."""
        self.id = id
        self.fields = {}
# In-memory SQLite database for the demonstration.
engine = sqlalchemy.create_engine("sqlite:///:memory:")
Session.configure(bind=engine)
Base.metadata.create_all(engine)
session = Session()
track = Track(id=1)
track.fields["list"] = ["a", "list"]
session.add(track)
session.commit()
# One json_each() table-valued function over "$.list" per element that has
# to be matched; the element is read through the function's "value" column.
fields_func1 = func.json_each(Track.fields, "$.list").table_valued(
"value", joins_implicitly=True
)
fields_func2 = func.json_each(Track.fields, "$.list").table_valued(
"value", joins_implicitly=True
)
# Match a single element of the list.
session.query(Track).filter(fields_func1.c.value == "list").one()
session.query(Track).filter(fields_func1.c.value == "a").one()
# Match two elements at once, using a separate function for each element.
session.query(Track).filter(
fields_func1.c.value == "a", fields_func2.c.value == "list"
).one()
I'm attempting to create an ORM model to store a dictionary of lists in SQLAlchemy. I've had a little bit of progress based on https://gist.github.com/onecrayon/646da61accf54674d4f5098376a2c5df, but I'm stuck using the code below:
import operator
from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm.collections import MappedCollection, collection, _instrument_class
from sqlalchemy.ext.declarative import declarative_base
# Extra DBAPI connection arguments handed to create_engine().
connect_args = {}
connect_args["check_same_thread"] = False
engine = create_engine("sqlite:///test_orm.sqlite", connect_args=connect_args)
# Session factory with autocommit and autoflush switched off.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Single session used by the rest of this script.
db = SessionLocal()
Base = declarative_base()
class KeyedListCollection(MappedCollection):
    """Dict-style relationship collection that groups members into lists by key.

    NOTE(review): the values stored here are plain lists rather than mapped
    instances; the accompanying text reports that ``db.delete(item)`` fails
    with ``'list' object has no attribute '_sa_instance_state'`` for this
    design.
    """

    def __init__(self, key):
        """Key the collection by the attribute named ``key`` of each member."""
        super().__init__(operator.attrgetter(key))

    # Restored decorator: the leading "@" had been turned into "#", which left
    # the line as an inert comment.
    @collection.internally_instrumented
    def __setitem__(self, key, value, _sa_initiator=None):
        """Append ``value`` to the list under ``key``, creating it on first use."""
        if not super().get(key):
            super().__setitem__(key, [], _sa_initiator)
        super().__getitem__(key).append(value)


_instrument_class(KeyedListCollection)
class Prop(Base):
    """A single key/value string pair attached to an ``Item``."""

    __tablename__ = "props"

    id = Column(Integer, primary_key=True, index=True)
    item_id = Column(Integer, ForeignKey("items.id"))
    key = Column(String)
    value = Column(String)

    # Many-to-one side; paired with the "props" attribute of Item.
    item = relationship("Item", back_populates="props")
class Item(Base):
    """Item owning ``Prop`` rows, grouped by key through ``KeyedListCollection``."""

    __tablename__ = "items"

    id = Column(Integer, primary_key=True, index=True)
    props = relationship(
        "Prop",
        collection_class=lambda: KeyedListCollection("key"),
        cascade="all, delete-orphan",
        back_populates="item",
    )

    # Restored decorator (the "@" had been garbled into "#"); callers read
    # ``item.props_p`` as an attribute, not as a method call.
    @property
    def props_p(self):
        """Return ``{key: [value, ...]}`` built from the grouped ``Prop`` rows."""
        return {k: [v.value for v in vs] for k, vs in self.props.items()}
Base.metadata.create_all(bind=engine)

# Sample payload: property name -> list of values.
dat = {
    "props": {
        "p1": [
            "a",
            "b",
            "c",
        ],
        "p2": [
            "d",
            "e",
            "f",
        ],
    },
}

item = Item()
db.add(item)
db.commit()
db.refresh(item)

props = []
for k, vs in dat["props"].items():
    props.extend([Prop(key=k, value=v, item=item) for v in vs])
# add_all() replaces a list comprehension that was evaluated only for its
# side effects.
db.add_all(props)
db.commit()

item = db.query(Item).order_by(Item.id.desc()).first()
print(item.props_p)

# NOTE: per the accompanying text, this delete is the call that raises
# AttributeError: 'list' object has no attribute '_sa_instance_state'.
db.delete(item)
db.commit()
db.close()
The db.delete(item) line raises AttributeError: 'list' object has no attribute '_sa_instance_state'. I assume I have to add some sort of delete method to my custom mapper, but I don't know how. I tried overriding __delitem__, but that didn't seem to even get called.
The output from print(item.props_p) is what I'm looking for, but I don't think using a function in the model is the right way to do it because I can't use it the other way, i.e. feed it data with that schema (like the dictionary dat in the example) and have it stored properly.
I know I should be using an association_proxy in some way because I've done that combined with an attribute_mapped_collection to make a dictionary of strings, but I can't figure out how to make it work for a dictionary of lists.
Anyone got ideas for me?
After more fiddling around, I changed Item to:
class Item(Base):
    """Item exposing its ``Prop`` rows as a ``{key: [value, ...]}`` dictionary."""

    __tablename__ = "items"

    id = Column(Integer, primary_key=True, index=True)
    # Plain list relationship; the dictionary view is built by ``props``.
    _props = relationship(
        "Prop",
        cascade="all, delete-orphan",
        back_populates="item",
    )

    # Restored decorators: "@property" / "@props.setter" had been garbled into
    # comments, which made the second ``def props`` silently replace the first.
    @property
    def props(self):
        """Return the stored properties grouped by key, in row order."""
        out = {}
        for p in self._props:
            out.setdefault(p.key, []).append(p.value)
        return out

    @props.setter
    def props(self, props):
        """Replace all rows with one ``Prop`` per (key, value) pair in ``props``."""
        self._props = [
            Prop(key=k, value=v) for k, vs in props.items() for v in vs
        ]
And that seems to do exactly what I want. I'm not sure that's the "right way", but it gets the job done.
I'm converting a library to use SQLAlchemy as the datastore. I like the flexibility of the PickleType column, but it doesn't seem to work well when pickling SA objects (table rows). Even if I overload setstate and getstate to do a query + session merge when unpickling, there's no referential integrity across that pickle boundary. That means that I can't query collections of objects.
class Bar(Base):
    """Row type that the question wants to store inside ``Foo.values``."""

    __tablename__ = "bars"

    id = Column(Integer, primary_key=True)
    foo_id = Column(Integer, ForeignKey("foos.id"), primary_key=True)
class Foo(Base):
    """Owner of a pickled ``values`` payload that may contain ``Bar`` objects."""

    __tablename__ = 'foos'

    # Primary key: a mapped class needs one, and Bar.foo_id references foos.id.
    id = Column(Integer, primary_key=True)
    values = Column(PickleType)
    # values = relationship(Bar)  # list interface (one->many), but can't assign a scalar or use a dictionary

    def __init__(self):
        """Initialise ``values`` with a list of two new ``Bar`` instances."""
        self.values = [Bar(), Bar()]
        # only allowed with PickleType column
        # self.values = Bar()
        # self.values = {'one' : Bar()}
        # self.values = [ [Bar(), Bar()], [Bar(), Bar()]]
# get all Foo's with a Bar whose id=1
# NOTE(review): this is the query the question would like to run; `session`
# is not defined in this snippet -- presumably an open Session, confirm.
session.query(Foo).filter(Foo.values.any(Bar.id == 1)).all()
One workaround would be to implement my own mutable object type as is done here. I'm imagining having some kind of flattening scheme which traverses the collections and appends them to a simpler one->many relationship. Perhaps the flattened list might have to be weakrefs to the pickled collection's objects?
Tracking changes and references sounds like no fun and I can't find any examples of people pickling SA rows anywhere else (perhaps indicative of bad design on my part?). Any advice?
EDIT 1:
After some discussion I've simplified the request. I'm looking for a single property that can behave as either a scalar or a collection. Here is my (failing) attempt:
from sqlalchemy import MetaData, Column, Integer, PickleType, String, ForeignKey, create_engine
from sqlalchemy.orm import relationship, Session
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.collections import attribute_mapped_collection
# from http://www.sqlalchemy.org/trac/browser/examples/vertical
from sqlalchemy_examples.vertical import dictlike_polymorphic as dictlike
metadata = MetaData()
Base = declarative_base()
# SQLite engine with echo=True so the emitted SQL is logged.
engine = create_engine('sqlite://', echo=True)
# Bind the declarative metadata so create_all() can be called without arguments.
Base.metadata.bind = engine
session = Session(engine)
class AnimalFact(dictlike.PolymorphicVerticalProperty, Base):
    """key/value attribute whose value can be one of several types"""

    __tablename__ = "animalfacts"

    # Consumed by PolymorphicVerticalProperty; presumably maps a Python type
    # to (type discriminator, attribute name) -- confirm against the mixin.
    type_map = {
        # str: ('string', 'str_value'),
        list: ("list", "list_value"),
        tuple: ("tuple", "tuple_value"),
    }

    id = Column(Integer, primary_key=True)
    animal_id = Column(Integer, ForeignKey("animal.id"), primary_key=True)
    key = Column(String, primary_key=True)
    type = Column(String)

    # str_value = Column(String)
    list_value = relationship("StringEntry")
    tuple_value = relationship("StringEntry2")
class Animal(Base, dictlike.VerticalPropertyDictMixin):
    """An animal whose facts are reachable through a dict-style interface."""

    __tablename__ = "animal"

    # Settings read by VerticalPropertyDictMixin (presumably the fact class
    # and the name of the relationship that holds the facts -- confirm).
    _property_type = AnimalFact
    _property_mapping = "facts"

    id = Column(Integer, primary_key=True)
    name = Column(String)
    # Facts keyed by their "key" attribute.
    facts = relationship(
        AnimalFact,
        backref="animal",
        collection_class=attribute_mapped_collection("key"),
    )

    def __init__(self, name):
        """Create an animal called ``name``."""
        self.name = name
class StringEntry(Base):
    """One string element of an ``AnimalFact.list_value`` collection."""

    __tablename__ = "stringentry"

    id = Column(Integer, primary_key=True)
    animalfacts_id = Column(Integer, ForeignKey("animalfacts.id"))
    value = Column(String)

    def __init__(self, value):
        """Wrap the string ``value`` in a row object."""
        self.value = value
class StringEntry2(Base):
    """One string element of an ``AnimalFact.tuple_value`` collection."""

    __tablename__ = "stringentry2"

    id = Column(Integer, primary_key=True)
    animalfacts_id = Column(Integer, ForeignKey("animalfacts.id"))
    value = Column(String)

    def __init__(self, value):
        """Wrap the string ``value`` in a row object."""
        self.value = value
# Create the tables, then exercise the dict-style interface on Animal with
# list and tuple values; the plain-string case is the one that fails.
Base.metadata.create_all()
a = Animal('aardvark')
a['eyes'] = [StringEntry('left side'), StringEntry('right side')] # works great
a['eyes'] = (StringEntry2('left side'), StringEntry2('right side')) # works great
#a['cute'] = 'sort of' # failure
The PickleType is really a hacky way around edge cases where you have some arbitrary object you'd just like to shove away. It's a given that when you use PickleType, you're giving up any relational advantages, including being able to filter/query on them, etc.
So putting an ORM mapped object in a Pickle is basically a terrible idea.
If you want a collection of scalar values, use traditional mappings and relationship() in combination with association_proxy. See http://docs.sqlalchemy.org/en/rel_0_7/orm/extensions/associationproxy.html#simplifying-scalar-collections .
"or dictionaries". Use attribute_mapped_collection: http://docs.sqlalchemy.org/en/rel_0_7/orm/collections.html#dictionary-collections
"dictionaries plus scalars": combine both attribute_mapped_collection and association_proxy: http://docs.sqlalchemy.org/en/rel_0_7/orm/extensions/associationproxy.html#proxying-to-dictionary-based-collections
Edit 1:
Well, you dug into a really esoteric and complex example there. association_proxy is a much easier way to handle these cases where you want an object to act like a scalar, so here's that, without all the boilerplate of the "vertical" example, which I'd avoid as it is really too complex. Your example seemed undecided about primary key style, so I went with the composite version. Surrogate + composite can't be mixed in a single table (well, it can, but it's relationally incorrect. The key should be the smallest unit that identifies a row - http://en.wikipedia.org/wiki/Unique_key is a good top-level read on the various subjects around this).
from sqlalchemy import Integer, String, Column, create_engine, ForeignKey, ForeignKeyConstraint
from sqlalchemy.orm import relationship, Session
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.associationproxy import association_proxy
# Declarative base for the mapped classes in this example.
Base = declarative_base()
class AnimalFact(Base):
    """key/value attribute whose value can be either a string or a list of strings"""

    __tablename__ = 'animalfacts'

    # use either surrogate PK id, or the composite animal_id/key - but
    # not both. id/animal_id/key all together is not a proper key.
    # Personally I'd go for "id" here, but here's the composite version.
    animal_id = Column(Integer, ForeignKey('animal.id'), primary_key=True)
    key = Column(String, primary_key=True)

    # data
    str_value = Column(String)
    _list_value = relationship('StringEntry')

    # proxy list strings
    list_proxy = association_proxy('_list_value', 'value')

    def __init__(self, key, value):
        """Create the fact ``key`` holding ``value`` (a string or a list of strings)."""
        self.key = key
        self.value = value

    # Restored decorators: "@property" / "@value.setter" had been garbled into
    # comments.
    @property
    def value(self):
        """Return the string value if one is set, otherwise the list proxy."""
        if self.str_value is not None:
            return self.str_value
        return self.list_proxy

    @value.setter
    def value(self, value):
        """Store ``value`` as a string or a list, clearing the other form.

        Raises:
            TypeError: if ``value`` is neither a string nor a list.
        """
        # ``basestring`` keeps this snippet's Python 2 semantics (str + unicode).
        if isinstance(value, basestring):
            self.str_value = value
            # Drop list entries left over from an earlier list assignment.
            self._list_value = []
        elif isinstance(value, list):
            # The getter prefers str_value, so a stale string would otherwise
            # hide the newly assigned list.
            self.str_value = None
            self.list_proxy = value
        else:
            # Explicit raise: a bare ``assert False`` is stripped under -O.
            raise TypeError(
                "value must be a string or a list, got %r" % (type(value),)
            )
class Animal(Base):
    """An animal with a dictionary of facts, also reachable via ``animal[key]``."""

    __tablename__ = "animal"

    id = Column(Integer, primary_key=True)
    name = Column(String)

    # AnimalFact rows keyed by their "key" attribute ...
    _facts = relationship(
        AnimalFact,
        backref="animal",
        collection_class=attribute_mapped_collection("key"),
    )
    # ... and a proxy exposing each fact's "value" directly.
    facts = association_proxy("_facts", "value")

    def __init__(self, name):
        """Create an animal called ``name``."""
        self.name = name

    # dictionary interface around "facts".
    # I'd just use "animal.facts" here, but here's how to skip that.
    def __getitem__(self, key):
        return self.facts[key]

    def __setitem__(self, key, value):
        self.facts[key] = value

    def __delitem__(self, key):
        del self.facts[key]

    def __contains__(self, key):
        return key in self.facts

    def keys(self):
        """Return the keys of the ``facts`` dictionary."""
        return self.facts.keys()
class StringEntry(Base):
    """One string element of an ``AnimalFact`` list value."""

    __tablename__ = 'myvalue'

    id = Column(Integer, primary_key=True)
    animal_id = Column(Integer)
    # Must be String: it references animalfacts.key, which is a String column.
    key = Column(String)
    value = Column(String)

    # because AnimalFact has a composite PK, we need
    # a composite FK.
    __table_args__ = (
        ForeignKeyConstraint(
            ['key', 'animal_id'],
            ['animalfacts.key', 'animalfacts.animal_id'],
        ),
    )

    def __init__(self, value):
        """Wrap the string ``value`` in a row object."""
        self.value = value
engine = create_engine('sqlite://', echo=True)
Base.metadata.create_all(engine)
session = Session(engine)

# create a new animal
a = Animal('aardvark')
a['eyes'] = ['left side', 'right side']
a['cute'] = 'sort of'
session.add(a)
session.commit()
session.close()

# A single pre-formatted argument makes print() emit the same text on
# Python 2 and Python 3 (the original used the Python 2 print statement).
for animal in session.query(Animal):
    facts = ",".join(["%s" % animal[key] for key in animal.keys()])
    print("%s %s" % (animal.name, facts))
I'm new to SQLAlchemy and relational databases, and I'm trying to set up a model for an annotated lexicon. I want to support an arbitrary number of key-value annotations for the words which can be added or removed at runtime. Since there will be a lot of repetition in the names of the keys, I don't want to use this solution directly, although the code is similar.
My design has word objects and property objects. The words and properties are stored in separate tables with a property_values table that links the two. Here's the code:
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import MetaData, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
# File-backed SQLite engine; echo=True logs the emitted SQL.
engine = create_engine('sqlite:///test.db', echo=True)
meta = MetaData(bind=engine)
# Link table between words and properties that also carries the value.
property_values = Table('property_values', meta,
Column('word_id', Integer, ForeignKey('words.id')),
Column('property_id', Integer, ForeignKey('properties.id')),
Column('value', String(20))
)
words = Table('words', meta,
Column('id', Integer, primary_key=True),
Column('name', String(20)),
Column('freq', Integer)
)
# Property names are unique, so each name is stored only once.
properties = Table('properties', meta,
Column('id', Integer, primary_key=True),
Column('name', String(20), nullable=False, unique=True)
)
meta.create_all()
class Word(object):
    """A lexicon word together with its frequency."""

    def __init__(self, name, freq=1):
        """Store the word text ``name`` and its frequency ``freq`` (default 1)."""
        self.name = name
        self.freq = freq
class Property(object):
    """A named annotation key that can be attached to words."""

    def __init__(self, name):
        """Store the property name ``name``."""
        self.name = name
# Classical mapping of Property onto the properties table.
mapper(Property, properties)
Now I'd like to be able to do the following:
# Desired usage; the mappings needed to support it are not in place yet.
Session = sessionmaker(bind=engine)
s = Session()
word = Word('foo', 42)
word['bar'] = 'yes' # or word.bar = 'yes' ?
s.add(word)
s.commit()
Ideally this should add 1|foo|42 to the words table, add 1|bar to the properties table, and add 1|1|yes to the property_values table. However, I don't have the right mappings and relations in place to make this happen. I get the sense from reading the documentation at http://www.sqlalchemy.org/docs/05/mappers.html#association-pattern that I want to use an association proxy or something of that sort here, but the syntax is unclear to me. I experimented with this:
# Attempted mapping: many-to-many through property_values. Per the
# accompanying text it fills in only the foreign keys, not the value column.
mapper(Word, words, properties={
'properties': relation(Property, secondary=property_values)
})
but this mapper only fills in the foreign key values, and I need to fill in the other value as well. Any assistance would be greatly appreciated.
Simply use the Dictionary-Based Collections mapping - an out-of-the-box solution to your question. Extract from the link:
from sqlalchemy.orm.collections import column_mapped_collection, attribute_mapped_collection, mapped_collection
# Three ways of keying a dictionary-based relationship collection.
mapper(Item, items_table, properties={
    # key by column
    'notes': relation(Note, collection_class=column_mapped_collection(notes_table.c.keyword)),
    # or named attribute
    'notes2': relation(Note, collection_class=attribute_mapped_collection('keyword')),
    # or any callable
    'notes3': relation(Note, collection_class=mapped_collection(lambda entity: entity.a + entity.b))
})
# ...
item = Item()
item.notes['color'] = Note('color', 'blue')
# Parenthesised single-argument print is valid on both Python 2 and 3.
print(item.notes['color'])
Or try the solution for Inserting data in Many to Many relationship in SQLAlchemy. Obviously you have to replace the list logic with the dict one.
Ask the question's author to post his final code using association_proxy, which he mentioned he used in the end.
There is very similar question with slight interface difference. But it's easy to fix it by defining __getitem__, __setitem__ and __delitem__ methods.
Comment for Brent, above:
You can use session.flush() instead of commit() to get an id on your model instances. flush() will execute the necessary SQL, but will not commit, so you can rollback later if needed.
I ended up combining Denis and van's posts together to form the solution:
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import MetaData, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
# Shared MetaData so that meta.create_all() covers every declarative table.
meta = MetaData()
Base = declarative_base(metadata=meta, name='Base')
class PropertyValue(Base):
    """Association row giving one word's value for one property."""

    __tablename__ = "property_values"

    # Composite primary key: at most one value per (word, property) pair.
    WordID = Column(Integer, ForeignKey("words.id"), primary_key=True)
    PropID = Column(Integer, ForeignKey("properties.id"), primary_key=True)
    Value = Column(String(20))
def _property_for_name(prop_name):
    """Look up the ``Property`` named ``prop_name`` using the module session ``s``.

    Returns the result of ``.first()`` on the filtered query, which callers
    test for truthiness to detect a missing property.
    """
    matching = s.query(Property).filter_by(name=prop_name)
    return matching.first()
def _create_propval(prop_name, prop_val):
    """Build a ``PropertyValue`` for ``prop_name``, creating the ``Property`` if needed.

    Used as the association-proxy creator for ``Word.props``.

    Args:
        prop_name: name of the property to look up or create.
        prop_val: value to store on the new ``PropertyValue``.

    Returns:
        A new, unattached ``PropertyValue`` whose ``PropID`` points at the
        property named ``prop_name``.
    """
    p = _property_for_name(prop_name)
    if p is None:
        p = Property(prop_name)
        s.add(p)
        # flush() emits the INSERT so that p.id is populated, without
        # committing (and thereby ending) the caller's transaction.
        s.flush()
    return PropertyValue(PropID=p.id, Value=prop_val)
class Word(Base):
    """A lexicon word with dict-style access to its property values.

    ``word[name]`` reads, writes and deletes the value of the property called
    ``name``. Item access always keys the underlying collection by ``PropID``,
    which is also how the collection is keyed when loaded from the database.
    """

    __tablename__ = 'words'

    id = Column(Integer, primary_key=True)
    string = Column(String(20), nullable=False)
    freq = Column(Integer)

    # PropertyValue rows keyed by PropID; ``props`` exposes just their Value.
    _props = relation(
        PropertyValue,
        collection_class=attribute_mapped_collection('PropID'),
        cascade='all, delete-orphan',
    )
    props = association_proxy('_props', 'Value', creator=_create_propval)

    def __init__(self, string, freq=1):
        """Create a word with text ``string`` and frequency ``freq``."""
        self.string = string
        self.freq = freq

    def __getitem__(self, prop):
        """Return this word's value for property ``prop``.

        Returns ``None`` when no property with that name exists; raises
        ``KeyError`` when the property exists but this word has no value.
        """
        p = _property_for_name(prop)
        if p:
            return self.props[p.id]
        return None

    def __setitem__(self, prop, val):
        """Set this word's value for property ``prop``, creating it if needed."""
        # _create_propval resolves (or creates) the Property and hands back a
        # PropertyValue carrying its id. Storing it under that id keeps the
        # in-memory key the same as the key used after a reload; storing it
        # under the name would make __getitem__ miss it and could insert a
        # duplicate (WordID, PropID) row for an already persisted value.
        candidate = _create_propval(prop, val)
        current = self._props.get(candidate.PropID)
        if current is None:
            self._props[candidate.PropID] = candidate
        else:
            current.Value = val

    def __delitem__(self, prop):
        """Remove this word's value for property ``prop``.

        Unknown property names are ignored; ``KeyError`` is raised when the
        property exists but this word has no value for it.
        """
        p = _property_for_name(prop)
        if p:
            # Keyed by PropID, consistently with __getitem__ and __setitem__.
            del self.props[p.id]
class Property(Base):
    """A uniquely named annotation key shared between words."""

    __tablename__ = "properties"

    id = Column(Integer, primary_key=True)
    name = Column(String(20), nullable=False, unique=True)

    def __init__(self, name):
        """Create a property called ``name``."""
        self.name = name
# File-backed SQLite database; `s` is the module-level session that the
# helper functions above rely on.
engine = create_engine('sqlite:///test.db', echo=False)
Session = sessionmaker(bind=engine)
s = Session()
meta.create_all(engine)
The test code is as follows:
word = Word('foo', 42)
word['bar'] = "yes"
word['baz'] = "certainly"
s.add(word)

word2 = Word('quux', 20)
word2['bar'] = "nope"
word2['groink'] = "nope"
s.add(word2)
# Overwrite one value and delete another before committing.
word2['groink'] = "uh-uh"
del word2['bar']
s.commit()

word = s.query(Word).filter_by(string="foo").first()
# A single pre-formatted argument makes print() emit the same text on
# Python 2 and Python 3 (the original used the Python 2 print statement).
print("%s %s" % (word.freq, word['baz']))
# prints 42 certainly
The contents of the databases are:
$ sqlite3 test.db "select * from property_values"
1|2|certainly
1|1|yes
2|3|uh-uh
$ sqlite3 test.db "select * from words"
1|foo|42
2|quux|20
$ sqlite3 test.db "select * from properties"
1|bar
2|baz
3|groink
I am working on a quite large code base that has been implemented using sqlalchemy.ext.declarative, and I need to add a dict-like property to one of the classes. What I need is the same as in this question, but in a declarative fashion. Can anyone with more knowledge in SQLAlchemy give me an example?
Thanks in advance...
Declarative is just another way of defining things. You end up with virtually the same environment as if you had used separate (classical) mapping.
Since I answered the other question, I'll try this one as well. Hope it gives more upvotes ;)
Well, first we define the classes
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import orm, MetaData, Column, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.orm.collections import column_mapped_collection
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
# In-memory SQLite engine; echo=True logs the emitted SQL.
engine = create_engine('sqlite:///:memory:', echo=True)
# Declarative base bound to the engine, so create_all() needs no argument.
Base = declarative_base(bind=engine)
class Note(Base):
    """A named string note belonging to an ``Item``."""

    __tablename__ = "notes"

    # Composite primary key: one note per (item, name) pair.
    id_item = Column(Integer, ForeignKey("items.id"), primary_key=True)
    name = Column(String(20), primary_key=True)
    value = Column(String(100))

    def __init__(self, name, value):
        """Create the note ``name`` holding ``value``."""
        self.name = name
        self.value = value
class Item(Base):
    """An item whose notes are exposed as a plain ``{name: value}`` mapping."""

    __tablename__ = "items"

    id = Column(Integer, primary_key=True)
    name = Column(String(20))
    description = Column(String(100))

    # Note rows keyed by the Note.name column ...
    _notesdict = relation(
        Note,
        collection_class=column_mapped_collection(Note.name),
    )
    # ... and a proxy exposing each note's "value"; new entries are built by
    # calling Note(name, value).
    notes = association_proxy("_notesdict", "value", creator=Note)

    def __init__(self, name, description=''):
        """Create an item called ``name`` with an optional ``description``."""
        self.name = name
        self.description = description
# Create every table registered on Base.metadata.
Base.metadata.create_all()
Now let's make a test:
Session = sessionmaker(bind=engine)
s = Session()

# Fill the dict-style notes proxy, then persist the item.
i = Item('ball', 'A round full ball')
i.notes['color'] = 'orange'
i.notes['size'] = 'big'
i.notes['data'] = 'none'
s.add(i)
s.commit()
# Parenthesised single-argument print is valid on both Python 2 and 3.
print(i.notes)
I get:
{u'color': u'orange', u'data': u'none', u'size': u'big'}
Now let's check the notes table...
# Dump every stored note row. A single pre-formatted argument makes print()
# emit the same text on Python 2 and Python 3.
for note in s.query(Note):
    print("%s %s %s" % (note.id_item, note.name, note.value))
I get:
1 color orange
1 data none
1 size big
It works!! :D