I've written the following example code to build a search index for the ACL system I am writing. The query in this example returns all objects that have any of the given ACLs assigned. But I need a query/filter that returns objects that have all of the given ACLs assigned.
Any help is appreciated.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import backref
from sqlalchemy import create_engine
from sqlalchemy import ForeignKey
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy import Column
# Demo database location and SQLAlchemy boilerplate (engine + session factory).
_db_uri = "sqlite:////tmp/test.sql"
Base = declarative_base()
engine = create_engine(_db_uri, echo=False)
Session = sessionmaker(bind=engine)
class IndexObject(Base):
    """A searchable object that owns a collection of ACL entries."""

    __tablename__ = 'objects'

    id = Column(Integer, primary_key=True)
    name = Column(String(128), unique=True, nullable=True)

    # One-to-many link to IndexObjectACL; cascading delete removes an
    # object's ACL rows together with the object.
    acls = relationship(
        'IndexObjectACL',
        cascade="all,delete",
        backref='objects',
        lazy='dynamic',
    )

    def __repr__(self):
        return "<IndexObject (name='%s')>" % (self.name)
class IndexObjectACL(Base):
    """A single ACL entry (e.g. "view", "edit") attached to an IndexObject."""

    __tablename__ = 'acls'

    id = Column(Integer, primary_key=True)
    value = Column(String(128), nullable=False)
    # Primary key of the owning IndexObject row.
    oid = Column(Integer, ForeignKey('objects.id'))

    def __repr__(self):
        return "<IndexObjectACL (value='%s')>" % (self.value)
# Demo fixture: three objects, each with its own set of ACL names.
object_list = [
    "object1",
    "object2",
    "object3",
]
acl_list = {
    "object1" : [
        "view",
        "edit",
        "enable",
        "delete",
    ],
    "object2" : [
        "view",
        "edit",
    ],
    "object3" : [
        "enable",
        "delete",
    ],
}
Base.metadata.create_all(engine)
session = Session()
# Insert each object together with its ACL rows.
for o in object_list:
    acls = []
    for acl in acl_list[o]:
        a = IndexObjectACL(value=acl)
        acls.append(a)
    index_object = IndexObject(name=o, acls=acls)
    session.add(index_object)
session.commit()
# "Have any" query: matches objects holding at least one of the search ACLs
# (objects 1, 2 and 3 here) — not the desired "have all" behavior.
search_acls = [ "enable", "delete" ]
q = session.query(IndexObject)
q = q.join(IndexObject.acls).filter(IndexObjectACL.value.in_(search_acls))
print(q.all())
session.close()
I think this could be an opportunity to use division – in a way. IndexObjectACL divided by SearchAcls should yield IndexObjects that have all the SearchAcls. In other words query for IndexObjects for which no such SearchAcls exist that are not in its IndexObjectACLs:
from sqlalchemy import union, select, literal

# Build an aliased UNION of single-row SELECTs, one per search ACL, so the
# search terms behave like a derived table that we can "divide" by.
search_acls_union = union(*(select([literal(acl).label('acl')])
                            for acl in search_acls)).alias()

# Relational division via double negation:
# keep IndexObjects for which NO search ACL exists
# that is NOT among the object's own ACL rows.
q = session.query(IndexObject).\
    filter(~session.query().select_from(search_acls_union).
           filter(~IndexObject.acls.any(value=search_acls_union.c.acl)).
           exists())
The result of this query is
[<IndexObject (name='object1')>, <IndexObject (name='object3')>]
and if you add
"object4" : [
"enable",
],
"object5" : [
"delete",
],
to your acl_list (and the object names to object_list) for proving that partial matches are not returned, it still returns only objects 1 and 3.
Your original "have any" query could also be rewritten to use a semijoin, or EXISTS in SQL speak:
# "Have any" rewritten as a semijoin (EXISTS in SQL speak).
q = session.query(IndexObject).\
    filter(IndexObject.acls.any(
        IndexObjectACL.value.in_(search_acls)))

# "Have all" via INTERSECT: one filtered query per search ACL, then keep
# only the objects that appear in every one of them.
queries = []
acl_q = q.join(IndexObject.acls)
for acl in search_acls:
    x = acl_q.filter(IndexObjectACL.value == acl)
    queries.append(x)
q = q.intersect(*queries)
I can try to explain it, but I am new to SQLAlchemy and SQL in general, so I might explain it the wrong way... The join() joins the IndexObject and IndexObjectACL tables based on their relationship, which results in a new query. This query is used to create a new query for each ACL we want to match, using filter(). Finally we use intersect() (SQL INTERSECT) to get all IndexObjects that appear in all of the queries. After some testing it seems like this is a fast way to search for objects that have all of the given ACLs assigned. It's also very pythonic IMHO.
Related
I've been reading various examples from SQLAlchemy documentation for cascade deletes, but nothing I try seems to work. Below is some sample code adapted from that documentation, but using back_populates instead of backref, as I understand that backref is being deprecated.
In the "main" section below, I would expect that deleting the order that "contains" the items would delete the items as well, but that does not happen. Obviously I don't understand something about how to configure these tables... what is it?
# third party imports
from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy_utils import create_database, database_exists
Base = declarative_base()
class Order(Base):
    """Parent side of the relationship; deleting an Order cascades to Items."""

    __tablename__ = "business_order"

    id = Column(Integer, primary_key=True)
    name = Column(String(32))
    # delete-orphan: an Item removed from this collection is deleted outright,
    # not merely disassociated.
    items = relationship(
        "Item", back_populates="order", cascade="all, delete, delete-orphan"
    )
class Item(Base):
    """Child side; each Item belongs to at most one Order."""

    __tablename__ = "business_item"

    id = Column(Integer, primary_key=True)
    name = Column(String(32))
    order_id = Column(Integer, ForeignKey("business_order.id"))
    order = relationship("Order", back_populates="items")
def get_session(url="sqlite:///:memory:", create_db=True):
    """Get a SQLAlchemy Session instance for input database URL.

    :param url:
        SQLAlchemy URL for database, described here:
        http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls.
    :param create_db:
        Boolean indicating whether to create database from scratch.
    :returns:
        Sqlalchemy Session instance, or None when the database does not
        exist and create_db is off.
    """
    # Create the database first if it is missing and we are allowed to.
    if not database_exists(url):
        if create_db:
            create_database(url)
        else:
            msg = (
                "Database does not exist, will not create without "
                "create_db turned on."
            )
            print(msg)
            return None

    connect_args = {}
    # Engine for the given URL (an in-memory sqlite database by default).
    engine = create_engine(url, echo=False, connect_args=connect_args)
    Base.metadata.create_all(engine)

    # create a session object that we can use to insert and
    # extract information from the database
    Session = sessionmaker(bind=engine, autoflush=False)
    session = Session()
    return session
if __name__ == "__main__":
    sqlite_url = "sqlite:///test_sqlite.db"
    session = get_session(sqlite_url)
    order = Order(name="order1")
    session.add(order)
    # NOTE(review): order.id is still None at this point (nothing has been
    # flushed), so item gets order_id=None and is never linked to the order.
    item = Item(order_id=order.id, name="item1")
    session.add(item)
    session.commit()
    session.delete(order)  # should delete items too, right?
    session.commit()
    orders = session.query(Order).all()
    print(len(orders))  # this returns 0 as expected
    items = session.query(Item).all()
    print(len(items))  # this returns 1, why?
Order has an (implicit) autoincrement PK. When you do
order = Order(name="order1")
session.add(order)
order.id is None. Therefore, when you do
item = Item(order_id=order.id, name="item1")
item.order_id will also be None, so item is actually not associated with order. Therefore, the delete doesn't cascade.
order doesn't get its id until .flush() (or .commit()) is called. So you could either do
order = Order(name="order1")
session.add(order)
session.flush() # !
item = Item(order_id=order.id, name="item1")
session.add(item)
session.commit()
or do
order = Order(name="order1", items=[Item(name="item1")])
session.add(order)
session.commit()
session.delete(order) # should delete items too, right?
session.commit()
orders = session.query(Order).all()
print(len(orders)) # this returns 0 as expected
items = session.query(Item).all()
print(len(items)) # this also returns 0 as expected
We have 1 table with a large amount of data and DBA's partitioned it based on a particular parameter. This means I ended up with Employee_TX, Employee_NY kind of table names. Earlier the models.py was simple as in --
class Employee(Base):
__tablename__ = 'Employee'
name = Column...
state = Column...
Now, I don't want to create 50 new classes for the newly partitioned tables as anyways my columns are the same.
Is there a pattern where I can create a single class and then use it in query dynamically? session.query(<Tablename>).filter().all()
Maybe some kind of Factory pattern or something is what I'm looking for.
So far I've tried by running a loop as
for state in ['CA', 'TX', 'NY']:
class Employee(Base):
__qualname__ = __tablename__ = 'Employee_{}'.format(state)
name = Column...
state = Column...
but this doesn't work and I get a warning as - SAWarning: This declarative base already contains a class with the same class name and module name as app_models.employee, and will be replaced in the string-lookup table.
Also it can't find the generated class when I do from app_models import Employee_TX
This is a flask app with PostgreSQL as a backend and sqlalchemy is used as an ORM
Got it by creating a custom function like -
def get_model(state):
DynamicBase = declarative_base(class_registry=dict())
class MyModel(DynamicBase):
__tablename__ = 'Employee_{}'.format(state)
name = Column...
state = Column...
return MyModel
And then from my services.py, I just call with get_model(TX)
Whenever you think of dynamically constructing classes think of type() with 3 arguments (see this answer for a demonstration, and the docs more generally).
In your case, it's just a matter of constructing the classes and keeping a reference to them so you can access them again later.
Here's an example:
from sqlalchemy import Column, Integer, String
from sqlalchemy.engine import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
Base = declarative_base()
# this produces the set of common attributes that each class should have
def attribute_factory():
    """Return a fresh dict of the column attributes shared by every
    generated model (fresh, because a Column may only be bound to one
    table)."""
    return {
        "id": Column(Integer, primary_key=True),
        "name": Column(String, nullable=False),
        "state": Column(String, nullable=False),
        "CLASS_VAR": 12345678,
    }
states = ["CA", "TX", "NY"]

# here we map the state abbreviation to the generated model, notice the templated
# class and table names; type() with 3 arguments builds each class dynamically
model_map = {
    state: type(
        f"Employee_{state}",
        (Base,),
        dict(**attribute_factory(), __tablename__=f"Employee_{state}"),
    )
    for state in states
}

engine = create_engine("sqlite:///", echo=True)
Session = sessionmaker(bind=engine)
Base.metadata.create_all(engine)

if __name__ == "__main__":
    # inserts work
    s = Session()
    for state, model in model_map.items():
        s.add(model(name="something", state=state))
    s.commit()
    s.close()

    # queries work
    s = Session()
    for state, model in model_map.items():
        inst = s.query(model).first()
        print(inst.state, inst.CLASS_VAR)
Here is an absurd problem with sqlalchemy that seems easy! First, this is my config file for connecting to mysql database:
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
engine = create_engine('mysql://root:#localhost:3306/digi')
and then, I am trying to create a table called 'sale-history' :
from config import *
from sqlalchemy import *
class Sale(Base):
__tablename__ = 'sale-history'
order_id = column(Integer, primary_key= True)
customer_id = column(Integer)
item_id = column(Integer) #froeign key with product list
cartFinalize_dateTime = column(DATETIME)
amount_ordrered = column(Integer)
city_name = column(String(191))
quantity_ordered = column(Integer)
def __repr__(self):
return "<Sale(city_name='%s')>" % (self.city_name)
Sale.__table__
Base.metadata.create_all(engine)
Now, what I wonder is that
Sale.__table__
and
Base.metadata.create_all(engine)
are not known to my code. More accurate, these are not in suggestion options showed by pycharm editor. Debugging the code does not throw any error(returns 0). What should I do to create tables?
I appreciate your consideration so much!
The code is using column to define columns in the table but it should be using Column (note the upper-case "C").
A few tips/comments
Pycharm may provide better support if you avoid the from module import * idiom. You can alias module names if they are to long to type, for example import sqlalchemy as sa
You can see the SQL generated by the engine by passing echo=True to create_engine
Tablenames with hyphens need to be quoted with backticks to be valid. Sqlalchemy does this automatically, but other applications may not. Using underscores instead may be more convenient.
The final code might look like this:
config
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
engine = create_engine('mysql://root:#localhost:3306/test', echo=True)
Model
import sqlalchemy as sa  # fixed typo: was "sqlachemy"
import config

class Sale(config.Base):
    """One row of the sale-history table.

    Note: with ``import config`` (no star import), the declarative base and
    engine must be referenced as config.Base / config.engine.
    """

    # Hyphenated table names must be quoted in raw SQL; SQLAlchemy quotes
    # them automatically, but underscores would be more convenient.
    __tablename__ = 'sale-history'

    order_id = sa.Column(sa.Integer, primary_key=True)
    customer_id = sa.Column(sa.Integer)
    item_id = sa.Column(sa.Integer)  # foreign key with product list
    cartFinalize_dateTime = sa.Column(sa.DATETIME)
    amount_ordrered = sa.Column(sa.Integer)
    city_name = sa.Column(sa.String(191))
    quantity_ordered = sa.Column(sa.Integer)

    def __repr__(self):
        return "<Sale(city_name='%s')>" % (self.city_name)

config.Base.metadata.create_all(config.engine)
I need to create a PostgreSQL Full Text Search index in Python with SQLAlchemy. Here's what I want in SQL:
CREATE TABLE person ( id INTEGER PRIMARY KEY, name TEXT );
CREATE INDEX person_idx ON person USING GIN (to_tsvector('simple', name));
Now how do I do the second part with SQLAlchemy when using the ORM:
class Person(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String)
You could create index using Index in __table_args__. Also I use a function to create ts_vector to make it more tidy and reusable if more than one field is required. Something like below:
from sqlalchemy.dialects import postgresql
def create_tsvector(*args):
    """Space-concatenate the given column expressions and wrap the result
    in to_tsvector('english', ...), for use in a functional FTS index."""
    expression = args[0]
    for column_expr in args[1:]:
        expression = expression + ' ' + column_expr
    return func.to_tsvector('english', expression)
class Person(db.Model):
    """Person with a GIN full-text-search index over the name column."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String)

    # Reusable tsvector expression (coalesce so NULL names index as '').
    __ts_vector__ = create_tsvector(
        cast(func.coalesce(name, ''), postgresql.TEXT)
    )

    # Fix: __table_args__ must be a tuple (or dict); the original was missing
    # the trailing comma, leaving a bare parenthesized Index expression.
    __table_args__ = (
        Index(
            'idx_person_fts',
            __ts_vector__,
            postgresql_using='gin'
        ),
    )
Update:
A sample query using index (corrected based on comments):
people = Person.query.filter(Person.__ts_vector__.match(expressions, postgresql_regconfig='english')).all()
The answer from #sharez is really useful (especially if you need to concatenate columns in your index). For anyone looking to create a tsvector GIN index on a single column, you can simplify the original answer approach with something like:
from sqlalchemy import Column, Index, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func
Base = declarative_base()
class Example(Base):
    """Single-column FTS: a functional GIN index, no stored tsvector column."""

    __tablename__ = 'examples'

    id = Column(Integer, primary_key=True)
    textsearch = Column(String)

    __table_args__ = (
        Index(
            'ix_examples_tsv',
            # Expression index: the tsvector is computed from the column.
            func.to_tsvector('english', textsearch),
            postgresql_using='gin'
        ),
    )
Note that the comma following Index(...) in __table_args__ is not a style choice, the value of __table_args__ must be a tuple, dictionary, or None.
If you do need to create a tsvector GIN index on multiple columns, here is another way to get there using text().
from sqlalchemy import Column, Index, Integer, String, text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func
Base = declarative_base()
def to_tsvector_ix(*columns):
    """Build to_tsvector('english', col1 || ' ' || col2 || ...) from the
    given column *names*, suitable for a multi-column GIN FTS index."""
    concatenated = " || ' ' || ".join(columns)
    return func.to_tsvector('english', text(concatenated))
class Example(Base):
    """Two-column FTS: one GIN index over atext and btext concatenated."""

    __tablename__ = 'examples'

    id = Column(Integer, primary_key=True)
    atext = Column(String)
    btext = Column(String)

    __table_args__ = (
        Index(
            'ix_examples_tsv',
            to_tsvector_ix('atext', 'btext'),
            postgresql_using='gin'
        ),
    )
Thanks for this question and answers.
I'd like to add a bit more in case ppl using alembic to manage versions by
using autogenerate
which creating the index seems not be detected.
We might end up writing our own alter script which look like.
"""add fts idx
Revision ID: e3ce1ce23d7a
Revises: 079c4455d54d
Create Date:
"""
# revision identifiers, used by Alembic.
revision = 'e3ce1ce23d7a'
down_revision = '079c4455d54d'
from alembic import op
import sqlalchemy as sa
def upgrade():
    # Functional (expression) GIN index; alembic autogenerate does not
    # detect these, hence the hand-written migration.
    op.create_index('idx_content_fts', 'table_name',
                    [sa.text("to_tsvector('english', content)")],
                    postgresql_using='gin')
def downgrade():
    # Reverse of upgrade(): simply drop the index again.
    op.drop_index('idx_content_fts')
It has been answered already by #sharez and #benvc. I needed to make it work with weights though. This is how I did it based on their answers :
from sqlalchemy import Column, func, Index, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql.operators import op
CONFIG = 'english'
Base = declarative_base()
def create_tsvector(*args):
    """Build a weighted tsvector expression.

    Each arg is a (column, weight) pair; the per-column vectors are combined
    with the tsvector concatenation operator '||'.
    """
    field, weight = args[0]
    exp = func.setweight(func.to_tsvector(CONFIG, field), weight)
    for field, weight in args[1:]:
        # NOTE(review): this relies on a 3-argument `op` helper imported from
        # sqlalchemy.sql.operators; the documented spelling is
        # exp.op('||')(func.setweight(...)) — verify the import resolves.
        exp = op(exp, '||', func.setweight(func.to_tsvector(CONFIG, field), weight))
    return exp
class Example(Base):
    """Model whose FTS vector ranks foo (weight 'A') above bar (weight 'B')."""

    __tablename__ = 'example'

    # NOTE(review): no primary-key column is declared here; a declarative
    # mapping requires one — presumably elided in the original answer.
    foo = Column(String)
    bar = Column(String)

    __ts_vector__ = create_tsvector(
        (foo, 'A'),
        (bar, 'B')
    )

    __table_args__ = (
        Index('my_index', __ts_vector__, postgresql_using='gin'),
    )
Previous answers here were helpful for pointing in the right direction.
Below, a distilled & simplified approach using ORM approach & TSVectorType helper from sqlalchemy-utils (that is quite basic and can be simply copy/pasted to avoid external dependencies if needed https://sqlalchemy-utils.readthedocs.io/en/latest/_modules/sqlalchemy_utils/types/ts_vector.html):
Defining a TSVECTOR column (TSVectorType) in your ORM model (declarative) populated automatically from the source text field(s)
import sqlalchemy as sa
from sqlalchemy_utils.types.ts_vector import TSVectorType
# ^-- https://sqlalchemy-utils.readthedocs.io/en/latest/_modules/sqlalchemy_utils/types/ts_vector.html
class MyModel(Base):
    """Model with a stored, database-maintained tsvector column + GIN index."""

    __tablename__ = 'mymodel'

    id = sa.Column(sa.Integer, primary_key=True)
    content = sa.Column(sa.String, nullable=False)
    # Generated column: Postgres keeps content_tsv in sync with content.
    content_tsv = sa.Column(
        TSVectorType("content", regconfig="english"),
        sa.Computed("to_tsvector('english', \"content\")", persisted=True))
    # ^-- equivalent for SQL:
    # COLUMN content_tsv TSVECTOR GENERATED ALWAYS AS (to_tsvector('english', "content")) STORED;

    __table_args__ = (
        # Indexing the TSVector column
        sa.Index("idx_mymodel_content_tsv", content_tsv, postgresql_using="gin"),
    )
For additional details on querying using ORM, see https://stackoverflow.com/a/73999486/11750716 (there is an important difference between SQLAlchemy 1.4 and SQLAlchemy 2.0).
I'm new to SQLAlchemy and relational databases, and I'm trying to set up a model for an annotated lexicon. I want to support an arbitrary number of key-value annotations for the words which can be added or removed at runtime. Since there will be a lot of repetition in the names of the keys, I don't want to use this solution directly, although the code is similar.
My design has word objects and property objects. The words and properties are stored in separate tables with a property_values table that links the two. Here's the code:
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import MetaData, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
engine = create_engine('sqlite:///test.db', echo=True)
meta = MetaData(bind=engine)
property_values = Table('property_values', meta,
Column('word_id', Integer, ForeignKey('words.id')),
Column('property_id', Integer, ForeignKey('properties.id')),
Column('value', String(20))
)
words = Table('words', meta,
Column('id', Integer, primary_key=True),
Column('name', String(20)),
Column('freq', Integer)
)
properties = Table('properties', meta,
Column('id', Integer, primary_key=True),
Column('name', String(20), nullable=False, unique=True)
)
meta.create_all()
class Word(object):
    """A lexicon entry: a surface form plus an occurrence count."""

    def __init__(self, name, freq=1):
        # Default frequency of 1 means "seen once".
        self.name, self.freq = name, freq
class Property(object):
    """A named annotation key shared across words."""

    def __init__(self, name):
        self.name = name
mapper(Property, properties)
Now I'd like to be able to do the following:
Session = sessionmaker(bind=engine)
s = Session()
word = Word('foo', 42)
word['bar'] = 'yes' # or word.bar = 'yes' ?
s.add(word)
s.commit()
Ideally this should add 1|foo|42 to the words table, add 1|bar to the properties table, and add 1|1|yes to the property_values table. However, I don't have the right mappings and relations in place to make this happen. I get the sense from reading the documentation at http://www.sqlalchemy.org/docs/05/mappers.html#association-pattern that I want to use an association proxy or something of that sort here, but the syntax is unclear to me. I experimented with this:
mapper(Word, words, properties={
'properties': relation(Property, secondary=property_values)
})
but this mapper only fills in the foreign key values, and I need to fill in the other value as well. Any assistance would be greatly appreciated.
Simply use Dictionary-Based Collections mapping — an out-of-the-box solution to your question. Extract from the link:
from sqlalchemy.orm.collections import column_mapped_collection, attribute_mapped_collection, mapped_collection
mapper(Item, items_table, properties={
# key by column
'notes': relation(Note, collection_class=column_mapped_collection(notes_table.c.keyword)),
# or named attribute
'notes2': relation(Note, collection_class=attribute_mapped_collection('keyword')),
# or any callable
'notes3': relation(Note, collection_class=mapped_collection(lambda entity: entity.a + entity.b))
})
# ...
item = Item()
item.notes['color'] = Note('color', 'blue')
print item.notes['color']
Or try the solution for Inserting data in Many to Many relationship in SQLAlchemy. Obviously you have to replace the list logic with the dict one.
You could ask the question author to post his final code using the association proxy, which he mentioned he used in the end.
There is very similar question with slight interface difference. But it's easy to fix it by defining __getitem__, __setitem__ and __delitem__ methods.
Comment for Brent, above:
You can use session.flush() instead of commit() to get an id on your model instances. flush() will execute the necessary SQL, but will not commit, so you can rollback later if needed.
I ended up combining Denis and van's posts together to form the solution:
from sqlalchemy import Column, Integer, String, Table, create_engine
from sqlalchemy import MetaData, ForeignKey
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
meta = MetaData()
Base = declarative_base(metadata=meta, name='Base')
class PropertyValue(Base):
    """Association row linking a Word to a Property and carrying the value."""

    __tablename__ = 'property_values'

    # Composite primary key: one value per (word, property) pair.
    WordID = Column(Integer, ForeignKey('words.id'), primary_key=True)
    PropID = Column(Integer, ForeignKey('properties.id'), primary_key=True)
    Value = Column(String(20))
def _property_for_name(prop_name):
    # Look up an existing Property by name via the module-level session `s`;
    # returns None when no such property exists yet.
    return s.query(Property).filter_by(name=prop_name).first()
def _create_propval(prop_name, prop_val):
    """Creator for the association proxy: reuse (or create and commit) the
    named Property, then wrap the value in a PropertyValue row."""
    p = _property_for_name(prop_name)
    if not p:
        p = Property(prop_name)
        s.add(p)
        # Commit so p.id is assigned before it is referenced below.
        s.commit()
    return PropertyValue(PropID=p.id, Value=prop_val)
class Word(Base):
    """A lexicon entry exposing its annotations through dict-style access."""

    __tablename__ = 'words'

    id = Column(Integer, primary_key=True)
    string = Column(String(20), nullable=False)
    freq = Column(Integer)

    # PropertyValue rows keyed by their PropID; delete-orphan so removing an
    # entry from the dict deletes the association row.
    _props = relation(PropertyValue, collection_class=attribute_mapped_collection('PropID'), cascade='all, delete-orphan')
    # Proxy: word.props[key] <-> PropertyValue.Value; new keys are turned
    # into PropertyValue rows by _create_propval.
    props = association_proxy('_props', 'Value', creator=_create_propval)

    def __init__(self, string, freq=1):
        self.string = string
        self.freq = freq

    def __getitem__(self, prop):
        # Translate a property *name* into its id before hitting the proxy,
        # because _props is keyed by PropID.
        p = _property_for_name(prop)
        if p:
            return self.props[p.id]
        else:
            return None

    def __setitem__(self, prop, val):
        # Writes go through the proxy's creator, which resolves the name.
        self.props[prop] = val

    def __delitem__(self, prop):
        # Only attempt deletion when the property name actually exists.
        p = _property_for_name(prop)
        if p:
            del self.props[prop]
class Property(Base):
    """A named annotation key, unique across the lexicon."""

    __tablename__ = 'properties'

    id = Column(Integer, primary_key=True)
    name = Column(String(20), nullable=False, unique=True)

    def __init__(self, name):
        self.name = name
engine = create_engine('sqlite:///test.db', echo=False)
Session = sessionmaker(bind=engine)
s = Session()
meta.create_all(engine)
The test code is as follows:
word = Word('foo', 42)
word['bar'] = "yes"
word['baz'] = "certainly"
s.add(word)
word2 = Word('quux', 20)
word2['bar'] = "nope"
word2['groink'] = "nope"
s.add(word2)
word2['groink'] = "uh-uh"
del word2['bar']
s.commit()
word = s.query(Word).filter_by(string="foo").first()
print word.freq, word['baz']
# prints 42 certainly
The contents of the databases are:
$ sqlite3 test.db "select * from property_values"
1|2|certainly
1|1|yes
2|3|uh-uh
$ sqlite3 test.db "select * from words"
1|foo|42
2|quux|20
$ sqlite3 test.db "select * from properties"
1|bar
2|baz
3|groink