Create a Full Text Search index with SQLAlchemy on PostgreSQL - python

I need to create a PostgreSQL Full Text Search index in Python with SQLAlchemy. Here's what I want in SQL:
CREATE TABLE person ( id INTEGER PRIMARY KEY, name TEXT );
CREATE INDEX person_idx ON person USING GIN (to_tsvector('simple', name));
Now how do I do the second part with SQLAlchemy when using the ORM:
class Person(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String)

You could create index using Index in __table_args__. Also I use a function to create ts_vector to make it more tidy and reusable if more than one field is required. Something like below:
from sqlalchemy.dialects import postgresql
def create_tsvector(*args):
exp = args[0]
for e in args[1:]:
exp += ' ' + e
return func.to_tsvector('english', exp)
class Person(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String)
__ts_vector__ = create_tsvector(
cast(func.coalesce(name, ''), postgresql.TEXT)
)
__table_args__ = (
Index(
'idx_person_fts',
__ts_vector__,
postgresql_using='gin'
)
)
Update:
A sample query using index (corrected based on comments):
people = Person.query.filter(Person.__ts_vector__.match(expressions, postgresql_regconfig='english')).all()

The answer from #sharez is really useful (especially if you need to concatenate columns in your index). For anyone looking to create a tsvector GIN index on a single column, you can simplify the original answer approach with something like:
from sqlalchemy import Column, Index, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func
Base = declarative_base()
class Example(Base):
__tablename__ = 'examples'
id = Column(Integer, primary_key=True)
textsearch = Column(String)
__table_args__ = (
Index(
'ix_examples_tsv',
func.to_tsvector('english', textsearch),
postgresql_using='gin'
),
)
Note that the comma following Index(...) in __table_args__ is not a style choice, the value of __table_args__ must be a tuple, dictionary, or None.
If you do need to create a tsvector GIN index on multiple columns, here is another way to get there using text().
from sqlalchemy import Column, Index, Integer, String, text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func
Base = declarative_base()
def to_tsvector_ix(*columns):
s = " || ' ' || ".join(columns)
return func.to_tsvector('english', text(s))
class Example(Base):
__tablename__ = 'examples'
id = Column(Integer, primary_key=True)
atext = Column(String)
btext = Column(String)
__table_args__ = (
Index(
'ix_examples_tsv',
to_tsvector_ix('atext', 'btext'),
postgresql_using='gin'
),
)

Thanks for this question and answers.
I'd like to add a bit more in case ppl using alembic to manage versions by
using autogenerate
which creating the index seems not be detected.
We might end up writing our own alter script which look like.
"""add fts idx
Revision ID: e3ce1ce23d7a
Revises: 079c4455d54d
Create Date:
"""
# revision identifiers, used by Alembic.
revision = 'e3ce1ce23d7a'
down_revision = '079c4455d54d'
from alembic import op
import sqlalchemy as sa
def upgrade():
op.create_index('idx_content_fts', 'table_name',
[sa.text("to_tsvector('english', content)")],
postgresql_using='gin')
def downgrade():
op.drop_index('idx_content_fts')

It has been answered already by #sharez and #benvc. I needed to make it work with weights though. This is how I did it based on their answers :
from sqlalchemy import Column, func, Index, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql.operators import op
CONFIG = 'english'
Base = declarative_base()
def create_tsvector(*args):
field, weight = args[0]
exp = func.setweight(func.to_tsvector(CONFIG, field), weight)
for field, weight in args[1:]:
exp = op(exp, '||', func.setweight(func.to_tsvector(CONFIG, field), weight))
return exp
class Example(Base):
__tablename__ = 'example'
foo = Column(String)
bar = Column(String)
__ts_vector__ = create_tsvector(
(foo, 'A'),
(bar, 'B')
)
__table_args__ = (
Index('my_index', __ts_vector__, postgresql_using='gin'),
)

Previous answers here were helpful for pointing in the right direction.
Below, a distilled & simplified approach using ORM approach & TSVectorType helper from sqlalchemy-utils (that is quite basic and can be simply copy/pasted to avoid external dependencies if needed https://sqlalchemy-utils.readthedocs.io/en/latest/_modules/sqlalchemy_utils/types/ts_vector.html):
Defining a TSVECTOR column (TSVectorType) in your ORM model (declarative) populated automatically from the source text field(s)
import sqlalchemy as sa
from sqlalchemy_utils.types.ts_vector import TSVectorType
# ^-- https://sqlalchemy-utils.readthedocs.io/en/latest/_modules/sqlalchemy_utils/types/ts_vector.html
class MyModel(Base):
__tablename__ = 'mymodel'
id = sa.Column(sa.Integer, primary_key=True)
content = sa.Column(sa.String, nullable=False)
content_tsv = sa.Column(
TSVectorType("content", regconfig="english"),
sa.Computed("to_tsvector('english', \"content\")", persisted=True))
# ^-- equivalent for SQL:
# COLUMN content_tsv TSVECTOR GENERATED ALWAYS AS (to_tsvector('english', "content")) STORED;
__table_args__ = (
# Indexing the TSVector column
sa.Index("idx_mymodel_content_tsv", content_tsv, postgresql_using="gin"),
)
For additional details on querying using ORM, see https://stackoverflow.com/a/73999486/11750716 (there is an important difference between SQLAlchemy 1.4 and SQLAlchemy 2.0).

Related

SQLAlchemy Complex Polymorphic Mapping And Deep Table Inheritance

I have the following table structure (I have simplified it as much as possible, narrowed down the child/inheriting tables [there are additional] and removed all irrelevant columns from the provided tables):
## Base is my declarative_base
class AbstractQuestion(Base):
questionTypeId: Column = Column(
Integer, ForeignKey("luQuestionTypes.id"), index=True, nullable=False
)
__mapper_args__ = {
"polymorphic_identity": 0,
"polymorphic_on": questionTypeId,
}
class MultiChoiceQuestion(AbstractQuestion):
id: Column = Column(Integer, ForeignKey(AbstractQuestion.id), primary_key=True)
__mapper_args__ = {"polymorphic_identity": 1}
class AbstractSurveyQuestion(AbstractQuestion):
id: Column = Column(Integer, ForeignKey(AbstractQuestion.id), primary_key=True)
surveyQuestionTypeId: Column = Column(
Integer, ForeignKey("luSurveyQuestionTypes.id"), index=True, nullable=False
)
__mapper_args__ = {"polymorphic_identity": 2}
class RatingQuestion(AbstractSurveyQuestion):
id: Column = Column(
Integer, ForeignKey(AbstractSurveyQuestion.id), primary_key=True
)
The challenge I'm facing is, that I'm trying to make AbstractSurveyQuestion have two types of polymorphic mappings - one as a child of AbstractQuestion with a polymorphic_identity that matches the questionTypeId, but I also need it to have a separate polymorphic_on mapper for its own child table, which is RatingQuestion.
The closest thing I could find was this question, but it doesn't seem to be aiming at exactly what I'm looking for.
I also looked at the official docs about inheritance, but again couldn't find an accurate example to what I'm trying to achieve.
Can anyone please help me with this?
Thanks!
I posted the same question on SQLAlchemy's GitHub repo. Got this answer from the maintainer:
https://github.com/sqlalchemy/sqlalchemy/discussions/8089#discussioncomment-2878725
I'll paste the contents below as well:
it sounds like you are looking for mult-level polymorphic_on. We don't support that right now without workarounds, and that's #2555 which is a feature we're unlikely to implement, or if we did it would be a long time from now.
It looks like you are using joined inheritance....so...two ways. The more SQL efficient one is to have an extra "supplemetary" column on your base table that can discriminate for AbstractSurveyQuestion...because if you query for all the AbstractQuestion objects, by default it's just going to query that one table, and needs to know from each row if that row is in fact a RatingQuestion.
the more convoluted way is to use mapper-configured with_polymorphic so that all queries for AbstractQuestion include all the tables (or a subset of tables, can be configured, but at minimum you'd need to join out to AbstractSurveyQuestion) using a LEFT OUTER JOIN (or if you really wanted to go crazy it can be a UNION ALL).
the workarounds are a little ugly since it's not very easy to get a "composed" value out of two columns in SQL, but they are contained to the base classes. Below examples work on SQLite and might need tweaking for other databases.
Here's the discriminator on base table demo, a query here looks like:
SELECT aq.id AS aq_id, aq.d1 AS aq_d1, aq.d2 AS aq_d2, CAST(aq.d1 AS VARCHAR) || ? || CAST(coalesce(aq.d2, ?) AS VARCHAR) AS _sa_polymorphic_on
FROM aq
from typing import Tuple, Optional
from sqlalchemy import cast
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import event
from sqlalchemy import ForeignKey
from sqlalchemy import inspect
from sqlalchemy import Integer, func
from sqlalchemy import String
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import Session
Base = declarative_base()
class ident_(str):
"""describe a composed identity.
Using a string for easy conversion to a string SQL composition.
"""
_tup: Tuple[int, Optional[int]]
def __new__(cls, d1, d2=None):
self = super().__new__(cls, f"{d1}, {d2 or ''}")
self._tup = d1, d2
return self
def _as_tuple(self):
return self._tup
class AbstractQuestion(Base):
__tablename__ = "aq"
id = Column(Integer, primary_key=True)
d1 = Column(
Integer, nullable=False
) # this can be your FK to the other table etc.
d2 = Column(
Integer, nullable=True
) # this is a "supplementary" discrim column
__mapper_args__ = {
"polymorphic_identity": ident_(0),
"polymorphic_on": cast(d1, String)
+ ", "
+ cast(func.coalesce(d2, ""), String),
}
#event.listens_for(AbstractQuestion, "init", propagate=True)
def _setup_poly(target, args, kw):
"""receive new AbstractQuestion objects when they are constructed and
set polymorphic identity"""
# this is the ident_() object
ident = inspect(target).mapper.polymorphic_identity
d1, d2 = ident._as_tuple()
kw["d1"] = d1
if d2:
kw["d2"] = d2
class MultiChoiceQuestion(AbstractQuestion):
__tablename__ = "mcq"
id: Column = Column(
Integer, ForeignKey(AbstractQuestion.id), primary_key=True
)
__mapper_args__ = {"polymorphic_identity": ident_(1)}
class AbstractSurveyQuestion(AbstractQuestion):
__tablename__ = "acq"
id: Column = Column(
Integer, ForeignKey(AbstractQuestion.id), primary_key=True
)
__mapper_args__ = {"polymorphic_identity": ident_(2)}
class RatingQuestion(AbstractSurveyQuestion):
__tablename__ = "rq"
id: Column = Column(
Integer, ForeignKey(AbstractSurveyQuestion.id), primary_key=True
)
__mapper_args__ = {"polymorphic_identity": ident_(2, 1)}
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
s = Session(e)
s.add(MultiChoiceQuestion())
s.add(RatingQuestion())
s.commit()
s.close()
for q in s.query(AbstractQuestion):
print(q)
then there's the one that maintains your schema fully, a query here looks like:
SELECT aq.id AS aq_id, aq.d1 AS aq_d1, CAST(aq.d1 AS VARCHAR) || ? || CAST(coalesce(acq.d2, ?) AS VARCHAR) AS _sa_polymorphic_on, acq.id AS acq_id, acq.d2 AS acq_d2
FROM aq LEFT OUTER JOIN acq ON aq.id = acq.id
from typing import Tuple, Optional
from sqlalchemy import cast
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import event
from sqlalchemy import ForeignKey
from sqlalchemy import func
from sqlalchemy import inspect
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import Session
Base = declarative_base()
class ident_(str):
"""describe a composed identity.
Using a string for easy conversion to a string SQL composition.
"""
_tup: Tuple[int, Optional[int]]
def __new__(cls, d1, d2=None):
self = super().__new__(cls, f"{d1}, {d2 or ''}")
self._tup = d1, d2
return self
def _as_tuple(self):
return self._tup
class AbstractQuestion(Base):
__tablename__ = "aq"
id = Column(Integer, primary_key=True)
d1 = Column(
Integer, nullable=False
) # this can be your FK to the other table etc.
__mapper_args__ = {
"polymorphic_identity": ident_(0),
}
#event.listens_for(AbstractQuestion, "init", propagate=True)
def _setup_poly(target, args, kw):
"""receive new AbstractQuestion objects when they are constructed and
set polymorphic identity"""
# this is the ident_() object
ident = inspect(target).mapper.polymorphic_identity
d1, d2 = ident._as_tuple()
kw["d1"] = d1
if d2:
kw["d2"] = d2
class MultiChoiceQuestion(AbstractQuestion):
__tablename__ = "mcq"
id: Column = Column(
Integer, ForeignKey(AbstractQuestion.id), primary_key=True
)
__mapper_args__ = {"polymorphic_identity": ident_(1)}
class AbstractSurveyQuestion(AbstractQuestion):
__tablename__ = "acq"
id: Column = Column(
Integer, ForeignKey(AbstractQuestion.id), primary_key=True
)
d2 = Column(Integer, nullable=False)
__mapper_args__ = {
"polymorphic_identity": ident_(2),
"polymorphic_load": "inline", # adds ASQ to all AQ queries
}
# after ASQ is set up, set the discriminator on the base class
# that includes ASQ column
inspect(AbstractQuestion)._set_polymorphic_on(
cast(AbstractQuestion.d1, String)
+ ", "
+ cast(func.coalesce(AbstractSurveyQuestion.d2, ""), String)
)
class RatingQuestion(AbstractSurveyQuestion):
__tablename__ = "rq"
id: Column = Column(
Integer, ForeignKey(AbstractSurveyQuestion.id), primary_key=True
)
__mapper_args__ = {"polymorphic_identity": ident_(2, 1)}
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
s = Session(e)
s.add(MultiChoiceQuestion())
s.add(RatingQuestion())
s.commit()
s.close()
for q in s.query(AbstractQuestion):
print(q)

Create one-to-one and one-to-many between two tables in sqlalchemy

What I'm trying to do seems simple. I'd like to have a parent Organization which has child Groups. However, one group will be the "main_group". Each Organization should have a main group. Each Group should have a reference to the Organization in which it belongs.
I've been able to create_all and use these Models but in my tests when I do a drop_all, I get a
sqlalchemy.exc.CircularDependencyError: Can't sort tables for DROP; an unresolvable foreign key dependency exists between tables: groups, organizations. Please ensure that the ForeignKey and ForeignKeyConstraint objects involved in the cycle have names so that they can be dropped using DROP CONSTRAINT.
Here is a minimum example to show what I'm trying to do. I've left some commented lines in to show what all I've tried.
from sqlalchemy.orm import relationship
from sqlalchemy.sql.schema import ForeignKey
from sqlalchemy.sql.sqltypes import Boolean, Date, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Integer, ForeignKey, String, Column
Base = declarative_base()
class Organization(Base):
__tablename__ = "organizations"
id = Column(Integer(), primary_key=True)
name = Column(String(100))
### Define one-to-one with Group
main_group_id = Column(Integer, ForeignKey("groups.id"))
main_group = relationship(
"Group",
# uselist=False,
# back_populates="organization",
foreign_keys=[main_group_id],
primaryjoin="Group.id==Organization.main_group_id",
post_update=True
# backref="organization"
)
####
## Defines the one-to-many collection with Group
# groups = relationship(
# "Group",
# uselist=True,
# # foreign_keys="Group.organization_id",
# primaryjoin="Group.id==Organization.main_group_id",
# # back_populates="organization",
# )
class Group(Base):
__tablename__ = "groups"
id = Column(Integer, primary_key=True)
name = Column(String(100))
organization_id = Column(Integer, ForeignKey("organizations.id"), nullable=False)
organization = relationship(
"Organization",
uselist=False,
# backref="groups",
foreign_keys=[organization_id],
primaryjoin="Group.organization_id==Organization.id",
# primaryjoin="Group.id==Organization.main_group_id",
post_update=True,
)
from sqlalchemy import create_engine
from sqlalchemy.orm.session import sessionmaker
conn_string = "sqlite:///example.sql"
engine = create_engine(conn_string)
Base.metadata.create_all(engine) # here we create all tables
Session = sessionmaker(bind=engine)
session = Session()
new_org = Organization(name="my new org")
print(new_org)
session.add(new_org)
org = session.query(Organization).get(1)
new_group = Group(name="my main group", organization=org, organization_id=org.id)
new_org.main_group = new_group
session.commit()
Base.metadata.drop_all(engine)
From The Docs
There is actually an example pretty similar to what you want here, which uses an explicitly named foreign key, fk_favorite_entry:
rows-that-point-to-themselves-mutually-dependent-rows
That doesn't seem to fully solve the drop warning, which maybe seems to be dialect dependent. You can fully solve it with use_alter=True which is explained here:
creating-dropping-foreign-key-constraints-via-alter
Likely Solution
Best case would be to name the atypical constraint and to set use_alter=True on it as well.
class Organization(Base):
# ...
main_group_id = Column(Integer, ForeignKey("groups.id", name="fk_main_group", use_alter=True))
# ...

SQLAlchemy: how to detect/suppress duplicate JOIN clauses?

Consider the following example code (using SQLAlchemy 1.4):
import os
from sqlalchemy import Column, ForeignKey, Integer, String, select
from sqlalchemy.orm import backref, declarative_base, relationship
Base = declarative_base()
class Parent(Base):
__tablename__ = "parent"
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(String)
type = Column(String)
class Child(Base):
__tablename__ = "child"
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(String)
parent_id = Column(Integer, ForeignKey("parent.id"))
parent = relationship("Parent", backref=backref("children"))
def select_parent_name(statement):
return statement.join(Child.parent).add_columns(Parent.name)
def filter_by_parent_name(statement, parent_name):
return statement.join(Child.parent).where(Parent.name == parent_name)
def build_query():
statement = select(Child.id)
if os.getenv("SELECT_PARENT_NAME", True):
statement = select_parent_name(statement)
if os.getenv("FILTER_BY_PARENT", True):
statement = filter_by_parent_name(statement, "foo")
return statement
if __name__ == "__main__":
print(str(build_query()))
This produces invalid SQL, with the same JOIN clause represented twice:
SELECT child.id, parent.name
FROM child JOIN parent ON parent.id = child.parent_id JOIN parent ON parent.id = child.parent_id
WHERE parent.name = :name_1
If executed, it will result in:
(MySQLdb._exceptions.OperationalError) (1066, "Not unique table/alias: 'parent'")
It's a stripped-down trivial example, but the point I'm trying to demonstrate is that I'm building up an SQL statement by passing it around to different functions, each of which has a different responsibility and might require the addition of a JOIN which could have already been added to the statement.
Is there an easy way to suppress duplicate JOINs like this? Or to inspect the statement to see if the redundant JOIN is already present? Ideally this information would be easily determined from the statement object itself, rather than having to maintain and pass around that state separately.
In SQLAlchemy>=1.4 the joined tables can be found in statement._setup_joins:
joined_tables = [joins[0].parent.entity for joins in statement._setup_joins]
For SQLAlchemy<1.4 the joined tables can be found in statement._join_entities:
joined_tables = [mapper.class_ for mapper in statement._join_entities]
Reference for SQLAlchemy<1.4: Can I inspect a sqlalchemy query object to find the already joined tables?

Python/SQLAlchemy - Need to convert an inner join query with 3 tables into Python

I need some help converting this query into SQLAlchemy.
select field from table t1 join table t2 on t1.detail_id = t2.id join table t3 on t3.id = t2.rate_id where t2.name = 'fred' and t3.rate_type = 'Custom' and t3.description = 'Default';
I have been able to convert inner join queries with two tables, but need some help with this one.
Appreciate your help.TIA.
If simple SQL query is enough you can try:
session.execute("SELECT t1.field AS t1_field "
"FROM t1 JOIN t2 ON t1.detail_id = t2.id JOIN t3 ON t2.rate_id = t3.id "
"WHERE t2.name = :name AND t3.rate_type = :rate_type AND t3.description = :description",
{'name': 'fred', 'rate_type': 'Custom', 'description': 'Default'})
But if you want to use SQLAlchemy declarative base then the query would look like:
results = session.query(T1.field).join(T2, T1.detail_id == T2.id).join(T3, T2.rate_id == T3.id).\
filter(T2.name == 'fred').\
filter(T3.rate_type == 'custom').\
filter(T3.description == 'lorem ipsum').all()
For the following models:
from sqlalchemy import create_engine, Integer, ForeignKey, String, Column
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class T1(Base):
__tablename__ = 't1'
id = Column(Integer, primary_key=True)
field = Column(String)
detail_id = Column(Integer, ForeignKey("t1.id"))
class T2(Base):
__tablename__ = 't2'
id = Column(Integer, primary_key=True)
name = Column(String)
rate_id = Column(Integer, ForeignKey("t1.id"))
class T3(Base):
__tablename__ = 't3'
id = Column(Integer, primary_key=True)
rate_type = Column(String)
description = Column(String)
I hope it helps.
SQLAlchemy provide both ORM way and SQL way to operate database. You can use exactly the raw SQL language (or SQLAlachemy SQL Express) to query.
(1) RAW SQL QUERY,Sample code:
engine = create_engine(...)
q = 'SELECT foo FROM t_bar WHERE col_name=:v_parameters'
rs = engine.execute(sqlalchemy.text(q), v_parameters=your_actual_value)
Check execute and basic usage. Also take look at ResultProxy to understand how to operate on returned result.
(2) ORM. If you want to use ORM, firstly you have to define models and mapper class. Sample Code.
from sqlalchemy import Column, ForeignKey
from sqlalchemy.types import String, Integer
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Father(Base):
id = Column(Integer, primary_key=True)
name = Column(String(31), unique=True, nullable=False)
# Relationship attributes
children = relationship('Sons',
passive_deletes=True,
back_populates='father')
class Son(Base):
id = Column(Integer, primary_key=True)
name = Column(String(31), unique=True, nullable=False)
# foreign keys
p_id = Column(Integer, ForeignKey('Father.id',
ondelete='CASCADE',
onupdate='CASCADE'))
# Relationship attributes
parent = relationship('Father',
passive_deletes=True,
back_populates='sons')
Then you can do ORM query operations
session.query(Father).join(Father.sons).filter(Son.name=='Sam')
is equal to SQL query
SELECT father.id, father.name FROM father JOIN son ON father.id=son.p_id WHERE son.name='Sam'.
Please check ORM mapper and ORM Query for more information.
For you application. If you have well mapped all of your tables, then you can use ORM way. If you do not need ORM feature, you can just use RAW SQL query.
Thank.

Dynamically setting __tablename__ for sharding in SQLAlchemy?

In order to handle a growing database table, we are sharding on table name. So we could have database tables that are named like this:
table_md5one
table_md5two
table_md5three
All tables have the exact same schema.
How do we use SQLAlchemy and dynamically specify the tablename for the class that corresponds to this? Looks like the declarative_base() classes need to have tablename pre-specified.
There will eventually be too many tables to manually specify derived classes from a parent/base class. We want to be able to build a class that can have the tablename set up dynamically (maybe passed as a parameter to a function.)
OK, we went with the custom SQLAlchemy declaration rather than the declarative one.
So we create a dynamic table object like this:
from sqlalchemy import MetaData, Table, Column
def get_table_object(self, md5hash):
metadata = MetaData()
table_name = 'table_' + md5hash
table_object = Table(table_name, metadata,
Column('Column1', DATE, nullable=False),
Column('Column2', DATE, nullable=False)
)
clear_mappers()
mapper(ActualTableObject, table_object)
return ActualTableObject
Where ActualTableObject is the class mapping to the table.
In Augmenting the Base you find a way of using a custom Base class that can, for example, calculate the __tablename__ attribure dynamically:
class Base(object):
#declared_attr
def __tablename__(cls):
return cls.__name__.lower()
The only problem here is that I don't know where your hash comes from, but this should give a good starting point.
If you require this algorithm not for all your tables but only for one you could just use the declared_attr on the table you are interested in sharding.
Because I insist to use declarative classes with their __tablename__ dynamically specified by given parameter, after days of failing with other solutions and hours of studying SQLAlchemy internals, I come up with the following solution that I believe is simple, elegant and race-condition free.
def get_model(suffix):
DynamicBase = declarative_base(class_registry=dict())
class MyModel(DynamicBase):
__tablename__ = 'table_{suffix}'.format(suffix=suffix)
id = Column(Integer, primary_key=True)
name = Column(String)
...
return MyModel
Since they have their own class_registry, you will not get that warning saying:
This declarative base already contains a class with the same class name and module name as mypackage.models.MyModel, and will be replaced in the string-lookup table.
Hence, you will not be able to reference them from other models with string lookup. However, it works perfectly fine to use these on-the-fly declared models for foreign keys as well:
ParentModel1 = get_model(123)
ParentModel2 = get_model(456)
class MyChildModel(BaseModel):
__tablename__ = 'table_child'
id = Column(Integer, primary_key=True)
name = Column(String)
parent_1_id = Column(Integer, ForeignKey(ParentModel1.id))
parent_2_id = Column(Integer, ForeignKey(ParentModel2.id))
parent_1 = relationship(ParentModel1)
parent_2 = relationship(ParentModel2)
If you only use them to query/insert/update/delete without any reference left such as foreign key reference from another table, they, their base classes and also their class_registry will be garbage collected, so no trace will be left.
you can write a function with tablename parameter and send back the class with setting appropriate attributes.
def get_class(table_name):
class GenericTable(Base):
__tablename__ = table_name
ID= Column(types.Integer, primary_key=True)
def funcation(self):
......
return GenericTable
Then you can create a table using:
get_class("test").__table__.create(bind=engine) # See sqlachemy.engine
Try this
import zlib
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, BigInteger, DateTime, String
from datetime import datetime
BASE = declarative_base()
ENTITY_CLASS_DICT = {}
class AbsShardingClass(BASE):
__abstract__ = True
def get_class_name_and_table_name(hashid):
return 'ShardingClass%s' % hashid, 'sharding_class_%s' % hashid
def get_sharding_entity_class(hashid):
"""
#param hashid: hashid
#type hashid: int
#rtype AbsClientUserAuth
"""
if hashid not in ENTITY_CLASS_DICT:
class_name, table_name = get_class_name_and_table_name(hashid)
cls = type(class_name, (AbsShardingClass,),
{'__tablename__': table_name})
ENTITY_CLASS_DICT[hashid] = cls
return ENTITY_CLASS_DICT[hashid]
cls = get_sharding_entity_class(1)
print session.query(cls).get(100)
Instead of using imperative creating Table object, you can use usual declarative_base and make a closure to set a table name as the following:
def make_class(Base, table_name):
class User(Base):
__tablename__ = table_name
id = Column(Integer, primary_key=True)
name= Column(String)
return User
Base = declarative_base()
engine = make_engine()
custom_named_usertable = make_class(Base, 'custom_name')
Base.metadata.create_all(engine)
session = make_session(engine)
new_user = custom_named_usertable(name='Adam')
session.add(new_user)
session.commit()
session.close()
engine.dispose()
just you need to create class object for Base.
from sqlalchemy.ext.declarative import declarative_base, declared_attr
class Base(object):
#declared_attr
def __tablename__(cls):
return cls.__name.lower()
Base = declarative_base(cls=Base)

Categories

Resources