flask sqlalchemy - dynamically generate data model based on columns in the database

flask sqlalchemy - dynamically generate data model based on columns in the database - python

Is there a way to generate db model dynamically based on the columns in the database table for Flask SQLAlchemy?
I have an application to display data in from a database table, but the column name would change at times and break my app. I would like to have a way to generate the data model dynamically based on the actual column names in the database.
I currently explicitly declare all the columns as below.
class MyDbModel(db.Model):
__tablename__ = 'my_table'
id = db.Column('id', db.NVARCHAR(length=300), primary_key=True)
name= db.Column('name', db.NVARCHAR(length=300),
nullable=True)
I'm not very familiar with sqlalchamy, I have tried the following but is getting an error as below, and I'm not sure if this is the right way to do it.
could not assemble any primary key columns for mapped table
from sqlalchemy import Table, Column
from sqlalchemy.orm import mapper
db = SQLAlchemy()
class MyDbModel(db.Model):
__tablename__ = 'my_table'
def __init__(self):
#helper function to get column headers of a db table
table_cols = get_headers_or_columns(
'my_table'
)
t = Table(
'my_table', db.metadata,
Column('id', db.NVARCHAR(length=300), primary_key=True),
*(Column(table_col, db.NVARCHAR(length=300)) for table_col in table_cols)
)
mapper(self, t)
Any help is appreciated!

I figured this out, in case anyone needed. It's really simple, use a helper function to get the column headers of your table, then just use setattr to set the attributes of the class.
from app import db
class MyDbModel(db.Model):
pass
def map_model_attrs(model, table):
"""
:param model: your db Model Class
:param table your db table name
"""
table_cols = get_headers_or_columns(table)
for col in table_cols:
setattr(
model, col, db.Column(col, db.NVARCHAR(length=300),
nullable=True)
)
map_mode_attrs(MyDbModel, 'my_table')

Related

SQLAlchemy 'entity' for `add_columns` not backed by a table

With a SQLAlchemy query like:
result = db.session.query(Model).add_columns(
func.min(Model.foo).over().label("min_foo"),
func.max(Model.foo).over().label("max_foo"),
# ...
)
The result is an iterable of tuples, consisting of firstly the Model row, and then the added columns.
How can I either:
Contribute the added columns to Model, such that they can be accessed from each element as model.min_foo et al.; or
Map the added columns into a separate dataclass, such that they can be accessed as e.g. extra.min_foo?
The main thing I'm trying to achieve here is access by name - such as the given labels - without enumerating them all as model, min_foo, max_foo, ... and relying on maintaining the same order. With model, *extra, extra is just a plain list of the aggregate values, there's no reference to the label.
If I dynamically add the columns to the model first:
Model.min_foo = Column(Numeric)
then it complains:
Implicitly combining column modeltable.min_foo with column modeltable.min_foo under attribute 'min_foo'.
Please configure one or more attributes for these same-named columns explicitly
Apparently the solution to that is to explicitly join the tables. But this isn't one!
It seems that this ought to be possible with 'mappers', but I can't find any examples that don't explicitly map to a 'table name' or its columns, which I don't really have here - it's not clear to me if/how they can be used with aggregates, or other 'virtual' columns from the query that aren't actually stored in any table.

I think that what you are looking for is a Query-time SQL expressions as mapped attributes:
from sqlalchemy import create_engine, Column, Integer, select, func
from sqlalchemy.orm import (Session, declarative_base, query_expression,
with_expression)
Base = declarative_base()
class Model(Base):
__tablename__ = 'model'
id = Column(Integer, primary_key=True)
foo = Column(Integer)
foo2 = Column(Integer, default=0)
engine = create_engine('sqlite:///', future=True)
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
with Session(engine) as session:
session.add(Model(foo=10))
session.add(Model(foo=20))
session.add(Model(foo=30))
session.add(Model(foo=40))
session.add(Model(foo=50, foo2=1))
session.add(Model(foo=60, foo2=1))
session.add(Model(foo=70, foo2=1))
session.add(Model(foo=80))
session.add(Model(foo=90))
session.add(Model(foo=100))
session.commit()
Model.min_foo = query_expression(func.min(Model.foo).over())
stmt = select(Model).where(Model.foo2 == 1)
models = session.execute(stmt).all()
for model, in models:
print(model.min_foo)
with Session(engine) as session:
Model.max_foo = query_expression()
stmt = select(Model).options(with_expression(Model.max_foo,
func.max(Model.foo).over())
).where(Model.foo2 == 1)
models = session.execute(stmt).all()
for model, in models:
print(model.max_foo)
You can define a default expression when defining the query_expression or using .options with with_expression you can define a runtime expression. The only thing is that the Mapped attribute cannot be unmapped and will return None for max_foo as there is no default expression defined.

Creating dynamic classes in SQLAlchemy

We have 1 table with a large amount of data and DBA's partitioned it based on a particular parameter. This means I ended up with Employee_TX, Employee_NY kind of table names. Earlier the models.py was simple as in --
class Employee(Base):
__tablename__ = 'Employee'
name = Column...
state = Column...
Now, I don't want to create 50 new classes for the newly partitioned tables as anyways my columns are the same.
Is there a pattern where I can create a single class and then use it in query dynamically? session.query(<Tablename>).filter().all()
Maybe some kind of Factory pattern or something is what I'm looking for.
So far I've tried by running a loop as
for state in ['CA', 'TX', 'NY']:
class Employee(Base):
__qualname__ = __tablename__ = 'Employee_{}'.format(state)
name = Column...
state = Column...
but this doesn't work and I get a warning as - SAWarning: This declarative base already contains a class with the same class name and module name as app_models.employee, and will be replaced in the string-lookup table.
Also it can't find the generated class when I do from app_models import Employee_TX
This is a flask app with PostgreSQL as a backend and sqlalchemy is used as an ORM

Got it by creating a custom function like -
def get_model(state):
DynamicBase = declarative_base(class_registry=dict())
class MyModel(DynamicBase):
__tablename__ = 'Employee_{}'.format(state)
name = Column...
state = Column...
return MyModel
And then from my services.py, I just call with get_model(TX)

Whenever you think of dynamically constructing classes think of type() with 3 arguments (see this answer for a demonstration, and the docs more generally).
In your case, it's just a matter of constructing the classes and keeping a reference to them so you can access them again later.
Here's an example:
from sqlalchemy import Column, Integer, String
from sqlalchemy.engine import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
Base = declarative_base()
# this produces the set of common attributes that each class should have
def attribute_factory():
return dict(
id=Column(Integer, primary_key=True),
name=Column(String, nullable=False),
state=Column(String, nullable=False),
CLASS_VAR=12345678,
)
states = ["CA", "TX", "NY"]
# here we map the state abbreviation to the generated model, notice the templated
# class and table names
model_map = {
state: type(
f"Employee_{state}",
(Base,),
dict(**attribute_factory(), __tablename__=f"Employee_{state}"),
)
for state in states
}
engine = create_engine("sqlite:///", echo=True)
Session = sessionmaker(bind=engine)
Base.metadata.create_all(engine)
if __name__ == "__main__":
# inserts work
s = Session()
for state, model in model_map.items():
s.add(model(name="something", state=state))
s.commit()
s.close()
# queries work
s = Session()
for state, model in model_map.items():
inst = s.query(model).first()
print(inst.state, inst.CLASS_VAR)

How to properly use SQLAlchemy's '#aggregated' class attribute decorator

I'm trying to use SQLAlchemy's #aggregated decorator to define an attribute ('gross_amount)' for a class, Receipt. This gross_amount attribute is the sum of the Item.gross_amount for all Item instances associated with the Receipt instance by a foreign id.
I.E., a receipt is made up of items, and I want to define a receipt 'gross_amount' value which is just the total $ of all of the items on the receipt.
I've modeled my code after this document http://sqlalchemy-utils.readthedocs.io/en/latest/aggregates.html
So it looks like this...
from sqlalchemy import Column, Integer, ForeignKey
from sqlalchemy.sql import func
from sqlalchemy import orm
class Receipt(Base):
__tablename__ = "receipts"
__table_args__ = {'extend_existing': True}
id = Column(Integer, index = True, primary_key = True, nullable = False)
#aggregated('itemz', Column(Integer))
def gross_amount(self):
return func.sum(Item.gross_amount)
itemz = orm.relationship(
'Item',
backref='receipts'
)
class Item(Base):
__tablename__ = "items"
id = Column(Integer, index = True, primary_key = True, nullable = False)
'''
FE relevant
'''
gross_amount = Column(Integer)
receipt_id = Column(Integer, ForeignKey("receipts.id"), nullable=False)
In my migration, am I supposed to have a column in the receipts table for gross_amount?
1) When I DO define this column in the receipts table, any Receipt.gross_amount for any instance just points to the gross_amount values defined in the receipts table.
2) When I DO NOT define this column in the receipts table, I get a SQLAlchemy error whenever I execute a SELECT against the database:
ProgrammingError: (psycopg2.ProgrammingError) column receipts.gross_amount does not exist
FWIW, my SQLAlchemy package is the latest distributed thru PIP...
SQLAlchemy==1.1.11
SQLAlchemy-Utils==0.32.14
And my local db on which I'm running this for now is PostgreSQL 9.6.2
What am I doing wrong here? Any patient help would be greatly appreciated!

Yes, you do need to add the column to table:
CREATE TABLE receipts (
id INTEGER NOT NULL,
gross_amount INTEGER, -- <<< See, it's here :)
PRIMARY KEY (id)
);
INSERT INTO receipts VALUES(1,7);
INSERT INTO receipts VALUES(2,7);
CREATE TABLE items (
id INTEGER NOT NULL,
gross_amount INTEGER,
receipt_id INTEGER NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY(receipt_id) REFERENCES receipts (id)
);
Tested with this self-contained snippet:
from sqlalchemy import Column, Integer, ForeignKey, create_engine, orm
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy_utils import aggregated
Base = declarative_base()
class Receipt(Base):
__tablename__ = "receipts"
__table_args__ = {'extend_existing': True}
id = Column(Integer, index = True, primary_key = True, nullable = False)
#aggregated('itemz', Column(Integer))
def gross_amount(self):
return func.sum(Item.gross_amount)
itemz = orm.relationship('Item', backref='receipts')
class Item(Base):
__tablename__ = "items"
id = Column(Integer, index = True, primary_key = True, nullable = False)
gross_amount = Column(Integer)
receipt_id = Column(Integer, ForeignKey("receipts.id"), nullable=False)
def __init__(self, amount):
self.gross_amount=amount
engine = create_engine('sqlite:///xxx.db', echo=True)
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
receipt = Receipt()
receipt.itemz.append(Item(5))
receipt.itemz.append(Item(2))
session.add(receipt)
session.commit()
print (receipt.gross_amount)
Of course, there's also another approach called hybrid_property, which basically allows you to do both orm- and database level queries without adding extra column do your database:
#hybrid_property
def gross_sum(self):
return sum(i.gross_amount for i in self.itemz)
#gross_sum.expression
def gross_sum(cls):
return select([func.sum(Item.gross_amount)]).\
where(Item.receipt_id==cls.id).\
label('gross_sum')

The reason you're getting this error is because the new column you're adding (gross_amount) has not been created in the receipts table in the database.
Meaning, your current database table only has one created column (id). For the aggregated column to work, it needs to contain an additional column called gross_amount.
This additional column has to allow null values.
One way to go about doing that is through SQL directly in PostgreSQL:
ALTER TABLE receipts ADD gross_amount int;
Alternatively, if there's no data yet, you can drop and recreate the table via SQLAlchemy. It should create this extra column automatically.
I'm not sure what you mean by the last part:
When I DO define this column in the receipts table, any
Receipt.gross_amount for any instance just points to the gross_amount
values defined in the receipts table.
That's where it's supposed to point. I'm not sure what you mean by that. Do you mean that it doesn't contain any values, even though there are values for this receipt's items in Item? If so, I would double check that this is the case (and per their examples here, refresh the database session before seeing the results).

Create a Full Text Search index with SQLAlchemy on PostgreSQL

I need to create a PostgreSQL Full Text Search index in Python with SQLAlchemy. Here's what I want in SQL:
CREATE TABLE person ( id INTEGER PRIMARY KEY, name TEXT );
CREATE INDEX person_idx ON person USING GIN (to_tsvector('simple', name));
Now how do I do the second part with SQLAlchemy when using the ORM:
class Person(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String)

You could create index using Index in __table_args__. Also I use a function to create ts_vector to make it more tidy and reusable if more than one field is required. Something like below:
from sqlalchemy.dialects import postgresql
def create_tsvector(*args):
exp = args[0]
for e in args[1:]:
exp += ' ' + e
return func.to_tsvector('english', exp)
class Person(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String)
__ts_vector__ = create_tsvector(
cast(func.coalesce(name, ''), postgresql.TEXT)
)
__table_args__ = (
Index(
'idx_person_fts',
__ts_vector__,
postgresql_using='gin'
)
)
Update:
A sample query using index (corrected based on comments):
people = Person.query.filter(Person.__ts_vector__.match(expressions, postgresql_regconfig='english')).all()

The answer from #sharez is really useful (especially if you need to concatenate columns in your index). For anyone looking to create a tsvector GIN index on a single column, you can simplify the original answer approach with something like:
from sqlalchemy import Column, Index, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func
Base = declarative_base()
class Example(Base):
__tablename__ = 'examples'
id = Column(Integer, primary_key=True)
textsearch = Column(String)
__table_args__ = (
Index(
'ix_examples_tsv',
func.to_tsvector('english', textsearch),
postgresql_using='gin'
),
)
Note that the comma following Index(...) in __table_args__ is not a style choice, the value of __table_args__ must be a tuple, dictionary, or None.
If you do need to create a tsvector GIN index on multiple columns, here is another way to get there using text().
from sqlalchemy import Column, Index, Integer, String, text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func
Base = declarative_base()
def to_tsvector_ix(*columns):
s = " || ' ' || ".join(columns)
return func.to_tsvector('english', text(s))
class Example(Base):
__tablename__ = 'examples'
id = Column(Integer, primary_key=True)
atext = Column(String)
btext = Column(String)
__table_args__ = (
Index(
'ix_examples_tsv',
to_tsvector_ix('atext', 'btext'),
postgresql_using='gin'
),
)

Thanks for this question and answers.
I'd like to add a bit more in case ppl using alembic to manage versions by
using autogenerate
which creating the index seems not be detected.
We might end up writing our own alter script which look like.
"""add fts idx
Revision ID: e3ce1ce23d7a
Revises: 079c4455d54d
Create Date:
"""
# revision identifiers, used by Alembic.
revision = 'e3ce1ce23d7a'
down_revision = '079c4455d54d'
from alembic import op
import sqlalchemy as sa
def upgrade():
op.create_index('idx_content_fts', 'table_name',
[sa.text("to_tsvector('english', content)")],
postgresql_using='gin')
def downgrade():
op.drop_index('idx_content_fts')

It has been answered already by #sharez and #benvc. I needed to make it work with weights though. This is how I did it based on their answers :
from sqlalchemy import Column, func, Index, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql.operators import op
CONFIG = 'english'
Base = declarative_base()
def create_tsvector(*args):
field, weight = args[0]
exp = func.setweight(func.to_tsvector(CONFIG, field), weight)
for field, weight in args[1:]:
exp = op(exp, '||', func.setweight(func.to_tsvector(CONFIG, field), weight))
return exp
class Example(Base):
__tablename__ = 'example'
foo = Column(String)
bar = Column(String)
__ts_vector__ = create_tsvector(
(foo, 'A'),
(bar, 'B')
)
__table_args__ = (
Index('my_index', __ts_vector__, postgresql_using='gin'),
)

Previous answers here were helpful for pointing in the right direction.
Below, a distilled & simplified approach using ORM approach & TSVectorType helper from sqlalchemy-utils (that is quite basic and can be simply copy/pasted to avoid external dependencies if needed https://sqlalchemy-utils.readthedocs.io/en/latest/_modules/sqlalchemy_utils/types/ts_vector.html):
Defining a TSVECTOR column (TSVectorType) in your ORM model (declarative) populated automatically from the source text field(s)
import sqlalchemy as sa
from sqlalchemy_utils.types.ts_vector import TSVectorType
# ^-- https://sqlalchemy-utils.readthedocs.io/en/latest/_modules/sqlalchemy_utils/types/ts_vector.html
class MyModel(Base):
__tablename__ = 'mymodel'
id = sa.Column(sa.Integer, primary_key=True)
content = sa.Column(sa.String, nullable=False)
content_tsv = sa.Column(
TSVectorType("content", regconfig="english"),
sa.Computed("to_tsvector('english', \"content\")", persisted=True))
# ^-- equivalent for SQL:
# COLUMN content_tsv TSVECTOR GENERATED ALWAYS AS (to_tsvector('english', "content")) STORED;
__table_args__ = (
# Indexing the TSVector column
sa.Index("idx_mymodel_content_tsv", content_tsv, postgresql_using="gin"),
)
For additional details on querying using ORM, see https://stackoverflow.com/a/73999486/11750716 (there is an important difference between SQLAlchemy 1.4 and SQLAlchemy 2.0).

Dynamically setting tablename for sharding in SQLAlchemy?

In order to handle a growing database table, we are sharding on table name. So we could have database tables that are named like this:
table_md5one
table_md5two
table_md5three
All tables have the exact same schema.
How do we use SQLAlchemy and dynamically specify the tablename for the class that corresponds to this? Looks like the declarative_base() classes need to have tablename pre-specified.
There will eventually be too many tables to manually specify derived classes from a parent/base class. We want to be able to build a class that can have the tablename set up dynamically (maybe passed as a parameter to a function.)

OK, we went with the custom SQLAlchemy declaration rather than the declarative one.
So we create a dynamic table object like this:
from sqlalchemy import MetaData, Table, Column
def get_table_object(self, md5hash):
metadata = MetaData()
table_name = 'table_' + md5hash
table_object = Table(table_name, metadata,
Column('Column1', DATE, nullable=False),
Column('Column2', DATE, nullable=False)
)
clear_mappers()
mapper(ActualTableObject, table_object)
return ActualTableObject
Where ActualTableObject is the class mapping to the table.

In Augmenting the Base you find a way of using a custom Base class that can, for example, calculate the __tablename__ attribure dynamically:
class Base(object):
#declared_attr
def __tablename__(cls):
return cls.__name__.lower()
The only problem here is that I don't know where your hash comes from, but this should give a good starting point.
If you require this algorithm not for all your tables but only for one you could just use the declared_attr on the table you are interested in sharding.

Because I insist to use declarative classes with their __tablename__ dynamically specified by given parameter, after days of failing with other solutions and hours of studying SQLAlchemy internals, I come up with the following solution that I believe is simple, elegant and race-condition free.
def get_model(suffix):
DynamicBase = declarative_base(class_registry=dict())
class MyModel(DynamicBase):
__tablename__ = 'table_{suffix}'.format(suffix=suffix)
id = Column(Integer, primary_key=True)
name = Column(String)
...
return MyModel
Since they have their own class_registry, you will not get that warning saying:
This declarative base already contains a class with the same class name and module name as mypackage.models.MyModel, and will be replaced in the string-lookup table.
Hence, you will not be able to reference them from other models with string lookup. However, it works perfectly fine to use these on-the-fly declared models for foreign keys as well:
ParentModel1 = get_model(123)
ParentModel2 = get_model(456)
class MyChildModel(BaseModel):
__tablename__ = 'table_child'
id = Column(Integer, primary_key=True)
name = Column(String)
parent_1_id = Column(Integer, ForeignKey(ParentModel1.id))
parent_2_id = Column(Integer, ForeignKey(ParentModel2.id))
parent_1 = relationship(ParentModel1)
parent_2 = relationship(ParentModel2)
If you only use them to query/insert/update/delete without any reference left such as foreign key reference from another table, they, their base classes and also their class_registry will be garbage collected, so no trace will be left.

you can write a function with tablename parameter and send back the class with setting appropriate attributes.
def get_class(table_name):
class GenericTable(Base):
__tablename__ = table_name
ID= Column(types.Integer, primary_key=True)
def funcation(self):
......
return GenericTable
Then you can create a table using:
get_class("test").__table__.create(bind=engine) # See sqlachemy.engine

Try this
import zlib
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, BigInteger, DateTime, String
from datetime import datetime
BASE = declarative_base()
ENTITY_CLASS_DICT = {}
class AbsShardingClass(BASE):
__abstract__ = True
def get_class_name_and_table_name(hashid):
return 'ShardingClass%s' % hashid, 'sharding_class_%s' % hashid
def get_sharding_entity_class(hashid):
"""
#param hashid: hashid
#type hashid: int
#rtype AbsClientUserAuth
"""
if hashid not in ENTITY_CLASS_DICT:
class_name, table_name = get_class_name_and_table_name(hashid)
cls = type(class_name, (AbsShardingClass,),
{'__tablename__': table_name})
ENTITY_CLASS_DICT[hashid] = cls
return ENTITY_CLASS_DICT[hashid]
cls = get_sharding_entity_class(1)
print session.query(cls).get(100)

Instead of using imperative creating Table object, you can use usual declarative_base and make a closure to set a table name as the following:
def make_class(Base, table_name):
class User(Base):
__tablename__ = table_name
id = Column(Integer, primary_key=True)
name= Column(String)
return User
Base = declarative_base()
engine = make_engine()
custom_named_usertable = make_class(Base, 'custom_name')
Base.metadata.create_all(engine)
session = make_session(engine)
new_user = custom_named_usertable(name='Adam')
session.add(new_user)
session.commit()
session.close()
engine.dispose()

just you need to create class object for Base.
from sqlalchemy.ext.declarative import declarative_base, declared_attr
class Base(object):
#declared_attr
def __tablename__(cls):
return cls.__name.lower()
Base = declarative_base(cls=Base)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.