I have an entity called a Report which points to a report stored in some repository, and those repositories can have versioning, therefore the reports have an optional version.
I am now trying to track the reports in SQL via SQLalchemy. Because one report title can have several versions I wanted to map the entity over two tables, one for the report identification (title and other data I do not include here for simplicity) and one for report versions which reference a report (again plus other data not included here).
I have managed all the above in the following code. But now, I am blocked by the fact that setting the Report.version to a newer version causes an update of report_version and not the insertion of a new version, which means I'll only ever track one version.
from dataclasses import dataclass, field

from sqlalchemy import Column, ForeignKey, Integer, String, Table, \
    create_engine, join, select, text
from sqlalchemy.orm import Session, column_property, registry

mapper_registry = registry()


@dataclass  # the "@" was mangled to "#" by markdown; this must be a decorator
class Report:
    title: str
    version: str | None = field(default=None)  # versioning is optional


report = Table(
    "report",
    mapper_registry.metadata,
    Column("report_pk", Integer, primary_key=True),
    Column("title", String(35), nullable=False),
)

report_version = Table(
    "report_version",
    mapper_registry.metadata,
    Column("version_pk", Integer, primary_key=True),
    Column("report_fk", ForeignKey("report.report_pk"), nullable=False),
    Column("version_id", String(1024), nullable=True),
)

# Map Report over the join of both tables. "version" writes through to
# report_version.version_id, which is why assigning it issues an UPDATE
# of the existing version row instead of INSERTing a new one.
mapper_registry.map_imperatively(
    Report,
    report.join(report_version),
    properties={
        "id": column_property(report.c.report_pk, report_version.c.report_fk),
        "version": report_version.c.version_id,
    },
)

engine = create_engine("sqlite://", echo=True, future=True)
mapper_registry.metadata.create_all(engine)
session = Session(engine)

# setting attribute causes ORM to UPDATE `report_version` (not INSERT)
r1 = Report(title="r1", version="a")
session.add(r1)
session.flush()  # flush r1a
r1.version = "b"
session.flush()  # flush r1b

# this is what I would like to achieve
r2 = Report(title="r2", version=".1")
session.add(r2)
session.flush()  # flush r2.1
session.execute(
    text(
        "INSERT INTO report_version (report_fk, version_id) VALUES (:report_id, '.2')"
    ),
    {"report_id": r2.id},
)
session.flush()  # flush r2.2

session.execute(text("SELECT * FROM report")).all()  # r1 and r2
session.execute(text("SELECT * FROM report_version")).all()  # b, .1 and .2
session.execute(select(Report)).scalars().all()  # r1b, r2.1 and r2.2
session.close()
In the end, I did not manage the behaviour I wanted over two different tables but went with the simpler SCD type 2, and keep adding rows as the report gets updated, which in the SQLAlchemy documentation is found at Versioning using Temporal Rows.
from dataclasses import dataclass, field
from datetime import datetime

from sqlalchemy import (Boolean, Column, DateTime, Integer, String, Table,
                        create_engine, event, select, text)
from sqlalchemy.orm import Session, attributes, make_transient, registry

mapper_registry = registry()


@dataclass  # the "@" was mangled to "#" by markdown; this must be a decorator
class Report:
    title: str
    version: str | None = field(default=None)  # versioning is optional


report = Table(
    "report",
    mapper_registry.metadata,
    Column("id", Integer, primary_key=True),
    Column("title", String(35), nullable=False),
    Column("version_id", String(1024), nullable=True),
    # NOTE(review): datetime.utcnow is deprecated in Python 3.12; a tz-aware
    # default (lambda: datetime.now(timezone.utc)) is the modern replacement.
    Column("created_at", DateTime, nullable=False, default=datetime.utcnow),
    Column("current_flag", Boolean, nullable=False, index=True, default=True),
)

mapper_registry.map_imperatively(
    Report, report, properties={"version": report.c.version_id}
)


# SCD II handling for Report
@event.listens_for(Session, "before_flush")  # "@" was mangled to "#" here too
def before_flush(session, flush_context, instances):
    """Turn each UPDATE of a persistent Report into an INSERT of a new row,
    unsetting current_flag on the superseded row (temporal-rows pattern)."""
    for instance in session.dirty:
        # Only act on modified, already-persisted Report instances.
        if any((
            not isinstance(instance, Report),
            not session.is_modified(instance),
            not attributes.instance_state(instance).has_identity,
        )):
            continue
        # unset current flag on previous instance
        session.query(Report).filter_by(id=instance.id).update(
            values={"current_flag": False}, synchronize_session=False
        )
        # make instance transient
        make_transient(instance)
        # remove id and created_at since new ones will be created on add
        instance.id = None
        instance.created_at = None
        # re-add to session with new id and version
        session.add(instance)


engine = create_engine("sqlite://", echo=True, future=True)
mapper_registry.metadata.create_all(engine)
session = Session(engine)

# setting the attribute now INSERTs a new `report` row instead of updating
r1 = Report(title="r1", version="a")
session.add(r1)
session.flush()  # flush r1a
r1.version = "b"
session.flush()  # flush r1b

session.execute(select(Report)).scalars().all()  # r1a, r1b
session.execute(select(Report).filter_by(current_flag=True)).scalars().all()  # r1b
session.execute(text("SELECT * FROM report")).all()
session.close()
Related
I've been reading various examples from SQLAlchemy documentation for cascade deletes, but nothing I try seems to work. Below is some sample code adapted from that documentation, but using back_populates instead of backref, as I understand that backref is being deprecated.
In the "main" section below, I would expect that deleting the order that "contains" the items would delete the items as well, but that does not happen. Obviously I don't understand something about how to configure these tables... what is it?
# third party imports
from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy_utils import create_database, database_exists

Base = declarative_base()


class Order(Base):
    """An order owning a collection of items (one-to-many)."""

    __tablename__ = "business_order"

    id = Column(Integer, primary_key=True)
    name = Column(String(32))
    # "delete, delete-orphan": deleting an Order (or removing an Item from
    # .items) deletes the Item rows -- but only for Items actually linked
    # through this relationship.
    items = relationship(
        "Item", back_populates="order", cascade="all, delete, delete-orphan"
    )


class Item(Base):
    """A line item belonging to one Order via business_order.id."""

    __tablename__ = "business_item"

    id = Column(Integer, primary_key=True)
    name = Column(String(32))
    order_id = Column(Integer, ForeignKey("business_order.id"))
    order = relationship("Order", back_populates="items")
def get_session(url="sqlite:///:memory:", create_db=True):
    """Get a SQLAlchemy Session instance for input database URL.

    :param url:
        SQLAlchemy URL for database, described here:
        http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls.
    :param create_db:
        Boolean indicating whether to create database from scratch.
    :returns:
        SQLAlchemy Session instance, or None when the database does not
        exist and create_db is False.
    """
    # Create the database first if it is missing (needs sqlalchemy_utils)
    if not database_exists(url):
        if create_db:
            create_database(url)
        else:
            msg = (
                "Database does not exist, will not create without "
                "create_db turned on."
            )
            print(msg)
            return None

    connect_args = {}
    engine = create_engine(url, echo=False, connect_args=connect_args)
    Base.metadata.create_all(engine)

    # create a session object that we can use to insert and
    # extract information from the database
    Session = sessionmaker(bind=engine, autoflush=False)
    session = Session()
    return session
if __name__ == "__main__":
    sqlite_url = "sqlite:///test_sqlite.db"
    session = get_session(sqlite_url)

    order = Order(name="order1")
    session.add(order)
    # BUG (the subject of this question): order.id is still None here --
    # no flush has happened yet -- so item.order_id is None and the item is
    # never associated with the order; the delete below cannot cascade to it.
    item = Item(order_id=order.id, name="item1")
    session.add(item)
    session.commit()

    session.delete(order)  # should delete items too, right?
    session.commit()

    orders = session.query(Order).all()
    print(len(orders))  # this returns 0 as expected
    items = session.query(Item).all()
    print(len(items))  # this returns 1, why?
Order has an (implicit) autoincrement PK. When you do
order = Order(name="order1")
session.add(order)
order.id is None. Therefore, when you do
item = Item(order_id=order.id, name="item1")
item.order_id will also be None, so item is actually not associated with order. Therefore, the delete doesn't cascade.
order doesn't get its id until .flush() (or .commit()) is called. So you could either do
order = Order(name="order1")
session.add(order)
session.flush() # !
item = Item(order_id=order.id, name="item1")
session.add(item)
session.commit()
or do
order = Order(name="order1", items=[Item(name="item1")])
session.add(order)
session.commit()
session.delete(order) # should delete items too, right?
session.commit()
orders = session.query(Order).all()
print(len(orders)) # this returns 0 as expected
items = session.query(Item).all()
print(len(items)) # this also returns 0 as expected
I have already read similar questions in SO and on Google, as well as the official SQLAlchemy docs, but still couldn't figure out how to solve my problem.
Consider the following structure (non-relevant fields removed for simplicity):
# Illustrative fragment from the question (not runnable as-is): `metadata`,
# `dal`, `List`, `mapper` and `relationship` are defined elsewhere.
header_table = Table(
'header',
metadata,
Column('id', Integer, primary_key=True),
Column('parent_header_id', Integer)  # NOTE(review): no ForeignKey('header.id') shown
)
item_table = Table(
'item',
dal.metadata,
Column('id', Integer, primary_key=True),
Column('header_id', Integer)  # NOTE(review): no ForeignKey('header.id') shown
)
class Header:
id: int
parent_header_id: int
# Relationships
items: List[Item]
children: List[Header]
class Item:
id: int
header_id: int
# Self-referential one-to-many: a Header's children are the Headers whose
# parent_header_id points back at it (max one level of nesting, per the text).
mapper(Header, header_table, properties={
'children': relationship(Header, foreign_keys=[header_table.c.parent_header_id]),
})
Just to summarise: you can nest headers (max of 1 level of nesting), and each header can have items.
I'm trying to load all headers, with their items and children, and the items of the children.
# Eager-load Header.items and Header.children via explicit outer joins plus
# contains_eager; children must be aliased because the `header` table may
# appear only once in the FROM clause.
header_alias = aliased(Header)
records = (
session.query(Header)
.outerjoin(Header.items)
.outerjoin(Header.children.of_type(header_alias))
# .outerjoin(Header.children.of_type(header_alias).items) <<< THE PROBLEM IS HERE (READ BELOW)
.options(contains_eager(Header.items))
.options(contains_eager(Header.children.of_type(header_alias)))
.all()
)
How do I load the items of the children?
The code commented out in the example is wrong, I just put it there as an example of what I'm trying to do.
Note: The code above works, but it's lazy loading the items of the children, I'm trying to get rid of this lazy loading.
Big thanks to @zzzeek (Mike Bayer), author of SQLAlchemy, who answered the question on GitHub.
https://github.com/sqlalchemy/sqlalchemy/discussions/6876
OK you have to alias "items" also, this is SQL so every table has to
be in the FROM clause only once. Here's a full running example
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import ForeignKey
from sqlalchemy import Integer
from sqlalchemy import Table
from sqlalchemy.orm import aliased
from sqlalchemy.orm import contains_eager
# declarative_base was originally imported twice (also from the legacy
# sqlalchemy.ext.declarative); the sqlalchemy.orm import below is the one
# that took effect, so only it is kept.
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy.orm import Session

Base = declarative_base()
metadata = Base.metadata

header_table = Table(
    "header",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("parent_header_id", ForeignKey("header.id")),
)

item_table = Table(
    "item",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("header_id", ForeignKey("header.id")),
)


class Header(Base):
    """Self-referential header: children are Headers, items hang off each."""

    __table__ = header_table
    children = relationship("Header")
    items = relationship("Item")


class Item(Base):
    __table__ = item_table
    id: int
    header_id: int


e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

s = Session(e)
s.add(
    Header(
        items=[Item(), Item()],
        children=[Header(items=[Item()]), Header(items=[Item(), Item()])],
    )
)
s.commit()
s.close()

# Both levels must be aliased: each table may appear only once in the FROM
# clause, so the children's items need their own alias as well.
header_alias = aliased(Header)
item_alias = aliased(Item)
records = (
    s.query(Header)
    .outerjoin(Header.items)
    .outerjoin(Header.children.of_type(header_alias))
    .outerjoin(header_alias.items.of_type(item_alias))
    .options(
        contains_eager(Header.items),
        contains_eager(Header.children.of_type(header_alias)).options(
            contains_eager(header_alias.items.of_type(item_alias))
        ),
    )
    .all()
)
s.close()

for r in records:
    print(r)
    print(r.items)
    for c in r.children:
        print(c)
        print(c.items)
I have two tables that I am doing a left join on where the second table has a bit column. The filter returns rows where that bit is False. However when I check that value later inside a loop it is showing as true.
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from sqlalchemy import Column, String, Integer, Boolean, ForeignKey, or_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy.orm.exc import NoResultFound
# from sqlalchemy.exc import DataError

Base = declarative_base()


class BoardGame(Base):
    """A board game with an optional one-to-one Process row."""

    __tablename__ = 'game_detail'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    description = Column(String)
    process = relationship('Process', uselist=False, back_populates='game_detail')

    def __repr__(self):
        return "<BoardGame(id='{}', name='{}', description='{}'".format(
            self.id, self.name, self.description
        )


class Process(Base):
    """Per-game processing flags; description_bigram maps to MySQL BIT(1)."""

    __tablename__ = 'process'

    game_id = Column(Integer, ForeignKey('game_detail.id'), primary_key=True)
    description_bigram = Column(Boolean)
    game_detail = relationship('BoardGame', back_populates='process')


# The "#localhost" in the original was markdown-mangled: the user/host
# separator in a database URL is "@".
sa_engine = create_engine('mysql+pymysql://bgg:blahblahblah@localhost:49000/boardgamegeek?charset=utf8mb4', pool_recycle=3600)
session = Session(bind=sa_engine)

# Set the scripts execution range of data
maximum_games_to_process = 1

for game in session.query(BoardGame.id, Process.description_bigram).join(Process, isouter=True).filter(Process.description_bigram.is_(False)).limit(maximum_games_to_process):
    print('description_bigram', type(game.description_bigram), game.description_bigram)
    print(game)
Running the above gets me
description_bigram <class 'bool'> True
(1, True)
But looking at the data in HeidiSQL shows me otherwise.
game_id;description_bigram
1;0
2;1
3;0
4;0
5;1
Here is the create code for the process table as shown in HeidiSQL
CREATE TABLE `process` (
`game_id` INT(11) NOT NULL,
`description_bigram` BIT(1) NOT NULL,
PRIMARY KEY (`game_id`),
CONSTRAINT `FK__GAME_ID_DESCRIPTION` FOREIGN KEY (`game_id`) REFERENCES `game_detail` (`id`) ON UPDATE NO ACTION ON DELETE NO ACTION
)
COLLATE='utf8_unicode_ci'
ENGINE=InnoDB
I have seen some posts allude to SQLAlchemy's issues with Boolean columns and people have changed their tables and code to work with integers instead (Link is only similar to my situation). I am sure that would work but I have other programs that work just fine with Bit columns and table Boolean definitions so I am not sure why this one isn't working. Is there something under the hood I can look at?
This is the query created by SQLAlchemy
SELECT game_detail.id AS game_detail_id, process.description_bigram AS process_description_bigram
FROM game_detail LEFT OUTER JOIN process ON game_detail.id = process.game_id
WHERE process.description_bigram IS false
I have tried a few things to try and get the column to register its correct value and I just keep ending up with it being true.
Python is version 3.6.5; SQLAlchemy is 1.2.7; 10.3.13-MariaDB
I have a query that I create, it looks like
# Quoted fragment: `Session`, `Widgets` and `WidgetStates` are defined in the
# asker's application, not shown here.
items = Session.query(Widgets.id).filter_by(
state=WidgetStates.NEW
)
when I look at the str representation of it I see this as the planned query
str(items)
'SELECT widgets.id AS widgets_guid \nFROM widgets \nWHERE widgets.state = %(state_1)s'
However, when I execute the query to get a count with echo=True I see a different query being executed:
items.count()
2014-08-09 11:59:48,875 INFO sqlalchemy.engine.base.Engine SELECT count(*) AS count_1
FROM widgets, (SELECT widgets.id AS widgets_id
FROM widgets
WHERE widgets.state = %(state_1)s) AS anon_1
WHERE widgets.type IN (%(type_1)s)
The problem is that it's going to count the entire widgets table where type equals "FOO_WIDGET". But it's not going to filter the count by state it as I would have expected it to.
I think the issue relates to the Widget model having a polymorphic_identity discriminator applied to it:
# Quoted fragment from the question: `Model` is the asker's declarative base,
# defined elsewhere; this only illustrates the polymorphic_identity setting.
class Widget(Model):
class types(object):
FOO_WIDGET = 'foo'
__mapper_args__ = {
'polymorphic_identity': Widget.types.FOO_WIDGET
}
But the issue is it's not using the items query to count, it's using two different tables to get the selected count and one of them does not have any filtering on it. How do I get this query to work as expected?
Runnable Example
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Table, Column, Unicode, Integer, create_engine, MetaData, func
from sqlalchemy.orm import scoped_session, sessionmaker

metadata = MetaData()
Base = declarative_base(metadata=metadata)

widgets = Table(
    'widgets', metadata,
    Column('id', Integer, primary_key=True),
    Column('type', Unicode),
    Column('state', Unicode)
)


class Widget(Base):
    """Polymorphic base discriminated on widgets.type."""

    __table__ = widgets

    class types(object):
        FOO_WIDGET = 'foo'
        BAR_WIDGET = 'bar'

    __mapper_args__ = {
        'polymorphic_on': widgets.c.type,
    }


class FooWidget(Widget):
    __mapper_args__ = {
        'polymorphic_identity': Widget.types.FOO_WIDGET
    }


db_engine = create_engine('sqlite:///:memory:', echo=True)
Session = scoped_session(sessionmaker())
Session.configure(bind=db_engine)
metadata.create_all(db_engine)

items = Session.query(FooWidget.id).filter_by(
    state='new'
)

# The original used Python 2 print statements (a SyntaxError on Python 3);
# converted to print() calls with identical output.
print(str(items))
print('i expect the next statement to print something approximating:')
print('''
select count(*) from widgets where type = 'foo' and state = 'new'
''')
print(items.count())
# What this actually prints
'''
2014-08-28 09:55:15,055 INFO sqlalchemy.engine.base.Engine SELECT count(*) AS count_1
FROM widgets, (SELECT widgets.id AS widgets_id
FROM widgets
WHERE widgets.state = ?) AS anon_1
WHERE widgets.type IN (?)
'''
To run this example you need SQLAlchemy (Tested here with SQLA 0.9.7, in my actual app it's 0.7.x, bug exists in both versions)
I'm trying to create a simple representation for an entity deduplication schema using mysql, and using sqlalchemy for programmatic access.
I'm trying to achieve a specific effect which I think is kind of a self-referential query but i'm not sure:
Essentially I have an 'entities' table (with unique entity_id) and an associated Entity object,
and then an entity_groups table which (for simplicity) has a 'group_id' and 'entity_id' columns, so that I 'register' an entity with a group by creating a row for that relation.
this table too is associated with an ORM object - EntityGroup.
Question is, how do i get the EntityGroup object reference all entities in the group?
I expect I need to write something like:
mapper(EntityGroup, entity_groups_table,
properties={
'entities': relationship(
Entity,
.... ?
)
},
and I'm a little fuzzy on the details. Basically I need all the rows in entity_groups that have the same group_id as the row represented by the object. And then I need to materialize
all the Entity objects associated those rows' entity_id column. This sounds like something achievable by a more verbose Query() operation in sqlalchemy, but i'm not sure how to combine that with the relationship() construct (if at all - perhaps go manual? )
Any help will be useful, I hope I was clear and to the point
You really should not do it using a Query, as if you configure the relationships properly you will get this automatically. Assuming that you use entity_group table solely to store the relationship and nothing else, you should just configure many-to-many relationship as documented. Fully working example should help:
from sqlalchemy import create_engine, Column, Integer, String, MetaData, ForeignKey, Table
from sqlalchemy.orm import relationship, mapper, scoped_session, sessionmaker, backref
from sqlalchemy.ext.associationproxy import association_proxy

# Configure test DB
engine = create_engine(u'sqlite:///:memory:', echo=False)
session = scoped_session(sessionmaker(bind=engine, autoflush=False))
metadata = MetaData()

# tables
entities_table = Table('entities', metadata,
    Column('entity_id', Integer, primary_key=True),
)
groups_table = Table('groups', metadata,
    Column('group_id', Integer, primary_key=True),
)
# pure association table: the two FK columns form a composite primary key
entity_groups_table = Table('entity_groups', metadata,
    Column('entity_id', Integer, ForeignKey('entities.entity_id'), primary_key=True),
    Column('group_id', Integer, ForeignKey('groups.group_id'), primary_key=True),
)


# object model
class Group(object):
    def __repr__(self): return "<Group: %d>" % (self.group_id,)


class Entity(object):
    def __repr__(self): return "<Entity: %d>" % (self.entity_id,)


# mappers: many-to-many through the association table; the backref gives
# Group.entities for free (NOTE(review): classical mapper() is legacy API,
# removed in SQLAlchemy 2.0 in favor of registry.map_imperatively)
mapper(Group, groups_table)
mapper(Entity, entities_table,
    properties={'groups': relationship(Group, secondary=entity_groups_table, backref='entities')},
)

# create db schema
metadata.create_all(engine)

# == TESTS
# create entities
e1 = Entity()
e2 = Entity()
g1 = Group()
g2 = Group()
g3 = Group()
g1.entities.append(e1)
g2.entities.append(e2)
g3.entities.append(e1)
g3.entities.append(e2)
session.add(e1)
session.add(e2)
session.commit()

# query...
session.expunge_all()

# print each group's membership (Python 2 print statement converted to a
# print() call; output is identical)
for g in session.query(Group).all():
    print("group: ", g, " has ", g.entities)