SQLAlchemy how to join a table from an "aliased" table - python

I have already read similar questions on SO and Google, as well as the official SQLAlchemy docs, but still couldn't figure out how to solve my problem.
Consider the following structure (non-relevant fields removed for simplicity):
header_table = Table(
    'header',
    metadata,
    Column('id', Integer, primary_key=True),
    Column('parent_header_id', Integer)
)
item_table = Table(
    'item',
    metadata,
    Column('id', Integer, primary_key=True),
    Column('header_id', Integer)
)
class Header:
    id: int
    parent_header_id: int
    # Relationships
    items: List[Item]
    children: List[Header]

class Item:
    id: int
    header_id: int

mapper(Header, header_table, properties={
    'children': relationship(Header, foreign_keys=[header_table.c.parent_header_id]),
})
Just to summarise: headers can be nested (to a maximum of one level), and each header can have items.
I'm trying to load all headers with their items and children, plus the items of the children.
header_alias = aliased(Header)
records = (
    session.query(Header)
    .outerjoin(Header.items)
    .outerjoin(Header.children.of_type(header_alias))
    # .outerjoin(Header.children.of_type(header_alias).items)  <<< THE PROBLEM IS HERE (READ BELOW)
    .options(contains_eager(Header.items))
    .options(contains_eager(Header.children.of_type(header_alias)))
    .all()
)
How do I load the items of the children?
The commented-out line is wrong; I left it in only as an example of what I'm trying to do.
Note: the code above works, but it lazy-loads the items of the children, and it's this lazy loading I'm trying to get rid of.

Big thanks to @zzzeek (Mike Bayer), the author of SQLAlchemy, who answered the question on GitHub:
https://github.com/sqlalchemy/sqlalchemy/discussions/6876
OK, you have to alias "items" also; this is SQL, so every table has to
be in the FROM clause only once. Here's a full running example:
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import ForeignKey
from sqlalchemy import Integer
from sqlalchemy import Table
from sqlalchemy.orm import aliased
from sqlalchemy.orm import contains_eager
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy.orm import Session
Base = declarative_base()
metadata = Base.metadata

header_table = Table(
    "header",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("parent_header_id", ForeignKey("header.id")),
)
item_table = Table(
    "item",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("header_id", ForeignKey("header.id")),
)

class Header(Base):
    __table__ = header_table
    children = relationship("Header")
    items = relationship("Item")

class Item(Base):
    __table__ = item_table
    id: int
    header_id: int

e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

s = Session(e)
s.add(
    Header(
        items=[Item(), Item()],
        children=[Header(items=[Item()]), Header(items=[Item(), Item()])],
    )
)
s.commit()
s.close()

header_alias = aliased(Header)
item_alias = aliased(Item)

records = (
    s.query(Header)
    .outerjoin(Header.items)
    .outerjoin(Header.children.of_type(header_alias))
    .outerjoin(header_alias.items.of_type(item_alias))
    .options(
        contains_eager(Header.items),
        contains_eager(Header.children.of_type(header_alias)).options(
            contains_eager(header_alias.items.of_type(item_alias))
        ),
    )
    .all()
)
s.close()

for r in records:
    print(r)
    print(r.items)
    for c in r.children:
        print(c)
        print(c.items)
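As a side note: the aliases are only needed because contains_eager requires all rows to come from the one JOINed statement. If you don't need the JOINs for filtering, a simpler alternative (a sketch, not part of the answer above) is to let the ORM emit separate SELECT ... WHERE ... IN statements with selectinload, which needs no aliases at all:

from sqlalchemy.orm import selectinload

# eager-load items, children, and the children's items via
# additional SELECT statements instead of one big JOIN
records = (
    s.query(Header)
    .options(
        selectinload(Header.items),
        selectinload(Header.children).selectinload(Header.items),
    )
    .all()
)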

Related

SQLAlchemy multi-table mapping insert not update on attribute change

I have an entity called a Report which points to a report stored in some repository, and those repositories can be versioned, so the reports have an optional version.
I am now trying to track the reports in SQL via SQLAlchemy. Because one report title can have several versions, I wanted to map the entity over two tables: one for the report identification (title, plus other data I don't include here for simplicity) and one for report versions, which references a report (again, plus other data not included here).
I have managed all of the above in the following code. But now I am blocked by the fact that setting Report.version to a newer version causes an update of report_version rather than the insertion of a new row, which means I'll only ever track one version.
from dataclasses import dataclass, field

from sqlalchemy import Column, ForeignKey, Integer, String, Table, \
    create_engine, select, text
from sqlalchemy.orm import Session, column_property, registry

mapper_registry = registry()

@dataclass
class Report:
    title: str
    version: str | None = field(default=None)  # versioning is optional

report = Table(
    "report",
    mapper_registry.metadata,
    Column("report_pk", Integer, primary_key=True),
    Column("title", String(35), nullable=False),
)
report_version = Table(
    "report_version",
    mapper_registry.metadata,
    Column("version_pk", Integer, primary_key=True),
    Column("report_fk", ForeignKey("report.report_pk"), nullable=False),
    Column("version_id", String(1024), nullable=True),
)

mapper_registry.map_imperatively(
    Report,
    report.join(report_version),
    properties={
        "id": column_property(report.c.report_pk, report_version.c.report_fk),
        "version": report_version.c.version_id,
    },
)

engine = create_engine("sqlite://", echo=True, future=True)
mapper_registry.metadata.create_all(engine)
session = Session(engine)

# setting the attribute causes the ORM to update `report_version`
r1 = Report(title="r1", version="a")
session.add(r1)
session.flush()  # flush r1a
r1.version = "b"
session.flush()  # flush r1b

# this is what I would like to achieve
r2 = Report(title="r2", version=".1")
session.add(r2)
session.flush()  # flush r2.1
session.execute(
    text(
        "INSERT INTO report_version (report_fk, version_id) VALUES (:report_id, '.2')"
    ),
    {"report_id": r2.id},
)
session.flush()  # flush r2.2

session.execute(text("SELECT * FROM report")).all()  # r1 and r2
session.execute(text("SELECT * FROM report_version")).all()  # b, .1 and .2
session.execute(select(Report)).scalars().all()  # r1b, r2.1 and r2.2
session.close()
In the end I did not manage the behaviour I wanted over two different tables, so I went with the simpler SCD type 2 approach and keep adding rows as the report gets updated, which the SQLAlchemy documentation covers under Versioning using Temporal Rows.
from dataclasses import dataclass, field
from datetime import datetime

from sqlalchemy import (Boolean, Column, DateTime, Integer, String, Table,
                        create_engine, event, select, text)
from sqlalchemy.orm import Session, attributes, make_transient, registry

mapper_registry = registry()

@dataclass
class Report:
    title: str
    version: str | None = field(default=None)  # versioning is optional

report = Table(
    "report",
    mapper_registry.metadata,
    Column("id", Integer, primary_key=True),
    Column("title", String(35), nullable=False),
    Column("version_id", String(1024), nullable=True),
    Column("created_at", DateTime, nullable=False, default=datetime.utcnow),
    Column("current_flag", Boolean, nullable=False, index=True, default=True),
)

mapper_registry.map_imperatively(
    Report, report, properties={"version": report.c.version_id}
)

# SCD II handling for Report
@event.listens_for(Session, "before_flush")
def before_flush(session, flush_context, instances):
    for instance in session.dirty:
        if any((
            not isinstance(instance, Report),
            not session.is_modified(instance),
            not attributes.instance_state(instance).has_identity,
        )):
            continue
        # unset current flag on previous instance
        session.query(Report).filter_by(id=instance.id).update(
            values={"current_flag": False}, synchronize_session=False
        )
        # make instance transient
        make_transient(instance)
        # remove id and created_at since new ones will be created on add
        instance.id = None
        instance.created_at = None
        # re-add to session with new id and version
        session.add(instance)

engine = create_engine("sqlite://", echo=True, future=True)
mapper_registry.metadata.create_all(engine)
session = Session(engine)

# setting the attribute now triggers before_flush, which inserts a new row
r1 = Report(title="r1", version="a")
session.add(r1)
session.flush()  # flush r1a
r1.version = "b"
session.flush()  # flush r1b

session.execute(select(Report)).scalars().all()  # r1a, r1b
session.execute(select(Report).filter_by(current_flag=True)).scalars().all()  # r1b
session.execute(text("SELECT * FROM report")).all()
session.close()

Get last inserted record's Primary Key in "declarative_base()"

I want to get the primary key of the last inserted record. I already know two ways to do this:
1) "lastrowid" with raw SQL
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, text

engine = create_engine('sqlite://')
meta = MetaData()
tbl = Table('tbl', meta,
    Column('f1', Integer, primary_key=True),
    Column('f2', String(64))
)
tbl.create(engine)

sql = text("INSERT INTO tbl VALUES (NULL, 'some_data')")
res = engine.execute(sql)
print(res.lastrowid)
2) "inserted_primary_key" with "insert()"
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String

engine = create_engine('sqlite://')
meta = MetaData()
tbl = Table('tbl', meta,
    Column('f1', Integer, primary_key=True),
    Column('f2', String(64))
)
tbl.create(engine)

ins = tbl.insert().values(f2='some_data')
res = engine.execute(ins)
print(res.inserted_primary_key)
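Note that inserted_primary_key returns a sequence with one value per primary-key column (e.g. [1] in older SQLAlchemy versions, or a named tuple (1,) in 1.4+), not a bare scalar.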
But my problem is with "declarative_base()":
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite://')
Base = declarative_base()
Session = sessionmaker(bind=engine)
session = Session()

class TBL(Base):
    __tablename__ = 'tbl'
    f1 = Column(Integer, primary_key=True)
    f2 = Column(String(64))

Base.metadata.create_all(engine)
rcd = TBL(f2='some_data')
session.add(rcd)
session.commit()
If I do this:
res = session.add(rcd)
it gives me None. And if I do this:
res = session.commit()
the same thing happens. My question is:
Is there any good way to access "lastrowid" or "inserted_primary_key" in the case of "declarative_base()"?
What is the best approach?
After calling session.commit(), accessing rcd.f1 will return its generated primary key: SQLAlchemy automatically reloads the object from the database after it has been expired by the commit.
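For example, a minimal sketch continuing the code above:

rcd = TBL(f2='some_data')
session.add(rcd)
session.commit()
print(rcd.f1)  # prints the generated primary key, e.g. 1

If you need the key before committing, session.flush() also populates rcd.f1 without ending the transaction.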

How can I create column_properties that use a groupby?

I have this sql query:
select
    rooms.*,
    COUNT(DISTINCT(o.resident_id)) as resident_count,
    COUNT(reviews.id) as review_count,
    COUNT(photos.id) as photo_count,
    AVG(reviews.rating) as mean_review
from t_rooms rooms
JOIN t_room_listings listings on listings.room_id = rooms.id
JOIN t_occupancies o on o.listing_id = listings.id
LEFT JOIN t_reviews reviews on reviews.occupancy_id = o.id
LEFT JOIN t_photos photos on photos.occupancy_id = o.id
GROUP BY rooms.id
Which I know I can write in ORM query form as:
q = (session
    .query(
        Room,
        func.count(func.distinct(Occupancy.resident_id)).label('resident_count'),
        func.count(Review.id).label('review_count'),
        func.count(Photo.id).label('photo_count'),
        (
            (3 + func.avg(Review.rating)) / (1 + func.count(Review.rating))
        ).label('bayesian_rating')
    )
    .select_from(
        join(Room, RoomListing).join(Occupancy).outerjoin(Review).outerjoin(Photo)
    )
    .group_by(Room.id)
)

for room, res_ct, rev_ct, p_ct, score in q:
    wish_that_I_could_write(room.res_ct, room.rev_ct, room.p_ct, room.score)
But how can I declare resident_count, review_count, etc. as column_property attributes on my Room class, so that I don't need to construct this query each time?
You can achieve this by mapping the query to an object, like so:
class ExtendedRoom(object):
    pass

# q is your query
mapper(ExtendedRoom, q.statement.alias())

for room in session.query(ExtendedRoom).all():
    # room now has review_count and the other attributes
    print(room.review_count)
Here is a simplified example with column_property:
from sqlalchemy import create_engine, Column, Integer, MetaData, Table, String, func
from sqlalchemy.sql import select
from sqlalchemy.orm import sessionmaker, mapper, column_property

engine = create_engine('sqlite:///:memory:', echo=True)
Session = sessionmaker(bind=engine)
session = Session()
metadata = MetaData()

room = Table('room', metadata,
    Column('id', Integer, primary_key=True),
    Column('name', String),
    Column('num', Integer, default=0),
)
metadata.create_all(engine)

statement = select([room]).group_by(room.c.id).alias()

class Room(object):
    pass

mapper(Room, statement, properties={
    'count': column_property(func.count(statement.c.num)),
})

print(session.query(Room).all())
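To answer the original question more directly, the counts can also be declared as column_property attributes built from correlated scalar subqueries. The following is only a sketch: it assumes declarative models Room, RoomListing and Occupancy from the original question, with the foreign keys implied by the SQL above.

from sqlalchemy import func, select
from sqlalchemy.orm import column_property

# assumed schema: RoomListing.room_id -> Room.id,
# Occupancy.listing_id -> RoomListing.id
Room.resident_count = column_property(
    select([func.count(func.distinct(Occupancy.resident_id))])
    .where(RoomListing.room_id == Room.id)
    .where(Occupancy.listing_id == RoomListing.id)
    .correlate_except(RoomListing, Occupancy)
)

With this in place, session.query(Room) loads resident_count alongside each Room row without rebuilding the aggregate query by hand.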

SQLAlchemy generates a different query for counts than expected with a polymorphic_identity

I have a query that I create; it looks like:
items = Session.query(Widgets.id).filter_by(
    state=WidgetStates.NEW
)
When I look at the str representation of it, I see this as the planned query:
str(items)
'SELECT widgets.id AS widgets_id \nFROM widgets \nWHERE widgets.state = %(state_1)s'
However, when I execute the query to get a count with echo=True, I see a different query being executed:
items.count()
2014-08-09 11:59:48,875 INFO sqlalchemy.engine.base.Engine SELECT count(*) AS count_1
FROM widgets, (SELECT widgets.id AS widgets_id
FROM widgets
WHERE widgets.state = %(state_1)s) AS anon_1
WHERE widgets.type IN (%(type_1)s)
The problem is that it counts the entire widgets table where type equals "FOO_WIDGET", but it does not filter the count by state as I would have expected.
I think the issue relates to the Widget model having a polymorphic_identity discriminator applied to it:
class Widget(Model):
    class types(object):
        FOO_WIDGET = 'foo'

    __mapper_args__ = {
        'polymorphic_identity': Widget.types.FOO_WIDGET
    }
But the issue is that it isn't using the items query to count: it's combining two different selections of the widgets table, and one of them has no filtering at all. How do I get this query to work as expected?
Runnable Example
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Table, Column, Unicode, Integer, create_engine, MetaData, func
from sqlalchemy.orm import scoped_session, sessionmaker

metadata = MetaData()
Base = declarative_base(metadata=metadata)

widgets = Table(
    'widgets', metadata,
    Column('id', Integer, primary_key=True),
    Column('type', Unicode),
    Column('state', Unicode)
)

class Widget(Base):
    __table__ = widgets

    class types(object):
        FOO_WIDGET = 'foo'
        BAR_WIDGET = 'bar'

    __mapper_args__ = {
        'polymorphic_on': widgets.c.type,
    }

class FooWidget(Widget):
    __mapper_args__ = {
        'polymorphic_identity': Widget.types.FOO_WIDGET
    }

db_engine = create_engine('sqlite:///:memory:', echo=True)
Session = scoped_session(sessionmaker())
Session.configure(bind=db_engine)
metadata.create_all(db_engine)

items = Session.query(FooWidget.id).filter_by(
    state='new'
)

print str(items)
print 'i expect the next statement to print something approximating:'
print '''
select count(*) from widgets where type = 'foo' and state = 'new'
'''
print items.count()

# What this actually prints:
'''
2014-08-28 09:55:15,055 INFO sqlalchemy.engine.base.Engine SELECT count(*) AS count_1
FROM widgets, (SELECT widgets.id AS widgets_id
FROM widgets
WHERE widgets.state = ?) AS anon_1
WHERE widgets.type IN (?)
'''
To run this example you need SQLAlchemy (tested here with SQLAlchemy 0.9.7; in my actual app it's 0.7.x, and the bug exists in both versions).
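No answer is recorded here, but a common workaround (a sketch, not taken from this thread) is to build the count explicitly with func.count() instead of relying on Query.count(), so no wrapping subquery is generated:

from sqlalchemy import func

# counts rows directly, producing roughly:
# SELECT count(widgets.id) FROM widgets WHERE type = 'foo' AND state = 'new'
count = (
    Session.query(func.count(widgets.c.id))
    .filter(widgets.c.type == Widget.types.FOO_WIDGET)
    .filter(widgets.c.state == 'new')
    .scalar()
)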

SQLAlchemy - Dictionary of tags

I have a question regarding SQLAlchemy. How can I add a dictionary-like attribute to my mapped class that maps string keys to string values and is stored in the database (in the same table as, or a different table from, the original mapped object)? I want this in order to support arbitrary tags on my objects.
I found the following example in the SQLAlchemy documentation:
from sqlalchemy.orm.collections import column_mapped_collection, attribute_mapped_collection, mapped_collection

mapper(Item, items_table, properties={
    # key by column
    'notes': relation(Note, collection_class=column_mapped_collection(notes_table.c.keyword)),
    # or named attribute
    'notes2': relation(Note, collection_class=attribute_mapped_collection('keyword')),
    # or any callable
    'notes3': relation(Note, collection_class=mapped_collection(lambda entity: entity.a + entity.b))
})

item = Item()
item.notes['color'] = Note('color', 'blue')
But I want the following behavior:
mapper(Item, items_table, properties={
    # key by column
    'notes': relation(...),
})

item = Item()
item.notes['color'] = 'blue'
Is this possible in SQLAlchemy?
Thank you
The simple answer is yes.
Just use an association proxy:
from sqlalchemy import create_engine, Column, ForeignKey, Integer, MetaData, String, Table
from sqlalchemy.orm import relation, mapper, sessionmaker
from sqlalchemy.orm.collections import column_mapped_collection
from sqlalchemy.ext.associationproxy import association_proxy
Create a test environment:
engine = create_engine('sqlite:///:memory:', echo=True)
meta = MetaData(bind=engine)
Define the tables:
tb_items = Table('items', meta,
    Column('id', Integer, primary_key=True),
    Column('name', String(20)),
    Column('description', String(100)),
)
tb_notes = Table('notes', meta,
    Column('id_item', Integer, ForeignKey('items.id'), primary_key=True),
    Column('name', String(20), primary_key=True),
    Column('value', String(100)),
)
meta.create_all()
Classes (note the association_proxy in the class):
class Note(object):
    def __init__(self, name, value):
        self.name = name
        self.value = value

class Item(object):
    def __init__(self, name, description=''):
        self.name = name
        self.description = description

    notes = association_proxy('_notesdict', 'value', creator=Note)
Mapping:
mapper(Note, tb_notes)
mapper(Item, tb_items, properties={
    '_notesdict': relation(Note,
        collection_class=column_mapped_collection(tb_notes.c.name)),
})
Then just test it:
Session = sessionmaker(bind=engine)
s = Session()

i = Item('ball', 'A round full ball')
i.notes['color'] = 'orange'
i.notes['size'] = 'big'
i.notes['data'] = 'none'

s.add(i)
s.commit()
print i.notes
That prints:
{u'color': u'orange', u'data': u'none', u'size': u'big'}
But, are those in the notes table?
>>> print list(tb_notes.select().execute())
[(1, u'color', u'orange'), (1, u'data', u'none'), (1, u'size', u'big')]
It works!! :)
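A quick check that the proxy also works when loading (a sketch, in the same Python 2 style as the example):

s2 = Session()
item = s2.query(Item).filter_by(name='ball').one()
print item.notes['size']  # u'big'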
The simple answer is 'no'.
SQLAlchemy is a wrapper around a SQL database.
The relation examples you quote translate a relationship between SQL tables into a Python map-like structure to make it slightly simpler to do the SQL SELECT statements and locate rows in another table.
The
item.notes['color'] = Note('color', 'blue')
is essential because Note is a separate table with two columns. You can't leave the Note part out.
You must define this other SQL table, and you must create objects which are mapped to that SQL table.
