Auto-aliasing issues when selecting by relationship count with SQLAlchemy + Postgres - python

Given the following code:
from sqlalchemy import Column, ForeignKey, Integer, alias, create_engine, func, select
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
Base = declarative_base()
# Engine URL layout: dialect+driver://user:password@host:port/database.
# The credentials are separated from the host by "@".
engine = create_engine(
    "postgresql+psycopg2://***:***@127.0.0.1:5432/***", future=True
)
Session = sessionmaker(engine)
class Foo(Base):
__tablename__ = "foo"
id = Column(Integer, primary_key=True)
bars = relationship("Bar", uselist=True, secondary="foo_bar")
baz_id = Column(ForeignKey("baz.id"))
baz = relationship("Baz", back_populates="foos", lazy="joined")
class Bar(Base):
__tablename__ = "bar"
id = Column(Integer, primary_key=True)
class Baz(Base):
__tablename__ = "baz"
id = Column(Integer, primary_key=True)
foos = relationship(Foo, uselist=True)
class FooBar(Base):
__tablename__ = "foo_bar"
foo_id = Column(ForeignKey(Foo.id), primary_key=True)
bar_id = Column(ForeignKey(Bar.id), primary_key=True)
Base.metadata.create_all(engine)
stmt = (
select(Foo)
.join(FooBar, FooBar.foo_id == Foo.id)
.group_by(Foo.id)
.having(func.count(FooBar.foo_id) == 2)
)
Session().execute(stmt)
I want to select all Foos with exactly two Bars.
But I'm running into the following error:
column "baz_1.id" must appear in the GROUP BY clause or be used in an aggregate function
The generated SQL is:
SELECT foo.id, foo.baz_id, baz_1.id AS id_1
FROM foo JOIN foo_bar ON foo_bar.foo_id = foo.id
LEFT OUTER JOIN baz AS baz_1 ON baz_1.id = foo.baz_id GROUP BY foo.id
HAVING count(foo_bar.foo_id) = :count_1
Now I get what Postgres wants me to do, but I'm not sure how to achieve this: I can't add baz_1.id to the GROUP BY clause, because it's an alias that SQLAlchemy generates on the fly and I don't have any control over it.

Baz is being included in the query because of the lazy='joined' option on the relationship in Foo. We can override that option in the query, so that the join is not executed and the query works as desired.
stmt = (
select(Foo)
.options(orm.lazyload(Foo.baz)) # <- don't automatically join Baz.
.join(FooBar, FooBar.foo_id == Foo.id)
.group_by(Foo.id)
.having(func.count(FooBar.foo_id) == 2)
)
Generated SQL:
SELECT foo.id, foo.baz_id
FROM foo
JOIN foo_bar ON foo_bar.foo_id = foo.id
GROUP BY foo.id
HAVING count(foo_bar.foo_id) = %(count_1)s

Related

SQLAlchemy ORM NOT EXISTS: table alias scoping

I have three entities, A, B, C, where C links As to Bs (A-*C-B). I want to find those instances of A for which there is no instance of C that is not connected to a B.
I haven't been able to come up with a SQLAlchemy query that will do this for me and I'm beginning to think there's a problem with the compiler.
The following unit test illustrates this:
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import Column, Integer, create_engine, literal
_sql_engine = create_engine('sqlite:///:memory:')
session = sessionmaker(bind=_sql_engine)()
def test_model():
Base = declarative_base()
class A(Base):
__tablename__ = 'a'
id = Column(Integer, primary_key=True)
class B(Base):
__tablename__ = 'b'
id = Column(Integer, primary_key=True)
class C(Base):
__tablename__ = 'c'
a = Column(Integer, primary_key=True)
b = Column(Integer, primary_key=True)
Base.metadata.create_all(_sql_engine)
a = [A(id=10), A(id=20)]
c = [C(a=10, b=11), C(a=20, b=21)]
b = [B(id=11)]
session.add_all(a + c + b)
session.commit()
q = session.query(A).filter(
A.id < literal(100),
~(
session.query(C)
.filter(
A.id == C.a,
~(
session.query(B)
.filter(
B.id == C.b,
).exists()
)
).exists()
)
)
print(q.statement)
print(len(q.all()))
assert len(q.all()) == 1
The test expects one result, but it gets zero. The SQL statement that is printed is
SELECT a.id
FROM a
WHERE a.id < :param_1 AND NOT (EXISTS (SELECT 1
FROM c
WHERE NOT (EXISTS (SELECT 1
FROM b, a
WHERE b.id = c.b AND a.id = c.a))))
Now, it looks to me like the problem is with the third FROM clause: b and a override the aliases above and are disconnected from the previous constraints.
Is this correct? Is this how scoping in SQL works? If so, am I making a mistake with SQLAlchemy or is this a bug?
(The unit test uses SQLite, but the end result should run in PostgreSQL.)
Thanks to zzzeek on github, I found the answer: aliases are shadowed in subqueries after the second nesting. If we don't want this, we use correlated subqueries.
Fixed unit test:
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, aliased
from sqlalchemy import Column, Integer, create_engine, literal
_sql_engine = create_engine('sqlite:///:memory:')
session = sessionmaker(bind=_sql_engine)()
def test_model():
Base = declarative_base()
class A(Base):
__tablename__ = 'a'
id = Column(Integer, primary_key=True)
class B(Base):
__tablename__ = 'b'
id = Column(Integer, primary_key=True)
class C(Base):
__tablename__ = 'c'
a = Column(Integer, primary_key=True)
b = Column(Integer, primary_key=True)
Base.metadata.create_all(_sql_engine)
a = [A(id=10), A(id=20)]
c = [C(a=10, b=11), C(a=20, b=21)]
b = [B(id=11)]
session.add_all(a + c + b)
session.commit()
q = session.query(A).filter(
A.id < literal(100),
~(
session.query(C)
.filter(
A.id == C.a,
~(
session.query(B)
.filter(
B.id == C.b
).exists().correlate(A,C)
)
).exists()
)
)
print(q.statement)
print(len(q.all()))
assert len(q.all()) == 1

SQLAlchemy loses column label on chained union/except_

I have a somewhat complex query where I need to join a subquery. That subquery contains EXCEPT and UNION. In raw SQL it looks something like this:
-- Set difference in SQL is spelled EXCEPT (EXCLUDE is not a set operator).
SELECT ... FROM table t
JOIN (SELECT id AS foo_id FROM foo WHERE select_me
EXCEPT SELECT foo_id FROM bar WHERE add_or_remove = 'remove'
UNION SELECT foo_id FROM bar WHERE add_or_remove = 'add') subq
ON t.foo_id = subq.foo_id;
Where foo and bar tables are defined like this:
class Foo(Base):
__tablename__ = 'foo'
id = Column(Integer, primary_key=True, autoincrement=True)
select_me = Column(Boolean)
class Bar(Base):
__tablename__ = 'bar'
foo_id = Column(Integer, primary_key=True)
add_or_remove = Column(Enum('add', 'remove', name='add_or_remove'), primary_key=True)
When I try to build this subquery in SQLAlchemy, it loses the column label when I add a second union/except_.
Here is what I'm talking about:
q = session.query(Foo.id.label('foo_id')).filter(Foo.select_me)
print(q.subquery().c)
Prints ['%(140275696626880 anon)s.foo_id'] still contains correct label
q = q.union(session.query(Bar.foo_id.label('foo_id')).filter(Bar.add_or_remove == 'add'))
print(q.subquery().c)
Prints ['%(140275696767384 anon)s.foo_id'] still contains correct label
q = q.except_(session.query(Bar.foo_id.label('foo_id')).filter(Bar.add_or_remove == 'remove'))
print(q.subquery().c)
Prints ['%(140275696769064 anon)s.%(140275696769008 anon)s_foo_id'] now column is labeled with autogenerated name and I cannot use it to specify condition in join.
For now I think I can just take the first column and use it. But this is a hacky solution, so I wonder whether this is a bug in SQLAlchemy or I'm doing something wrong.

Python/SQLAlchemy - Need to convert an inner join query with 3 tables into Python

I need some help converting this query into SQLAlchemy.
select field from table t1 join table t2 on t1.detail_id = t2.id join table t3 on t3.id = t2.rate_id where t2.name = 'fred' and t3.rate_type = 'Custom' and t3.description = 'Default';
I have been able to convert inner join queries with two tables, but need some help with this one.
Appreciate your help. TIA.
If a simple SQL query is enough, you can try:
session.execute("SELECT t1.field AS t1_field "
"FROM t1 JOIN t2 ON t1.detail_id = t2.id JOIN t3 ON t2.rate_id = t3.id "
"WHERE t2.name = :name AND t3.rate_type = :rate_type AND t3.description = :description",
{'name': 'fred', 'rate_type': 'Custom', 'description': 'Default'})
But if you want to use SQLAlchemy declarative base then the query would look like:
# Three-way inner join T1 -> T2 -> T3. The filter values mirror the WHERE
# clause of the SQL statement being translated: 'fred', 'Custom', 'Default'.
results = (
    session.query(T1.field)
    .join(T2, T1.detail_id == T2.id)
    .join(T3, T2.rate_id == T3.id)
    .filter(T2.name == 'fred')
    .filter(T3.rate_type == 'Custom')
    .filter(T3.description == 'Default')
    .all()
)
For the following models:
from sqlalchemy import create_engine, Integer, ForeignKey, String, Column
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class T1(Base):
    """Mapped class for table ``t1``."""

    __tablename__ = 't1'

    id = Column(Integer, primary_key=True)
    field = Column(String)
    # The query joins on T1.detail_id == T2.id, so the foreign key must
    # reference t2.id rather than this table's own primary key.
    detail_id = Column(Integer, ForeignKey("t2.id"))
class T2(Base):
    """Mapped class for table ``t2``."""

    __tablename__ = 't2'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    # The query joins on T2.rate_id == T3.id, so the foreign key must
    # reference t3.id.
    rate_id = Column(Integer, ForeignKey("t3.id"))
class T3(Base):
    """Mapped class for table ``t3``: an integer key plus two string columns."""

    __tablename__ = "t3"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)

    # Plain string columns.
    rate_type = Column(String)
    description = Column(String)
I hope it helps.
SQLAlchemy provides both an ORM way and a SQL way to work with the database. You can use raw SQL (or the SQLAlchemy SQL Expression Language) to query.
(1) Raw SQL query. Sample code:
engine = create_engine(...)
q = 'SELECT foo FROM t_bar WHERE col_name=:v_parameters'
rs = engine.execute(sqlalchemy.text(q), v_parameters=your_actual_value)
Check execute and basic usage. Also take a look at ResultProxy to understand how to work with the returned result.
(2) ORM. If you want to use the ORM, you first have to define the models and mapped classes. Sample code:
from sqlalchemy import Column, ForeignKey
from sqlalchemy.types import String, Integer
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Father(Base):
    """Parent side of the one-to-many Father -> Son mapping."""

    # Declarative classes need a table name; ForeignKey() targets on other
    # classes refer to this name, not to the Python class name.
    __tablename__ = 'father'

    id = Column(Integer, primary_key=True)
    name = Column(String(31), unique=True, nullable=False)

    # Relationship attributes
    # The target is the mapped class name ('Son'). back_populates names the
    # attribute on the other class ('parent'), and that attribute in turn
    # names this one ('sons').
    sons = relationship('Son',
                        passive_deletes=True,
                        back_populates='parent')


class Son(Base):
    """Child side of the mapping; each Son row points at one Father row."""

    __tablename__ = 'son'

    id = Column(Integer, primary_key=True)
    name = Column(String(31), unique=True, nullable=False)

    # foreign keys
    # The ForeignKey target is '<table name>.<column name>'.
    p_id = Column(Integer, ForeignKey('father.id',
                                      ondelete='CASCADE',
                                      onupdate='CASCADE'))

    # Relationship attributes
    # passive_deletes is configured on the one-to-many side (Father.sons)
    # only, so it is not repeated on this many-to-one side.
    parent = relationship('Father',
                          back_populates='sons')
Then you can do ORM query operations
session.query(Father).join(Father.sons).filter(Son.name=='Sam')
is equal to SQL query
SELECT father.id, father.name FROM father JOIN son ON father.id=son.p_id WHERE son.name='Sam'.
Please check ORM mapper and ORM Query for more information.
For your application: if you have mapped all of your tables well, then you can use the ORM way. If you do not need ORM features, you can just use raw SQL queries.
Thanks.

Sqlalchemy exists with joined inheritance and Firebird

I tried to use SQLAlchemy joined table inheritance and ran into strange behavior.
class CommonObject(Base):
__tablename__ = "objects"
id = Column("objid", Integer, primary_key=True)
objname = Column(String(32))
...
class GoodsPlacement(Container, Loadable, Dumpable):
__tablename__ = "goods_placements"
id = Column("objid", Integer, ForeignKey("containers.objid"), primary_key=True)
...
class Departure(CommonObject):
__tablename__ = "departures"
id = Column(Integer, ForeignKey("objects.objid"), primary_key=True)
content_id = Column(Integer, ForeignKey("goods_placements.objid"))
content = relationship("GoodsPlacement",
primaryjoin="Departure.content_id==GoodsPlacement.id",
foreign_keys=[content_id],
lazy='joined',
backref="departures")
...
When I write query:
session.query(GoodsPlacement).filter(~GoodsPlacement.departures.any(Departure.status_id < 2))
it generates me something like this:
SELECT
objects.objid AS objects_objid,
goods_placements.objid AS goods_placements_objid,
objects.objname AS objects_objname
FROM objects
JOIN goods_placements ON objects.objid = goods_placements.objid
WHERE NOT (EXISTS (
SELECT 1
FROM (
SELECT
objects.objid AS objects_objid,
objects.objname AS objects_objname,
departures.id AS departures_id,
departures.content_id AS departures_content_id,
departures.status_id AS departures_status_id
FROM objects
JOIN departures ON objects.objid = departures.id)
AS anon_1, objects
WHERE anon_1.departures_content_id = objects.objid
AND anon_1.departures_status_id < :status_id_1)
)
And this doesn't work, because objects in the EXISTS clause overrides the outer objects.
As a workaround I used exists from the SQL expression language directly,
session.query(GoodsPlacement).filter(~exists([1],
and_("departures.status_id<2",
"departures.content_id=goods_placements.objid"),
from_obj="departures"))
but it depends strongly on the column and table names.
How can I specify an alias for the objects table in the EXISTS statement?
Debian wheezy, python-2.7.3rc2, sqlalchemy 0.7.7-1
There's a bug involving the declarative system in how it sets up columns. The "objid" name you're giving the columns, distinct from the "id" attribute name, is the source of the issue here. The test case below approximates your system and shows a workaround until the bug is fixed:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base= declarative_base()
class CommonObject(Base):
__tablename__ = "objects"
id = Column("objid", Integer, primary_key=True)
objname = Column(String(32))
class Container(CommonObject):
__tablename__ = 'containers'
id = Column("objid", Integer, ForeignKey("objects.objid"), primary_key=True)
class GoodsPlacement(Container):
__tablename__ = "goods_placements"
id = Column("objid", Integer, ForeignKey("containers.objid"), primary_key=True)
class Departure(CommonObject):
__tablename__ = "departures"
id = Column(Integer, ForeignKey("objects.objid"), primary_key=True)
content_id = Column(Integer, ForeignKey("goods_placements.objid"))
status_id = Column(Integer)
content = relationship("GoodsPlacement",
primaryjoin=lambda:Departure.__table__.c.content_id==GoodsPlacement.__table__.c.objid,
backref="departures"
)
session = Session()
print session.query(GoodsPlacement).filter(~GoodsPlacement.departures.any(Departure.status_id < 2))
output:
SELECT objects.objid AS objects_objid, containers.objid AS containers_objid, goods_placements.objid AS goods_placements_objid, objects.objname AS objects_objname
FROM objects JOIN containers ON objects.objid = containers.objid JOIN goods_placements ON containers.objid = goods_placements.objid
WHERE NOT (EXISTS (SELECT 1
FROM (SELECT objects.objid AS objects_objid, objects.objname AS objects_objname, departures.id AS departures_id, departures.content_id AS departures_content_id, departures.status_id AS departures_status_id
FROM objects JOIN departures ON objects.objid = departures.id) AS anon_1
WHERE anon_1.departures_content_id = goods_placements.objid AND anon_1.departures_status_id < :status_id_1))

Using PostgreSQL array to store many-to-many relationship

Suppose we have a PostgreSQL database with two tables A, B.
table A columns: id, name
table B columns: id, name, array_a
The column array_a in table B contains a variable length array of ids from table A. In SQLAlchemy we have two classes that model those tables, say class A and B.
The following works fine to get all the objects A that are referenced in an object B:
session.query(A).join(B, A.id == func.any(B.array_a)).filter(B.id == <id>).all()
How can we create a relationship in B referencing the objects A corresponding to the array? Tried column comparators using the func.any above but it complains that ANY(array_a) is not a column in the model. Specifying the primaryjoin conditions as above doesn't seem to cut it either.
This anti-pattern is called "Jaywalking", and PostgreSQL's powerful type system makes it very tempting. You should be using another table:
CREATE TABLE table_a (
id SERIAL PRIMARY KEY,
name VARCHAR
);
CREATE TABLE table_b (
id SERIAL PRIMARY KEY,
name VARCHAR
);
-- Association table linking table_a and table_b. A table can have only one
-- PRIMARY KEY constraint, so the two columns form a single composite key.
CREATE TABLE a_b (
    a_id INTEGER REFERENCES table_a(id),
    b_id INTEGER REFERENCES table_b(id),
    PRIMARY KEY (a_id, b_id)
);
Which is mapped:
from sqlalchemy import *
from sqlalchemy.dialects import postgresql
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *
Base = declarative_base()
a_b_table = Table("a_b", Base.metadata,
Column("a_id", Integer, ForeignKey("table_a.id"), primary_key=True),
Column("b_id", Integer, ForeignKey("table_b.id"), primary_key=True))
class A(Base):
__tablename__ = "table_a"
id = Column(Integer, primary_key=True)
name = Column(String)
class B(Base):
__tablename__ = "table_b"
id = Column(Integer, primary_key=True)
name = Column(String)
a_set = relationship(A, secondary=a_b_table, backref="b_set")
example:
>>> print Query(A).filter(A.b_set.any(B.name == "foo"))
SELECT table_a.id AS table_a_id, table_a.name AS table_a_name
FROM table_a
WHERE EXISTS (SELECT 1
FROM a_b, table_b
WHERE table_a.id = a_b.a_id AND table_b.id = a_b.b_id AND table_b.name = :name_1)
If you are stuck with the ARRAY column, your best bet is to use an alternate selectable that "looks" like a proper association table.
from sqlalchemy import *
from sqlalchemy.dialects import postgresql
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *
Base = declarative_base()
class A(Base):
__tablename__ = "table_a"
id = Column(Integer, primary_key=True)
name = Column(String)
class B(Base):
__tablename__ = "table_b"
id = Column(Integer, primary_key=True)
name = Column(String)
array_a = Column(postgresql.ARRAY(Integer))
a_b_selectable = select([func.unnest(B.array_a).label("a_id"),
B.id.label("b_id")]).alias()
A.b_set = relationship(B, secondary=a_b_selectable,
primaryjoin=A.id == a_b_selectable.c.a_id,
secondaryjoin=a_b_selectable.c.b_id == B.id,
viewonly=True,)
B.a_set = relationship(A, secondary=a_b_selectable,
primaryjoin=A.id == a_b_selectable.c.a_id,
secondaryjoin=a_b_selectable.c.b_id == B.id,
viewonly=True)
which gives you:
>>> print Query(A).filter(A.b_set.any(B.name == "foo"))
SELECT table_a.id AS table_a_id, table_a.name AS table_a_name
FROM table_a
WHERE EXISTS (SELECT 1
FROM (SELECT unnest(table_b.array_a) AS a_id, table_b.id AS b_id
FROM table_b) AS anon_1, table_b
WHERE table_a.id = anon_1.a_id AND anon_1.b_id = table_b.id AND table_b.name = :name_1)
And obviously, since there's no real table there, viewonly=True is necessary and you can't get the nice, dynamic objecty goodness you would if you had avoided jaywalking.
Alternatively, you can simply define the join explicitly, as below:
class A(Base):
__tablename__ = "table_a"
id = Column(Integer, primary_key=True)
name = Column(String)
class B(Base):
__tablename__ = "table_b"
id = Column(Integer, primary_key=True)
name = Column(String)
array_a = Column(postgresql.ARRAY(Integer))
a_ids= relationship('A',primaryjoin='A.id == any_(foreign(B.array_a))',uselist=True)

Categories

Resources