Add alias to SqlAlchemy join object - python

I am trying to select a column from a subquery (code below).
import sqlalchemy as sa
table1 = sa.schema.Table('t1',
db,
autoload=True,
autoload_with=db.engine)
si = Query().cte()
pids = sa.select([t1.c.pid], distinct=True).where(t1.c.date > date)
jt = pids.join(
si,
(si.c.pid == t1.c.pid) &
(si.c.startdate <= date) &
(si.c.enddate > date))
return sa.select([si.c.id]).select_from(jt)
However, I always get an error like:
sqlalchemy.exc.ProgrammingError: (psycopg2.ProgrammingError) subquery in FROM must have an alias
LINE 6: FROM (SELECT DISTINCT t1.pid AS pid
^
HINT: For example, FROM (SELECT ...) [AS] foo.
How should I add an alias to my subquery?

Related

Can I intersect two Queryset of same table but with different query?

minimum_likes_queryset = PostInLanguages.objects.annotate(likes=Count('like_model', distinct=True)).filter(likes__gte=minimum_likes)
recouched_posts_ids = PostInLanguages.objects.values('parent_post_language_id').annotate(recouch_count=Count('parent_post_language_id')).filter(recouch_count__gte=minimum_recouch, is_post_language=False).order_by().values_list('parent_post_language_id', flat=True)
recouched_post_queryset = PostInLanguages.objects.filter(id__in=recouched_posts_ids)
this is the query
SELECT "api_postinlanguages"."id", "api_postinlanguages"."post_in_language_uuid", "api_postinlanguages"."post_id", "api_postinlanguages"."language_id", "api_postinlanguages"."is_post_language", "api_postinlanguages"."parent_post_language_id", "api_postinlanguages"."description", "api_postinlanguages"."created_on", COUNT(DISTINCT "api_postlanguagelike"."id") AS "likes" FROM "api_postinlanguages" LEFT OUTER JOIN "api_postlanguagelike" ON ("api_postinlanguages"."id" = "api_postlanguagelike"."post_language_id") GROUP BY "api_postinlanguages"."id" HAVING COUNT(DISTINCT "api_postlanguagelike"."id") >= 1
SELECT "api_postinlanguages"."id", "api_postinlanguages"."post_in_language_uuid", "api_postinlanguages"."post_id", "api_postinlanguages"."language_id", "api_postinlanguages"."is_post_language", "api_postinlanguages"."parent_post_language_id", "api_postinlanguages"."description", "api_postinlanguages"."created_on" FROM "api_postinlanguages" WHERE "api_postinlanguages"."id" IN (SELECT U0."parent_post_language_id" FROM "api_postinlanguages" U0 WHERE NOT U0."is_post_language" GROUP BY U0."parent_post_language_id" HAVING COUNT(U0."parent_post_language_id") >= 1)
this is the exception
An exception occurred: column "api_postinlanguages.id" must appear in the GROUP BY clause or be used in an aggregate function
LINE 1: SELECT COUNT(*) FROM (SELECT "api_postinlanguages"."id" AS "...
^

SQLAlchemy execute query but does not update despite raw query working fine in a SQL IDE

I have a query that works fine in a SQL editor:
UPDATE users mu
JOIN (SELECT
min.user_id as user_id,
(max.total_followers - min.total_followers) as progression
FROM(select user_id, measurement_date, total_followers
from followers_totals ft
where measurement_date = (select max(measurement_date) from followers_totals as f where f.user_id = ft.user_id
and date_format(ft.measurement_date, '%%Y-%%m-%%d') >= date_format(date_sub(CURDATE(), interval 7 day), '%%Y-%%m-%%d'))) max
JOIN (select user_id, measurement_date, total_followers
from followers_totals ft
where measurement_date = (select min(measurement_date) from followers_totals as f where f.user_id = ft.user_id
and date_format(ft.measurement_date, '%%Y-%%m-%%d') >= date_format(date_sub(CURDATE(), interval 7 day), '%%Y-%%m-%%d'))) min
ON max.user_id = min.user_id
WHERE min.user_id = '123456' and max.user_id = '123456') progression
ON progression.user_id = mu.user_id
SET mu.followers_count_progress_7D = progression.progression
WHERE progression.user_id is not null;
I try to execute the same query from SQLAlchemy using the execute function:
import sqlalchemy
from sqlalchemy import create_engine, Table, MetaData, exc
eng = create_engine('mysql://xxxxxxxxxxxxxxxxxxxxxxxxxxxx')
con = eng.connect()
try:
query = """UPDATE users mu
JOIN (SELECT
min.user_id as user_id,
(max.total_followers - min.total_followers) as progression
FROM(select user_id, measurement_date, total_followers
from followers_totals ft
where measurement_date = (select max(measurement_date) from followers_totals as f where f.user_id = ft.user_id
and date_format(ft.measurement_date, '%%Y-%%m-%%d') >= date_format(date_sub(CURDATE(), interval 7 day), '%%Y-%%m-%%d'))) max
JOIN (select user_id, measurement_date, total_followers
from followers_totals ft
where measurement_date = (select min(measurement_date) from followers_totals as f where f.user_id = ft.user_id
and date_format(ft.measurement_date, '%%Y-%%m-%%d') >= date_format(date_sub(CURDATE(), interval 7 day), '%%Y-%%m-%%d'))) min
ON max.user_id = min.user_id
WHERE min.user_id = '123456' and max.user_id = '123456') progression
ON progression.user_id = mu.user_id
SET mu.followers_count_progress_7D = progression.progression
WHERE progression.user_id is not null;"""
rs = con.execute(query)
print(rs)
except exc.SQLAlchemyError as e:
print (e)
No exception is raised, and print(rs) shows a result proxy as expected.
However the db does not get updated with SQLAlchemy while it is updated with a SQL editor.
Is there some part of my query that is not supported by SQL Alchemy?
I initially thought the problem was the escaping of the % in the date format, but tests show that simpler queries using the same escaping run as expected.
EDIT: after using echo=True in the engine creation as suggested above, I can see that the query formatting is preserved and the commit is done. I copied and pasted the output of the echo into a SQL editor and the query works well, but with SQLAlchemy it does not update at all.
EDIT2: tried adding autocommit=True with same result....
Logs are showing:
2021-02-14 11:21:21,387 INFO sqlalchemy.engine.base.Engine ()
2021-02-14 11:21:21,389 INFO sqlalchemy.engine.base.Engine UPDATE users mu
JOIN (
SELECT min.user_id as user_id,
(max.total_followers - min.total_followers) as progression
FROM(
select user_id, measurement_date, total_followers
....
ON progression.user_id = mu.user_id
SET mu.followers_count_progress_7D = progression.progression
WHERE progression.user_id is not null;
2021-02-14 11:21:21,389 INFO sqlalchemy.engine.base.Engine ()
2021-02-14 11:21:21,393 INFO sqlalchemy.engine.base.Engine COMMIT
0
The user used to connect has all permissions:
GRANT ALL ON *.* TO 'user1'@'%';
Simpler update queries run on SQLAlchemy are actually working.
EDIT 3:
Interestingly, it seems that this only happens for certain IDs, but not all. How can something ID-dependent work remotely but not locally...?
Since the debug printing didn't seem to give enough info to solve the issue, I'm going to assume that it is indeed an issue with actually committing changes to the DB, so, like other people have mentioned in various other questions (such as: setting autocommit to 1 in mysql), you should try explicitly using autocommit=True.
You can test this either with a with statement such as:
with engine.connect().execution_options(autocommit=True) as conn:
conn.execute(query)
or just appending .execution_options(autocommit=True) to your existing code:
conn.execute(query).execution_options(autocommit=True)
Note though that execution_option's autocommit parameter will be deprecated with SQLAlchemy 1.4 and that the replacement will be to set transaction isolation levels as shown here.
Just to reiterate, it seems like MySQL sets the autocommit value to 0 internally, meaning that it uses a transaction which needs to be .commit()ed to propagate it to the DB. Hope that actually solves the issue as I'm not set up to test this on my machine at the moment.

Select the count and value of a SQLAlchemy column using HAVING

I want to select the count of all contacts with the same email address that have more than one duplicate. I can't get this query working in SQLAlchemy with PostgreSQL.
SELECT count(*), email FROM contact group by email having count(*) > 1
I tried this:
all_records = db.session.query(Contact).options(
load_only('email')).group_by(Contact.email).having(
func.count('*') > 1).all()
sqlalchemy.exc.ProgrammingError: (psycopg2.ProgrammingError) column "contact.id" must appear in the GROUP BY clause or be used in an aggregate function
LINE 1: SELECT contact.id AS contact_id, contact.email AS contact_em...
^
[SQL: 'SELECT contact.id AS contact_id, contact.email AS contact_email \nFROM contact GROUP BY contact.email \nHAVING count(%(count_1)s) > %(count_2)s'] [parameters: {'count_1': '*', 'count_2': 1}]
And I tried this:
all_records = db.session.query(func.count(Contact.id)).options(
load_only('email')).group_by(Contact.email).having(
func.count('*') > 1).all()
sqlalchemy.exc.ArgumentError
sqlalchemy.exc.ArgumentError: Wildcard loader can only be used with exactly one entity. Use Load(ent) to specify specific entities.
It works correctly if I execute raw SQL:
all_records = db.session.execute(
"SELECT count(*), email FROM contact group by email"
" having count(*) > 1").fetchall()
I'm using Flask-SQLAlchemy, but here's a minimal SQLAlchemy setup to demonstrate the issue:
import sqlalchemy as sa
from sqlalchemy import orm
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Contact(Base):
__tablename__ = 'contact'
id = sa.Column(sa.Integer, primary_key=True)
email = sa.Column(sa.String)
engine = sa.create_engine('postgresql:///example', echo=True)
Base.metadata.create_all(engine)
session = orm.Session(engine)
session.add_all((
Contact(email='a#example.com'),
Contact(email='b#example.com'),
Contact(email='a#example.com'),
Contact(email='c#example.com'),
Contact(email='a#example.com'),
))
session.commit()
# first failed query
all_records = session.query(Contact).options(
orm.load_only('email')).group_by(Contact.email).having(
sa.func.count('*') > 1).all()
# second failed query
all_records = db.session.query(sa.func.count(Contact.id)).options(
orm.load_only('email')).group_by(Contact.email).having(
sa.func.count('*') > 1).all()
With the sample data, I expect to get one result row, 3, a#example.com.
You're not building the same query in SQLAlchemy that you're writing manually.
You want to select the count of each email that has more than one occurrence.
q = session.query(
db.func.count(Contact.email),
Contact.email
).group_by(
Contact.email
).having(
db.func.count(Contact.email) > 1
)
print(q)
SELECT count(contact.email) AS count_1, contact.email AS contact_email
FROM contact GROUP BY contact.email
HAVING count(contact.email) > %(count_2)s
The first query fails because you query the entire model, so SQLAlchemy selects all columns. You can only select grouped columns when using group_by. SQLAlchemy must always select the primary key when querying the entire model; load_only doesn't affect that.
The second query fails because load_only only works when selecting an entire model, but you're selecting an aggregate and a column.
Just select what you would in a text query:
db.session.query(func.count('*'), Contact.email).\
group_by(Contact.email).\
having(func.count('*') > 1).\
all()

Sqlalchemy: subquery in FROM must have an alias

How can I structure this sqlalchemy query so that it does the right thing?
I've given everything I can think of an alias, but I'm still getting:
ProgrammingError: (psycopg2.ProgrammingError) subquery in FROM must have an alias
LINE 4: FROM (SELECT foo.id AS foo_id, foo.version AS ...
Also, as IMSoP pointed out, it seems to be trying to turn it into a cross join, but I just want it to join a table with a group by subquery on that same table.
Here is the sqlalchemy:
(Note: I've rewritten it to be a standalone file that is as complete as possible and can be run from a python shell)
from sqlalchemy import create_engine, func, select
from sqlalchemy import Column, BigInteger, DateTime, Integer, String, SmallInteger
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
engine = create_engine('postgresql://postgres:########localhost:5435/foo1234')
session = sessionmaker()
session.configure(bind=engine)
session = session()
Base = declarative_base()
class Foo(Base):
__tablename__ = 'foo'
__table_args__ = {'schema': 'public'}
id = Column('id', BigInteger, primary_key=True)
time = Column('time', DateTime(timezone=True))
version = Column('version', String)
revision = Column('revision', SmallInteger)
foo_max_time_q = select([
func.max(Foo.time).label('foo_max_time'),
Foo.id.label('foo_id')
]).group_by(Foo.id
).alias('foo_max_time_q')
foo_q = select([
Foo.id.label('foo_id'),
Foo.version.label('foo_version'),
Foo.revision.label('foo_revision'),
foo_max_time_q.c.foo_max_time.label('foo_max_time')
]).join(foo_max_time_q, foo_max_time_q.c.foo_id == Foo.id
).alias('foo_q')
thing = session.query(foo_q).all()
print thing
generated sql:
SELECT foo_id AS foo_id,
foo_version AS foo_version,
foo_revision AS foo_revision,
foo_max_time AS foo_max_time,
foo_max_time_q.foo_max_time AS foo_max_time_q_foo_max_time,
foo_max_time_q.foo_id AS foo_max_time_q_foo_id
FROM (SELECT id AS foo_id,
version AS foo_version,
revision AS foo_revision,
foo_max_time_q.foo_max_time AS foo_max_time
FROM (SELECT max(time) AS foo_max_time,
id AS foo_id GROUP BY id
) AS foo_max_time_q)
JOIN (SELECT max(time) AS foo_max_time,
id AS foo_id GROUP BY id
) AS foo_max_time_q
ON foo_max_time_q.foo_id = id
and here is the toy table:
CREATE TABLE foo (
id bigint ,
time timestamp with time zone,
version character varying(32),
revision smallint
);
The SQL I was expecting to get (desired SQL) would be something like this:
SELECT foo.id AS foo_id,
foo.version AS foo_version,
foo.revision AS foo_revision,
foo_max_time_q.foo_max_time AS foo_max_time
FROM foo
JOIN (SELECT max(time) AS foo_max_time,
id AS foo_id GROUP BY id
) AS foo_max_time_q
ON foo_max_time_q.foo_id = foo.id
Final note:
I'm hoping to get an answer using select() instead of session.query() if possible. Thank you
You are almost there. Make a "selectable" subquery and join it with the main query via join():
foo_max_time_q = select([func.max(Foo.time).label('foo_max_time'),
Foo.id.label('foo_id')
]).group_by(Foo.id
).alias("foo_max_time_q")
foo_q = session.query(
Foo.id.label('foo_id'),
Foo.version.label('foo_version'),
Foo.revision.label('foo_revision'),
foo_max_time_q.c.foo_max_time.label('foo_max_time')
).join(foo_max_time_q,
foo_max_time_q.c.foo_id == Foo.id)
print(foo_q.__str__())
Prints (prettified manually):
SELECT
foo.id AS foo_id,
foo.version AS foo_version,
foo.revision AS foo_revision,
foo_max_time_q.foo_max_time AS foo_max_time
FROM
foo
JOIN
(SELECT
max(foo.time) AS foo_max_time,
foo.id AS foo_id
FROM
foo
GROUP BY foo.id) AS foo_max_time_q
ON
foo_max_time_q.foo_id = foo.id
The complete working code is available in this gist.
Cause
subquery in FROM must have an alias
This error means the subquery (on which we're trying to perform a join) has no alias.
Even if we .alias('t') it just to satisfy this requirement, we will then get the next error:
missing FROM-clause entry for table "foo"
That's because the join on clause (... == Foo.id) is not familiar with Foo.
It only knows the "left" and "right" tables: t (the subquery) and foo_max_time_q.
Solution
Instead, select_from a join of Foo and foo_max_time_q.
Method 1
Replace .join(B, on_clause) with .select_from(B.join(A, on_clause)):
]).join(foo_max_time_q, foo_max_time_q.c.foo_id == Foo.id
]).select_from(foo_max_time_q.join(Foo, foo_max_time_q.c.foo_id == Foo.id)
This works here because A INNER JOIN B is equivalent to B INNER JOIN A.
Method 2
To preserve the order of joined tables:
from sqlalchemy import join
and replace .join(B, on_clause) with .select_from(join(A, B, on_clause)):
]).join(foo_max_time_q, foo_max_time_q.c.foo_id == Foo.id
]).select_from(join(Foo, foo_max_time_q, foo_max_time_q.c.foo_id == Foo.id)
Alternatives to session.query() can be found here.

sqlalchemy exists() - how to avoid extra From

exists() containing another exists() results in extra From clause.
model.session.query(Table1.id).\
filter(~ exists().\
where(Table2.table1_id==Table1.id).\
where(~ exists().\
where(Table3.contract_id==Table2.contract_id).\
where(Table3.session_id==Table1.session_id))
)
this is generating:
SELECT table1.id AS table1_id FROM table1
WHERE NOT (EXISTS (SELECT * FROM table2
WHERE table2.table1_id = table1.id
AND NOT (EXISTS (SELECT * FROM table3, table1
WHERE table3.contract_id = table2.contract_id
AND table3.session_id = table1.session_id))))
Here, "FROM table1" in the last "exists" is not required because table1 is already in the topmost query. How can I force sqlalchemy not to add this extra "FROM table1"?
What I really want is:
SELECT table1.id AS table1_id FROM table1
WHERE NOT (EXISTS (SELECT * FROM table2
WHERE table2.table1_id = table1.id
AND NOT (EXISTS (SELECT * FROM table3
WHERE table3.contract_id = table2.contract_id
AND table3.session_id = table1.session_id))))
I wonder how to achieve that.
Can somebody help me please?
Using SQLAlchemy 0.7.9.
q = (session.query(Table1.id)
.filter(~exists(
select([Table2.id])
.where(Table2.table1_id == Table1.id)
.where(~exists(
# changing exists to be implicit enables the 'important' below
select([Table3.id])
.where(Table3.contract_id == Table2.contract_id)
.where(Table3.session_id == Table1.session_id)
# this is important
.correlate(Table1)
.correlate(Table2)
))
)))

Categories

Resources