SQLAlchemy subquery comparison

SQLAlchemy subquery comparison - python

I have the following set of tables:
class Job(db.Model):
    """A row of the ``jobs`` table; only the primary key is declared here."""

    __tablename__ = 'jobs'

    id = db.Column(db.Integer, primary_key=True)
class Informant(db.Model):
    """A row of the ``informants`` table, tied to a job through ``job_id``."""

    __tablename__ = 'informants'

    id = db.Column(db.Integer, primary_key=True)
    # Foreign key to jobs.id: the job this informant belongs to.
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Integer capacity contributed by this informant.
    max_students = db.Column(db.Integer)
class Student(db.Model):
    """A row of the ``students`` table; only the primary key is declared here."""

    __tablename__ = 'students'

    id = db.Column(db.Integer, primary_key=True)
# Association table between students and jobs; the pair
# (student_id, job_id) forms the composite primary key.
queues = db.Table(
    'queues',
    db.Column('student_id', db.Integer, db.ForeignKey('students.id')),
    db.Column('job_id', db.Integer, db.ForeignKey('jobs.id')),
    PrimaryKeyConstraint('student_id', 'job_id'),
)
Now I need to obtain something like:
-- Jobs whose summed informant capacity is <= the number of queued students.
SELECT jobs.id
FROM jobs
WHERE (
    SELECT SUM(informants.max_students)
    FROM informants
    WHERE informants.job_id = jobs.id
) <= (
    SELECT COUNT(1)
    FROM queues
    WHERE queues.job_id = jobs.id
)
So basically I am searching for the jobs whose number of queued students has reached or exceeded the maximum capacity, i.e. the sum of the related informants' capacities. Is there a clean way to do this in SQLAlchemy? I tried the following:
# Attempt from the question: the two Query objects are compared with <=
# directly, without .as_scalar(); the post reports that this renders as
# "SELECT jobs.id FROM jobs WHERE 0 = 1".
db.session.query(Job.id).filter( \
db.session.query(db.func.sum(Informant.max_students)). \
filter(Informant.job_id == Job.id) <= \
db.session.query(db.func.count(1)).select_from(queues). \
filter(queues.c.job_id == Job.id))
This yields something like SELECT jobs.id FROM jobs WHERE 0 = 1. Is there something I'm missing? I have successfully used similar queries before. Or am I better off using db.engine.execute to execute the raw SQL?

I was close to having the right answer. The piece that was missing is an as_scalar method on both subqueries. So the final query is:
# Total capacity of the informants attached to the enclosing job, as a
# scalar subquery so it can take part in a SQL comparison.
capacity = (
    db.session.query(db.func.sum(Informant.max_students))
    .filter(Informant.job_id == Job.id)
    .as_scalar()
)
# Number of queue rows for the enclosing job, likewise as a scalar subquery.
queued = (
    db.session.query(db.func.count(1))
    .select_from(queues)
    .filter(queues.c.job_id == Job.id)
    .as_scalar()
)
# NOTE(review): newer SQLAlchemy releases spell as_scalar() as
# scalar_subquery() - confirm against the installed version.
db.session.query(Job.id).filter(capacity <= queued)

Related

How to group by func.week, year etc in Association proxy

I have two tables, mapped by the following models:
class AcUsers(db.Model):
    """User row in ``ac_users`` with a scalar link to its ``AcTests`` row."""

    __tablename__ = "ac_users"

    id = Column(Integer, primary_key=True)
    b2c_customer_id = Column(
        Integer, ForeignKey("b2c_customer.id", ondelete="CASCADE")
    )
    # uselist=False: the attribute holds a single AcTests object, not a list.
    ac_tests = db.relationship(
        "AcTests",
        back_populates="ac_user",
        uselist=False,
        lazy="joined",
    )
    # Proxy to AcTests.last_test_submit_time through the relationship above.
    # It is not a mapped column of ac_users.
    last_test_submit_time = association_proxy(
        "ac_tests", "last_test_submit_time"
    )
class AcTests(db.Model):
    """Test row in ``ac_tests`` holding the timestamps for one user."""

    __tablename__ = "ac_tests"

    id = Column(Integer, primary_key=True)
    # Foreign key to ac_users.id: the owning user.
    user_id = Column(Integer, ForeignKey("ac_users.id"))
    # Reverse side of AcUsers.ac_tests.
    ac_user = db.relationship(
        "AcUsers",
        back_populates="ac_tests",
        uselist=False,
        lazy="joined",
    )
    first_test_started_time = Column(DateTime)
    last_test_submit_time = Column(DateTime)
I have tried to query the database like this:
# Attempt from the question. AcUsers.last_test_submit_time is an association
# proxy rather than a mapped column; the post reports that this query raises
# NotImplementedError ("can't be used as a plain column expression").
ac = (
db.session.query(
func.count(AcUsers.id),
func.week(AcUsers.last_test_submit_time),
func.year(AcUsers.last_test_submit_time),
)
.filter(AcUsers.results_sent)
.filter(AcUsers.last_test_submit_time != None)
.all()
)
I got this error:
NotImplementedError: The association proxy can't be used as a plain column expression; it only works inside of a comparison expression
How do I get past this error, so that I can group by week and year and know exactly how many users there are for the current period?
I have tried to get the daily, monthly, and yearly data by getting the data in the AcUsers and looping through it:
# Load every user whose results were sent and who has a submit time.
# `!= None` is deliberate: it builds a SQL criterion, which `is not None`
# (a plain Python identity test) would not.
submitted_users = db.session.query(AcUsers).filter(
    AcUsers.results_sent,
    AcUsers.last_test_submit_time != None,  # noqa: E711
)
ac_users = submitted_users.all()
daily:
# Count users per calendar day of their last submit time.
for user in ac_users:
    actual_date = user.last_test_submit_time.date()
    daily_data[actual_date] = daily_data.get(actual_date, 0) + 1

# Reshape the counts into {"x": day, "y": count} points.
results_sent_daily = [{"x": x, "y": y} for x, y in daily_data.items()]
I did the same for the monthly and yearly.
How do I make the query do the grouping itself, using func.year, func.week, func.date, etc.?
Thank you in advance

As stated in the association proxy documentation, the proxies are not meant for ORM querying aside from filtering.
The simplest thing to do is to craft a query using both models, specifying the join between them and the grouping fields.
# Group on the real AcTests column (not the AcUsers proxy) after joining the
# two models through the AcUsers.ac_tests relationship.
# Uses db.session, the session handle the rest of this thread uses.
ac = (
    db.session.query(
        func.count(AcUsers.id).label("count"),
        func.week(AcTests.last_test_submit_time).label("week"),
        func.year(AcTests.last_test_submit_time).label("year"),
    )
    .join(AcUsers.ac_tests)
    .group_by(
        func.week(AcTests.last_test_submit_time),
        func.year(AcTests.last_test_submit_time),
    )
    # `!= None` builds the SQL criterion; a Python `is not None` would not.
    .filter(AcTests.last_test_submit_time != None)  # noqa: E711
    .all()
)

SQLAlchemy many-to-many association querying specific child

In the case of many-to-many relationships, an association table can be used in the form of Association Object pattern.
I have the following setup of two classes having a M2M relationship through UserCouncil association table.
class Users(Base):
    """A user; council memberships are reached through ``UserCouncil``."""

    # NOTE(review): no __tablename__ or id is declared here - presumably
    # supplied by Base; confirm.
    name = Column(String, nullable=False)
    email = Column(String, nullable=False, unique=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    password = Column(String, nullable=False)
    salt = Column(String, nullable=False)
    # Holds UserCouncil association objects, not Councils instances.
    councils = relationship('UserCouncil', back_populates='user')
class Councils(Base):
    """A council; its members are reached through ``UserCouncil``."""

    # NOTE(review): no __tablename__ or id is declared here - presumably
    # supplied by Base; confirm.
    name = Column(String, nullable=False)
    created_at = Column(DateTime, default=datetime.utcnow)
    # Holds UserCouncil association objects, not Users instances.
    users = relationship('UserCouncil', back_populates='council')
class UserCouncil(Base):
    """Association object linking one user to one council, with a role."""

    # The two foreign keys together form the composite primary key.
    user_id = Column(UUIDType, ForeignKey(Users.id, ondelete='CASCADE'), primary_key=True)
    council_id = Column(UUIDType, ForeignKey(Councils.id, ondelete='CASCADE'), primary_key=True)
    role = Column(Integer, nullable=False)
    user = relationship('Users', back_populates='councils')
    council = relationship('Councils', back_populates='users')
However, in this situation, suppose I want to search for a council with a specific name cname for a given user user1. I can do the following:
# user1.councils holds UserCouncil association objects, so the Councils
# instance (and its name) is reached through the .council relationship.
for membership in user1.councils:
    if membership.council.name == cname:
        dosomething(membership.council)
Or, alternatively, this:
# First membership of user1 in a council named cname. Multiple criteria
# passed to filter() are ANDed together.
membership = (
    session.query(UserCouncil)
    .join(Councils)
    .filter(UserCouncil.user_id == user1.id, Councils.name == cname)
    .first()
)
# first() yields None when nothing matches, so this access raises
# AttributeError in that case.
membership.council
While the second one is more similar to raw SQL queries and performs better, the first one is simpler. Is there any other, more idiomatic way of expressing this query which is better performing while also utilizing the relationship linkages instead of explicitly writing traditional joins?

First, I think even the query you give as an example might need to go back to the database to load the UserCouncil.council relationship, if it is not already loaded in memory.
I think that given you want to search directly for the Council instance given its .name and the User instance, this is exactly what you should ask for. Below is the query for that with 2 options on how to filter on user_id (you might be more familiar with the second option, so please use it):
# v1: membership test via the relationship's any(); this does not require
# a JOIN, but produces the same result as v2.
is_member = Councils.users.any(UserCouncil.user_id == user_id)
q = select(Councils).filter(Councils.name == councils_name, is_member)
# v2: join, very similar to original SQL - replace `is_member` with
#     .join(UserCouncil).filter(UserCouncil.user_id == user_id)
council = session.execute(q).scalars().first()
As to making it more simple and idiomatic, I can only suggest to wrap it in a method or property on the User instance:
class Users(...):
    ...

    def get_council_by_name(self, councils_name):
        """Return the first council named *councils_name* that this user
        belongs to, using the session this instance is attached to.
        """
        q = (
            select(Councils)
            .filter(Councils.name == councils_name)
            # Restrict the joined UserCouncil rows to those of this user.
            .join(UserCouncil).filter(with_parent(self, Users.councils))
        )
        return object_session(self).execute(q).scalars().first()
so that you can later call it user.get_council_by_name('xxx')
Edit-1: added SQL queries
v1 of the first q query above will generate following SQL:
SELECT councils.id,
councils.name
FROM councils
WHERE councils.name = :name_1
AND (EXISTS
(SELECT 1
FROM user_councils
WHERE councils.id = user_councils.council_id
AND user_councils.user_id = :user_id_1
)
)
while v2 option will generate:
SELECT councils.id,
councils.name
FROM councils
JOIN user_councils ON councils.id = user_councils.council_id
WHERE councils.name = :name_1
AND user_councils.user_id = :user_id_1

Exclude something from where clause by using SQLAlchemy core

I have the following model:
class Vote(BaseModel):
    """A vote cast by one user (``rating_user_id``) on another
    (``rated_user_id``); both columns reference ``user.id``.
    """

    __tablename__ = 'vote'

    id = sa.Column(sa.Integer, autoincrement=True, index=True, primary_key=True)
    value = sa.Column(sa.Integer, nullable=False)
    rated_user_id = sa.Column(
        sa.Integer, sa.ForeignKey('user.id', ondelete='cascade'))
    rating_user_id = sa.Column(
        sa.Integer, sa.ForeignKey('user.id', ondelete='cascade'))
I just want to write a query which gives me the joined data; however, I don't know how to build it. This is my approach:
# Attempt from the question: users.alias('u1') / users.alias('u2') are called
# afresh in the column list, in each join() target and in each ON clause, so
# the same alias object is never reused across those places.
query = sa.select(
[votes, users.alias('u1'), users.alias('u2')],
use_labels=True
).select_from(votes.join(users.alias('u1'),votes.c.rated_user_id == users.alias('u1').c.id).join(users.alias('u2'), votes.c.rating_user_id == users.alias('u2').c.id))
But it doesn't work, because it includes "user" as "u1" in the FROM clause.
Thanks!

Each invocation of alias() produces a unique alias object, even if you give them the same label. Instead give the aliases a name and use the same object in every part of your query:
# Create each alias once and reuse the same object everywhere it appears.
u1 = users.alias('u1')
u2 = users.alias('u2')

# votes joined to the rated user (u1) and to the rating user (u2).
joined = (
    votes
    .join(u1, votes.c.rated_user_id == u1.c.id)
    .join(u2, votes.c.rating_user_id == u2.c.id)
)
query = sa.select([votes, u1, u2], use_labels=True).select_from(joined)

Strange filter behavior in flask-sqlalchemy

I have a query in flask-sqlalchemy and filter is behaving strange:
q.filter(Transaction.transaction_id == ReconciledTransaction.safe_withdraw_id).all()
It works fine, but:
q.filter(Transaction.transaction_id != ReconciledTransaction.safe_withdraw_id).all()
Doesn't work correctly! What seems to be the problem?
UPD
My models:
Reconciled transaction model:
class ReconciledTransactionModel(db.Model):
    """Reconciled Transaction model"""

    __tablename__ = 'ReconciledTransaction'

    id = db.Column('id', db.Integer, primary_key=True, nullable=False)
    balance_entry_id = db.Column('BalanceEntry_id', db.Integer, db.ForeignKey("BalanceEntry.id"), nullable=False)
    # Stored in the 'Transaction_id' column as a string; no ForeignKey is
    # declared on it.
    safe_withdraw_id = db.Column('Transaction_id', db.String, nullable=False)
    datetime = db.Column('datetime', db.Date(), nullable=False)
    balance_entry_amount = db.Column('BalanceEntry_amount', db.Float)
    reconciled_amount = db.Column('ReconciledAmount', db.Float)
    currency = db.Column('currency', db.String)
    reconciliation_status = db.Column('reconciliation_status', db.String, nullable=False)
    status_code = db.Column('status_code', db.Integer, nullable=False)
Transaction Model:
class TransactionModel(db.Model):
    """Transaction SA model."""

    __tablename__ = 'Transaction'

    id = db.Column('id', db.Integer, primary_key=True)

    # Each foreign key below is paired with a relationship that installs a
    # ``transactions`` backref on the referenced model.
    till_id = db.Column('Till_id', db.Integer, db.ForeignKey("Till.id"),
                        nullable=False)
    till = relationship("Till", foreign_keys=[till_id], backref="transactions", enable_typechecks=False)
    establishment_id = db.Column('Establishment_id', db.Integer,
                                 db.ForeignKey("Establishment.id"),
                                 nullable=False)
    establishment = relationship("Establishment",
                                 foreign_keys=[establishment_id],
                                 backref="transactions",
                                 enable_typechecks=False)
    employee_id = db.Column('Employee_id', db.Integer,
                            db.ForeignKey("Employee.id"),
                            nullable=False)
    employee = relationship("Employee",
                            foreign_keys=[employee_id],
                            backref="transactions",
                            enable_typechecks=False)

    # Timestamps: local_time is a plain DateTime, the others are
    # timezone-aware TIMESTAMP columns.
    local_time = db.Column('local_time', db.DateTime, nullable=False)
    create_time = db.Column('create_time', db.TIMESTAMP(timezone=True),
                            nullable=False)
    send_time = db.Column('send_time', db.TIMESTAMP(timezone=True),
                          nullable=False)
    receive_time = db.Column('receive_time', db.TIMESTAMP(timezone=True),
                             nullable=False)

    total_value = db.Column('total_value', db.Integer, nullable=False)
    amount = db.Column('amount', db.Float, nullable=False)
    discrepancy = db.Column('discrepancy', db.Float, nullable=False)
    type = db.Column('type', db.Enum('shift',
                                     'payment',
                                     'skimming',
                                     'withdraw',
                                     'refund',
                                     'till',
                                     'till_deposit',
                                     'safe_deposit',
                                     'safe_withdraw',
                                     'till_reset',
                                     name='transaction_type'),
                     nullable=False)
    status = db.Column('status',
                       db.Enum('start', 'end', name='transaction_status'),
                       nullable=False)

    receipt_id = db.Column('receipt_id', db.String(32), server_default=None)
    # String identifier (distinct from the integer primary key ``id``).
    transaction_id = db.Column('transaction_id', db.String(32),
                               server_default=None)
    parent_transaction = db.Column('parent_transaction', db.String(32),
                                   server_default=None)
    discrepancy_reason = db.Column('discrepancy_reason', db.String(1024))
    resolve_discrepancy_reason = db.Column('resolve_discrepancy_reason',
                                           db.String(1024))
    accounted = db.Column('accounted', db.Boolean, default=False)
And here is my query:
# Base selection: each Transaction plus columns from the two subqueries,
# limited to the given stores, non-zero amounts and status 'start'.
_transactions = (
    db.session.query(
        Transaction,
        status_sq.c.count,
        end_transaction_sq.c.discrepancy,
        end_transaction_sq.c.discrepancy_reason,
        end_transaction_sq.c.resolve_discrepancy_reason,
        end_transaction_sq.c.amount,
    )
    .filter(Transaction.establishment_id.in_(store_ids))
    .filter(Transaction.amount != 0)
    .filter_by(status='start')
)

# Narrow to the requested types and attach both subqueries with outer
# joins on transaction_id.
transactions = (
    _transactions
    .filter(Transaction.type.in_(transaction_types))
    .outerjoin(
        status_sq,
        Transaction.transaction_id == status_sq.c.transaction_id,
    )
    .outerjoin(
        end_transaction_sq,
        Transaction.transaction_id == end_transaction_sq.c.transaction_id,
    )
)
# Check possible values for sorting and pages, falling back to the model
# defaults whenever a requested value is not allowed.
if sort_field not in allowed_sort_fields:
    sort_field = Transaction.default_sort_field
if sort_dir not in (ASCENDING, DESCENDING):
    sort_dir = Transaction.default_sort_dir
if per_page > 100:  # hard limit
    per_page = Transaction.default_per_page

# An ascending request must sort ascending; previously both branches
# called .desc(), so the requested direction was never applied.
sort_column = allowed_sort_fields[sort_field]
order = sort_column.asc() if sort_dir == ASCENDING else sort_column.desc()
# Join the lookup tables, group by every selected column plus the sort
# column, and apply the chosen ordering.
q = (
    transactions
    .join(Establishment)
    .join(Employee, Transaction.employee_id == Employee.id)
    .outerjoin(Currency)
    .group_by(
        Transaction,
        status_sq.c.count,
        end_transaction_sq.c.discrepancy,
        end_transaction_sq.c.discrepancy_reason,
        end_transaction_sq.c.resolve_discrepancy_reason,
        end_transaction_sq.c.amount,
        allowed_sort_fields[sort_field],
    )
    .order_by(order)
)

# Keep rows whose transaction_id equals a reconciled safe_withdraw_id,
# then fetch the requested page (pages are 1-based).
matched = q.filter(
    Transaction.transaction_id == ReconciledTransaction.safe_withdraw_id
)
first_row = (page - 1) * per_page
items = matched.limit(per_page).offset(first_row).all()
'Doesn't work correctly' means that in the second case (when I use != because I want only the transactions that are not in the ReconciledTransaction table) the filter is ignored, but when the filter uses ==, everything works correctly (I get only the matched transactions).

When you use query like this:
# Illustration of the problematic form: the filter mentions
# ReconciledTransaction, but the query has no join to it.
q = db.session.query(Transaction). \
filter(Transaction.transaction_id != ReconciledTransaction.safe_withdraw_id)
it transforms into SQL query:
SELECT Transaction.* FROM Transaction, ReconciledTransaction
WHERE Transaction.transaction_id != ReconciledTransaction.safe_withdraw_id
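which means every Transaction row is paired with every ReconciledTransaction row and only the pairs with matching ids are dropped, so a transaction is still returned as long as at least one reconciled row has a different id.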
If you need to get all Transaction objects which are not in ReconciledTransaction table you can first get all ReconciledTransaction ids:
# One row per distinct safe_withdraw_id (GROUP BY removes duplicates).
r_query = (
    db.session.query(ReconciledTransaction.safe_withdraw_id)
    .group_by(ReconciledTransaction.safe_withdraw_id)
)
# Each result row is a 1-tuple; unpack it to get the bare id.
r_ids = [withdraw_id for (withdraw_id,) in r_query]
and then use NOT IN filter in your Transaction query:
q = q.filter(Transaction.transaction_id.notin_(r_ids))
Or you can use a subquery:
q = q.filter(Transaction.transaction_id.notin_(
db.session.query(ReconciledTransaction.safe_withdraw_id)
))
Edit: as Ilja Everilä stated NOT EXISTS operator performance might be better than NOT IN. SQLAlchemy query will look like this:
# NOT EXISTS: keep transactions with no reconciled row whose safe_withdraw_id
# equals the transaction's transaction_id. These are the two string columns
# the question's filter compares; Transaction.id is the integer primary key.
q = q.filter(
    ~db.session.query(ReconciledTransaction)
    .filter(ReconciledTransaction.safe_withdraw_id == Transaction.transaction_id)
    .exists()
)

SQLalchemy top 3 results from each category

I have the following models:
class Contract(ContractJsonSerializer, db.Model):
    """A contract row in ``contracts``, belonging to one network."""

    __tablename__ = 'contracts'

    id = db.Column(db.Unicode(32), primary_key=True)
    device_name = db.Column(db.Unicode(256), nullable=False)
    monthly_price = db.Column(db.Numeric(precision=6, scale=2))
    # Foreign key to networks.id; Network.contracts adds the ``network``
    # backref on this class.
    network_id = db.Column(db.Integer(), db.ForeignKey('networks.id'))
class Network(NetworkJsonSerializer, db.Model):
    """A network row in ``networks`` with its list of contracts."""

    __tablename__ = 'networks'

    id = db.Column(db.Integer(), primary_key=True)
    name = db.Column(db.Unicode(20), nullable=False)
    # Also installs a ``network`` attribute on Contract via the backref.
    contracts = db.relationship('Contract', backref='network')
How can I get 3 cheapest contracts from each network?
I got following SQL:
-- Number the contracts within each network by ascending price using MySQL
-- user variables, then keep the first three per network.
set @num := 0, @network := '';
select *
from
(
    select *,
        @num := if(@network = network_id, @num + 1, 1) as row_number,
        @network := network_id as dummy
    from contracts
    order by network_id, monthly_price
) as x
where x.row_number <= 3;
But when I'm trying to execute it I'm getting:
contracts = Contract.query.from_statement(sql).all()
"ResourceClosedError: This result object does not return rows. It has been closed automatically."
Can I do it in declarative way? If not what is the best way of approaching this problem?

After a few hours of googling and reading the SQLAlchemy docs I've got this:
# create query order by monthly price
base_query = Contract.query.order_by(Contract.monthly_price)

# build one "three cheapest" subquery per network
# NOTE(review): `networks` is not defined in this snippet - presumably a
# Network query; confirm.
queries = [
    base_query.filter(Contract.network_id == n.id).limit(3).subquery().select()
    for n in networks.all()
]

# get contracts using union_all
contracts = Contract.query.select_entity_from(union_all(*queries)).all()
It seems to work correctly - returning 3 cheapest contracts for each network in one query.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

SQLAlchemy subquery comparison - python

Related

How to group by func.week, year etc in Association proxy

SQLAlchemy many-to-many association querying specific child

Exclude something from where clause by using SQLAlchemy core

Strange filter behavior in flask-sqlalchemy

SQLalchemy top 3 results from each category

Categories

Resources