SQLAlchemy: exclude rows taken from a subquery on a query - python

Abstraction of my problem, I have 2 tables. A User table, and a Friendship table.
I'm trying to make a query to list all the users available to be added as friend to User 1, Alice, and also excluding herself, using SQLAlchemy.
Considering there could be a lot of friendships, to find Alice's friends:
friend_subquery = db.session.query(Friendship).filter_by(User_id=1).subquery()
Now I want all the users listed, except Alice, and her friends, Bob and Jack.
friends = (db.session.query(User).
filter(User.ID != 1).
outerjoin(friend_subquery,
User.ID != friend_subquery.c.Friend_id))
My expected result would have been to get User 4 and 5, but this query
returns all except Alice herself. The condition of
User.ID != friend_subquery.c.Friend_id
seem NOT to be working as expected.
P.S. I've done my homework of searching, reading docs, but couldn't figure it out. Thanks for your time.

I assumed that your models are defined as below:
class User(db.Model):
__tablename__ = 'User'
ID = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String(100))
friendships = db.relationship(
'Friendship',
foreign_keys='Friendship.User_id',
backref='friender',
)
friendships_of = db.relationship(
'Friendship',
foreign_keys='Friendship.Friend_id',
backref='friendee',
)
class Friendship(db.Model):
__tablename__ = 'Friendship'
ID = db.Column(db.Integer, primary_key=True)
User_id = db.Column(db.Integer, db.ForeignKey('User.ID'))
Friend_id = db.Column(db.Integer, db.ForeignKey('User.ID'))
In which case two ways to perform this query is shown in the code below. The first query relies on the relationship User.friendships_of, while the second works with explicit joins:
# Add users
u1, u2, u3, u4, u5 = users = [
User(name="Alice"),
User(name="Bob"),
User(name="Jack"),
User(name="Pluto"),
User(name="Mike"),
]
db.session.add_all(users)
# Add friendhips
u1.friendships.append(Friendship(friendee=u2))
u1.friendships.append(Friendship(friendee=u3))
db.session.commit()
# Find Alice
u_alice = db.session.query(User).filter(User.name == 'Alice').one()
# Query (version 1)
q = (
db.session.query(User)
.filter(~User.friendships_of.any(Friendship.User_id == u_alice.ID))
.filter(User.ID != u_alice.ID)
.all()
)
for x in q:
print(x)
# Query (version 2)
q = (
db.session.query(User)
.outerjoin(
Friendship,
db.and_(
u_alice.ID == Friendship.User_id,
User.ID == Friendship.Friend_id,
)
)
.filter(Friendship.ID == None)
.filter(User.ID != u_alice.ID)
.all()
)
for x in q:
print(x)

Related

How to group by func.week, year etc in Association proxy

I have two databases:
class AcUsers(db.Model):
__tablename__ = "ac_users"
id = Column(Integer, primary_key=True)
b2c_customer_id = Column(Integer, ForeignKey("b2c_customer.id",
ondelete="CASCADE"))
ac_tests = db.relationship(
"AcTests",
back_populates="ac_user",
uselist=False,
lazy="joined",
)
last_test_submit_time = association_proxy("ac_tests",
"last_test_submit_time")
class AcTests(db.Model):
__tablename__ = "ac_tests"
id = Column(Integer, primary_key=True)
user_id = Column(Integer, ForeignKey("ac_users.id"))
ac_user = db.relationship(
"AcUsers",
back_populates="ac_tests",
uselist=False,
lazy="joined",
)
first_test_started_time = Column(DateTime)
last_test_submit_time = Column(DateTime)
I have tried to query the database like this:
ac = (
db.session.query(
func.count(AcUsers.id),
func.week(AcUsers.last_test_submit_time),
func.year(AcUsers.last_test_submit_time),
)
.filter(AcUsers.results_sent)
.filter(AcUsers.last_test_submit_time != None)
.all()
)
I got this error:
NotImplementedError: The association proxy can't be used as a plain column expression; it only works inside of a comparison expression
How the I get past this error in order to be able to use the group by week and year in order to know exactly the user for the present dispensation.
I have tried to get the daily, monthly, and yearly data by getting the data in the AcUsers and looping through it:
ac_users = (
db.session.query(AcUsers)
.filter(AcUsers.results_sent)
.filter(AcUsers.last_test_submit_time != None)
.all()
)
daily:
for user in ac_users:
actual_date = user.last_test_submit_time.date()
if actual_date not in daily_data:
daily_data[actual_date] = 1
else:
daily_data[actual_date] += 1
results_sent_daily = []
for x, y in daily_data.items():
results_sent_daily.append({"x": x, "y": y})
I did the same for the monthly and yearly.
How do i make the query use the grouping func into func.year, func.week, func.date etc.
Thank you in advance
As stated in the association proxy documentation, the proxies are not meant for ORM querying aside from filtering.
The most simple thing to do is to craft a query using both models, specifying the join between them and the grouping fields.
ac = (
s.dbs.query(
func.count(AcUsers.id).label("count"),
func.week(AcTests.last_test_submit_time).label("week"),
func.year(AcTests.last_test_submit_time).label("year"),
)
.join(AcUsers.ac_tests)
.group_by(
func.week(AcTests.last_test_submit_time),
func.year(AcTests.last_test_submit_time)
)
.filter(AcTests.last_test_submit_time != None)
.all()
)

SqlAlchemy: Sub-Select with a Dual EXISTS (OR) and an additional boolean check

In SqlAlchemy I need to implement the following subquery, which runs fine in PostgreSQL. It is an OR condition consisting of 2 EXISTS, plus an additional AND block. That whole column results in a True/False boolean value.
SELECT
...
...
,
(SELECT
(
EXISTS (SELECT id from participating_ic_t pi1 where pi1.id = agreement_t_1.participating_ic_id
AND pi1.ic_nihsac = substr(ned_person_t_2.nihsac, 1, 3))
OR EXISTS (SELECT id from external_people_t ep1 where ep1.participating_ic_id = agreement_t_1.participating_ic_id
AND ep1.uniqueidentifier = ned_person_t_2.uniqueidentifier)
)
AND ned_person_t_2.current_flag = 'Y' and ned_person_t_2.inactive_date is null and ned_person_t_2.organizationalstat = 'EMPLOYEE'
) as ACTIVE_APPROVER1,
First of all, if I omit the additional AND block, the following OR-EXISTS by itself works OK:
subq1 = db_session.query(ParticipatingIcT.id).filter((ParticipatingIcT.id == agreement.participating_ic_id),
(ParticipatingIcT.ic_nihsac == func.substr(approver.nihsac, 1, 3)))
.subquery()
subq2 = db_session.query(ExternalPeopleT.id).filter((ExternalPeopleT.participating_ic_id == agreement.participating_ic_id),
(ExternalPeopleT.uniqueidentifier == approver.uniqueidentifier))
.subquery()
subqMain = db_session.query(or_(exists(subq1), exists(subq2))
.subquery()
# ...
# Now we will select from subqMain.
agreements = (db_session.query(
..,
subqMain
But the problem starts when I introduce the final AND block. Conceptually, the final result should be the following:
subqMain = db_session.query(and_(
or_(exists(subq1), exists(subq2)),
approver.current_flag == 'Y',
approver.inactive_date == None,
approver.organizationalstat == 'EMPLOYEE'))
.subquery()
But this actually emits " .. FROM APPROVER_T" right in the sub-query, whereas it should be linked to the FROM APPROVER_T of the main query at the very end. I need to avoid adding the .. FROM [table] which happens as soon as I specify the and_(..). I don't understand why it's doing that. All subqueries are specifically marked as subquery(). The alias approver is defined at the very top as approver = aliased(NedPersonT).
exists1 = db_session.query(
ParticipatingIcT.id
).filter(
(ParticipatingIcT.id == agreement.participating_ic_id),
(ParticipatingIcT.ic_nihsac == func.substr(approver.nihsac, 1, 3))
).exists() # use exists here instead of subquery
exists2 = db_session.query(
ExternalPeopleT.id
).filter(
(ExternalPeopleT.participating_ic_id == agreement.participating_ic_id),
(ExternalPeopleT.uniqueidentifier == approver.uniqueidentifier)
).exists() # use exists again instead of subquery
subqMain = db_session.query(and_(
or_(exists1, exists2),
approver.current_flag == 'Y',
approver.inactive_date == None,
approver.organizationalstat == 'EMPLOYEE')).subquery()
# ...
# Now we will select from subqMain.
agreements = (db_session.query(
OtherModel,
subqMain))
sqlalchemy.orm.Query.exists
Half Baked Example
I also use aliased here because I don't quite understand the method of operations concerning wrapping sub queries in parentheses. It seems to work without it but the RAW sql is confusing to read. This is toy example I made to try and see if the SQL produced was invalid.
class User(Base):
__tablename__ = 'users'
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(String, nullable=False)
hobbies = relationship('Hobby', backref='user')
class Hobby(Base):
__tablename__ = 'hobbies'
id = Column(Integer, primary_key=True, autoincrement=True)
user_id = Column(Integer, ForeignKey('users.id'), nullable = False)
name = Column(String, nullable=False)
Base.metadata.create_all(engine, checkfirst=True)
with Session(engine) as session:
user1 = User(name='user1')
session.add(user1)
session.add_all([
Hobby(name='biking', user=user1),
Hobby(name='running', user=user1),
Hobby(name='eating', user=user1),
])
user2 = User(name='user2')
session.add(user2)
session.add(Hobby(name='biking', user=user2))
session.commit()
nested_user = aliased(User)
subq1 = session.query(Hobby.id).filter(nested_user.id ==Hobby.user_id, Hobby.name.like('bik%'))
subq2 = session.query(Hobby.id).filter(nested_user.id ==Hobby.user_id, Hobby.name.like('eat%'))
subqmain = session.query(nested_user).filter(or_(subq1.exists(), subq2.exists()), nested_user.id > 0).subquery()
q = session.query(User).select_from(User).join(subqmain, User.id == subqmain.c.id)
print (q)
print ([user.name for user in q.all()])
SELECT users.id AS users_id, users.name AS users_name
FROM users JOIN (SELECT users_1.id AS id, users_1.name AS name
FROM users AS users_1
WHERE ((EXISTS (SELECT 1
FROM hobbies
WHERE users_1.id = hobbies.user_id AND hobbies.name LIKE %(name_1)s)) OR (EXISTS (SELECT 1
FROM hobbies
WHERE users_1.id = hobbies.user_id AND hobbies.name LIKE %(name_2)s))) AND users_1.id > %(id_1)s) AS anon_1 ON users.id = anon_1.id
['user1', 'user2']
Something wasn't working with that Sub-Select so I rewrote it as a CASE statement instead. Now it's working. Also, I had to join on a couple of extra tables in the main query to facilitate this new CASE.
casePrimaryApprover = case(
[
(and_(
(agreement.approving_official_id.is_not(None)),
(approver.current_flag == 'Y'),
(approver.inactive_date.is_(None)),
(approver.organizationalstat == 'EMPLOYEE'),
or_(
(participatingIc.ic_nihsac == func.substr(approver.nihsac, 1, 3)),
(externalPeoplePrimaryApprover.id.is_not(None))
)
), 'Y')
],
else_ = 'N'
)
# Main Query
agreements = (db_session.query(
agreement.id,
...
casePrimaryApprover
...
.join(participatingIc, agreement.participating_ic_id == participatingIc.id)
.outerjoin(externalPeoplePrimaryApprover, approver.uniqueidentifier == externalPeoplePrimaryApprover.uniqueidentifier)

Combine two sqlalchemy statements into one

I want to retrieve from my DB a single record, which is a question (I am building a quiz mechanism). I have the following requirements to this hit:
It should be a question from the quiz the user is currently playing
It should be a question which the user has not answered before, eg; it should not be a record in the responses table
The questions should be ordered by category.
The following models are involved:
Questions: Contains all questions from all quizzes
class Questions(db.Model):
# table name
__tablename__ = "questions"
id = db.Column(db.Integer, primary_key=True)
quiz_id = db.Column(db.Integer, ForeignKey("quizzes.id"))
text = db.Column(db.String(80))
category = db.Column(db.Integer)
type = db.Column(db.String(80))
Responses:Contains all responses
class Responses(db.Model):
# table name
__tablename__ = "responses"
id = db.Column(db.Integer, primary_key=True)
session_id = db.Column(db.Integer, ForeignKey("sessions.id"))
question_id = db.Column(db.Integer, ForeignKey("questions.id"))
option_id = db.Column(db.Integer, ForeignKey("options.id"))
answer = db.Column(db.String(80), nullable=True)
Responses: When a user starts a quiz, a session is started. This sessions relates to the quiz being played in order to be able to collect the question, and store and relate the responses to a user.
class Sessions(db.Model):
__tablename__ = "sessions"
id = db.Column(db.Integer, primary_key=True)
quiz_id = db.Column(db.Integer, ForeignKey("quizzes.id"))
user_id = db.Column(db.Integer, ForeignKey("users.id"))e here
For the first and third requirement I have written to following code:
question = Questions.query.filter(Questions.quiz_id == quiz.id).order_by(Questions.category).first()
This works in the sense that it returns the questions in te correct order
For the second requirement I have written the following:
check = Responses.query.filter(Responses.session_id == session[0].id,
Responses.question_id == question.id).scalar() is not None
Next I attempted to create one query, but this was not succesfull. See below code:
question = db.session.query(Questions, Responses).filter(Questions.quiz_id == quiz.id).filter(
Responses.session_id == session[0].id, Responses.question_id == Questions.id).order_by(Questions.category).scalar() is None
The result was it actually only gave me questions which were already answered, instead of unanswered. I am also not sure it's going to filter on the correct question_id already.
EDIT: I think I know have the correct SQL call
SELECT * FROM quiz.questions JOIN quiz.quizzes ON quiz.questions.quiz_id = quiz.quizzes.id WHERE quiz.quizzes.year = 2020 AND quiz.questions.id NOT IN (SELECT question_id FROM quiz.responses WHERE quiz.responses.session_id = 11 ) ORDER BY quiz.questions.category ASC;
You can use except to combine two queries; this will run both queries, and exclude the results of the second query (ie query1 questions - query2 questions):
For example:
# get questions from the current quiz
Questions.query.filter(Questions.quiz_id == quiz.id).\
except(
# except for questions with a response in the current session
Questions.query.filter(
Questions.quiz_id == quiz.id,
Questions.id == Responses.question_id,
Responses.session_id == session[0].id
)
).order_by(Questions.category).first()
In the end, this was the solution:
question = Questions.query\
.filter(Questions.quiz_id == quiz.id) \
.filter(Responses.session_id == session[0].id) \
.filter(Responses.question_id != Questions.id) \
.order_by(Questions.category) \
.order_by(func.rand()) \
.first()

Exclude something from where clause by using SQLAlchemy core

I have the following model:
class Vote(BaseModel):
__tablename__ 'vote'
id = sa.Column(sa.Integer, autoincrement=True, index=True, primary_key=True)
value = sa.Column(sa.Integer, nullable=False)
rated_user_id = sa.Column(
sa.Integer, sa.ForeignKey('user.id', ondelete='cascade'))
rating_user_id = sa.Column(
sa.Integer, sa.ForeignKey('user.id', ondelete='cascade'))
And I just want to make a query with gives me joined data., nevertheless I don't know how to make this query. This is my approach:
query = sa.select(
[votes, users.alias('u1'), users.alias('u2')],
use_labels=True
).select_from(votes.join(users.alias('u1'),votes.c.rated_user_id == users.alias('u1').c.id).join(users.alias('u2'), votes.c.rating_user_id == users.alias('u2').c.id))
Buy it doesn't work because it includes "user" as "u1" in FROM clause.
Thanks!
Each invocation of alias() produces a unique alias object, even if you give them the same label. Instead give the aliases a name and use the same object in every part of your query:
u1 = users.alias('u1')
u2 = users.alias('u2')
query = sa.select([votes, u1, u2], use_labels=True).\
select_from(votes.
join(u1, votes.c.rated_user_id == u1.c.id).
join(u2, votes.c.rating_user_id == u2.c.id))

How do you count the number of self-referential relationships (that fit certain criteria) in a combined query using SQLAlchemy?

I have models that look like this:
class User(db.Model):
id = db.Column(UUID_TYPE, primary_key=True)
class Post(db.Model):
id = db.Column(UUID_TYPE, primary_key=True)
private = db.Column(db.Boolean)
class PostSubscription(db.Model):
user_id = db.Column(UUID_TYPE, db.ForeignKey("users.id"))
user = db.relationship(
"User",
backref=db.backref("subscriptions", cascade="all, delete-orphan")
)
post_id = db.Column(UUID_TYPE, db.ForeignKey("posts.id"))
post = db.relationship(
"Post",
foreign_keys=[post_id],
lazy="joined",
backref=db.backref("subscriptions", cascade="all, delete-orphan"),
)
liked = db.Column(db.Boolean)
I have a query that fetches all of the private PostSubscriptions which a user has access to, e.g.
query = (db.session.query(PostSubscription)
.filter(User.id == user_id, Post.private)
.join(PostSubscription.post)
)
I also want to fetch the total number of likes for the Post in the same query (to avoid the N+1 problem). My issue is that these likes are themselves stored in PostSubscription, which is the main thing I'm fetching in the first place. So the whole mess becomes a bit self-referential / circular.
My initial attempt involved adding a property to the Post model, but as mentioned above, this solution suffers from the N+1 problem, e.g.
#property
def likes(self):
return db.session.query(PostSubscription).filter_by(
post_id=self.id,
liked=True
).count()
After a bit of research, I've realized that what I probably need is to use func.count combined with case, but I can't figure out how I do this in my particular use case. What I want to do is this:
query = (
db.session.query(
PostSubscription,
func.count(case([Post.subscriptions.liked, Post.id]))
)
.filter(User.id == user_id, Post.private)
.join(PostSubscription.post)
)
But that obviously doesn't work because I can't reference a relationship like that. The two tables are already joined, I just don't know how to only get the subscriptions for the Post I already have (that is linked to the PostSubscriptions I am fetching).
Any thoughts?
You could try the following SQLAlchemy query:
from sqlalchemy.orm import aliased, contains_eager
u = 1
PS1 = aliased(PostSubscription)
PS2 = aliased(PostSubscription)
query = db.session.query(PS1, func.count(PS2.liked))\
.join(Post, PS1.post_id == Post.id)\
.join(PS2, PS2.post_id == PS1.post_id)\
.options(contains_eager(PS1.post))\
.filter(PS1.user_id == u, Post.private)\
.group_by(PS1.id)
Which should give the following SQL:
SELECT post.id AS post_id,
post.private AS post_private,
postsubscription_1.id AS postsubscription_1_id,
postsubscription_1.user_id AS postsubscription_1_user_id,
postsubscription_1.post_id AS postsubscription_1_post_id,
postsubscription_1.liked AS postsubscription_1_liked,
Count(postsubscription_2.liked) AS count_1
FROM postsubscription AS postsubscription_1
JOIN post ON postsubscription_1.post_id = post.id
JOIN postsubscription AS postsubscription_2 ON postsubscription_2.post_id = postsubscription_1.post_id
WHERE postsubscription_1.user_id = 1
AND post.private = 1
GROUP BY postsubscription_1.id
Which you could then for example execute and print like this:
result = query.all()
for postsubscription, likes in result:
print(postsubscription, likes)

Categories

Resources