SQL-Alchemy join query - most efficient way to query

SQL-Alchemy join query - most efficient way to query - python

I am trying to generate a query and having difficulty finding the most efficient way to do it in sqlalchemy, (note I'm using flask-sqlalchemy)
The goal is to find all users who have a meeting with a specific user.
So let's say Frank has 10 meetings coming up; I want to generate a list of all the people Frank has a meeting with.
Here are my models:
class UserMeeting(db.Model):
    """Association object tying one user to one meeting (many-to-many link)."""

    __tablename__ = 'userMeeting'

    # All three columns are flagged primary_key, so together they form a
    # composite primary key.
    id = db.Column(db.Integer, primary_key=True)
    meeting_id = db.Column(
        db.Integer, db.ForeignKey('meeting.id'), primary_key=True
    )
    user_id = db.Column(
        db.Integer, db.ForeignKey('user.id'), primary_key=True
    )
class Meeting(db.Model):
    """A meeting; its attendees are reached through UserMeeting rows."""

    __tablename__ = "meeting"

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(128))
    # ... other columns

    # Association rows for this meeting; the backref exposes
    # `UserMeeting.meeting`.
    attendees = db.relationship('UserMeeting', backref='meeting')
class User(db.Model):
    """Application user, identified by a unique, indexed e-mail address."""

    __tablename__ = 'user'

    id = db.Column(db.Integer, primary_key=True)
    email = db.Column(db.String(128), index=True, unique=True)
    password = db.Column(db.String(128))

    # Association rows linking this user to meetings; the backref exposes
    # `UserMeeting.user`.
    attendingMeetings = db.relationship(
        "UserMeeting",
        backref="user",
        cascade="all, delete-orphan",
    )
Here's what I've tried:
# Assume Frank is the user with id == 1
frank = User.query.get(1)
franks_meetings = (
    Meeting.query
    .join(Meeting.attendees)
    .filter(UserMeeting.user == frank)
    .all()
)
# Not an efficient way of getting the users in meetings with Frank:
# every meeting and every attendee row is walked in Python.
users = [
    attendee.user
    for meeting in franks_meetings
    for attendee in meeting.attendees
    if attendee.user != frank
]
# Is there a way to generate just one query and get this data?
I seem to be missing how I could just use joins to get this data. Any help would be appreciated!

You need to join the UserMeeting table with itself, using meeting_id as the join key. You may need to alias the table in order to reference it twice. I don't know if I can type the sqlalchemy syntax for it off the top of my head, but the sql looks like:
-- Self-join on meeting_id: "a" holds user 1's attendance rows, "b" holds the
-- rows of everyone else attending those same meetings.
SELECT DISTINCT b.user_id AS other_user_id
FROM usermeeting AS a
INNER JOIN usermeeting AS b
    ON a.meeting_id = b.meeting_id
WHERE a.user_id = 1
  AND b.user_id != 1;
And 1 is Frank.
Oh, and getting the user details too. Probably you could end up with User objects directly from doing this in sqlalchemy:
-- Same self-join as above, extended with the user table to return the details
-- of each other attendee. DISTINCT applies to the whole (id, email) row.
-- The table is named "user" to match the model's __tablename__; some
-- databases treat that word as reserved and require it to be quoted.
SELECT DISTINCT u.id, u.email
FROM usermeeting AS a
INNER JOIN usermeeting AS b
    ON a.meeting_id = b.meeting_id
INNER JOIN user AS u
    ON b.user_id = u.id
WHERE a.user_id = 1
  AND b.user_id != 1;

Here's the sqlalchemy version of the query for reference:
# Get all users in meetings with Frank (frank.id == 1) in a single query.
# `um` is a second reference to the userMeeting table: it stands for Frank's
# own attendance rows, while the un-aliased UserMeeting stands for the rows of
# the other attendees of those same meetings.
um = aliased(UserMeeting)
frank = User.query.get(1)
q = (
    session.query(User)
    .join(User.attendingMeetings)
    # Explicit self-join on meeting_id, rather than leaving `um` as an extra
    # FROM element that is only tied in through the WHERE clause.
    .join(um, UserMeeting.meeting_id == um.meeting_id)
    .filter(UserMeeting.user_id != frank.id, um.user_id == frank.id)
    # A user who shares several meetings with Frank matches once per meeting;
    # DISTINCT collapses those rows in SQL.
    .distinct()
)
users_meeting_with_frank = q.all()

Related

Filtering by nested object fields when lazy='joined' is set in relationship SQLAlchemy 1.4

class ContactType(Base):
    """Contact type row, described by a required name."""

    __tablename__ = 'contact_type'

    name = Column(String(255), nullable=False)
class Contact(Base):
    """Contact record; every contact references a required contact type."""

    __tablename__ = 'contact'

    first_name = Column(String(255), nullable=False)
    last_name = Column(String(255), nullable=False)
    # NOTE(review): no `id` column is declared on ContactType in this snippet;
    # presumably it comes from Base -- confirm.
    contact_type_id = Column(ForeignKey('contact_type.id'), nullable=False)

    # Eagerly loaded through an INNER JOIN whenever a Contact is selected.
    contact_type = relationship('ContactType', lazy='joined', innerjoin=True)
ideally, I would filter by Contact.contact_type.name , but it doesn't work that way
query = select(Contact).where(ContactType.name == 'some_type') - doesn't work
query = select(Contact).join(ContactType).where(ContactType.name == 'some_type') - works,
but since contact_type = relationship('ContactType', lazy='joined', innerjoin=True) already produces a join of its own (this feature is relied on in other places),
the explicit join adds a second, duplicate JOIN of the ContactType table (visible when echo=True)
if I use contains_eager
query = select(Contact).options(contains_eager(Contact.contact_type)).where(ContactType.name == 'some_type') - works, but
(SAWarning: SELECT statement has a cartesian product between FROM element(s) "contact_type" and FROM element "contact". Apply join condition(s) between each element to resolve.)
Please tell me how I can do this

I asked the same question in a SQLAlchemy GitHub discussion and they suggested this:
# Join through the relationship explicitly, then have contains_eager populate
# Contact.contact_type from that same join, so the filter on ContactType.name
# and the eager load share a single JOIN.
query = select(Contact).join(Contact.contact_type)
query = query.options(contains_eager(Contact.contact_type))
query = query.where(ContactType.name == "some_type")
I checked it, and it works fine.

How to properly select duplicate field names in a raw JOIN query in SQLAlchemy

I am trying to run a raw MySQL JOIN query using Connection.execute() in SQLAlchemy. However, I am having trouble getting the data of any two fields/columns that share the same name.
My models:
class Client(db.Model):
    """Client record with a required name and e-mail address."""

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    client_name = db.Column(db.String(80), nullable=False)
    email = db.Column(db.String(120), nullable=False)
class User(db.Model):
    """User record that may reference a client through `client_id`."""

    __table_args__ = {
        'mysql_engine': 'InnoDB',
        'mysql_charset': 'utf8',
        'mysql_collate': 'utf8_general_ci',
    }

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    username = db.Column(db.String(80), nullable=False)
    email = db.Column(db.String(120), nullable=False)
    # Plain indexed, nullable integer: no ForeignKey is declared on it here.
    client_id = db.Column('client_id', db.Integer, nullable=True, index=True)
As you can see in both models I have a field called id
Then I have the function
app = Flask(__name__)
# Database URL layout: dialect+driver://user:password@host/dbname -- the
# credentials are separated from the host by '@'.
app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql+pymysql://my_user:password@localhost/my_db'
db = SQLAlchemy(app)
sql_statement = """SELECT * FROM User u JOIN Client c ON u.client_id = c.id"""
# Use the connection as a context manager so it is released once the rows
# have been consumed.
with db.engine.connect() as conn:
    info = conn.execute(sql_statement)
    for row in info:
        # The raw row still carries the values of both `id` columns.
        print(row)
        # Converting to a dict keeps one entry per column name, so the two
        # `id` columns end up under a single `id` key.
        row = dict(row)
        print(row)
When I loop through the results of the execution I expect to get back records with two fields u.id and c.id however I am getting only one field with the name id (not even u.id or c.id).
I have tried both SELECT u.*, c.* and SELECT u.id , c.id but that did not work.
The first print (before converting to dict) prints the values of all the fields with both ids, but after I convert it to dict it does not work anymore, and in my use-case, I must convert it to dict.
How can I get the expected behaviour without having to use AS in my query?
Note: If I use SELECT u.id AS u_id, c.id AS c_id it works fine, but I am looking for another way without using AS.

The only work around I found for this issue was to do the following:
# Alias each `id` column with a back-quoted, table-qualified label so the two
# columns come back under different names.
sql_statement = (
    "SELECT u.id AS `u.id`, c.id AS `c.id` "
    "FROM User u JOIN Client c ON u.client_id = c.id"
)
This is not a real solution to the issue, just the workaround I ended up using, and I am still not satisfied with it!

sqlalchemy - Limit for joined table as if they where not joined

I am using sqlalchemy, and I want to take the following inputs from the user and apply them, in the order given, to a table:
a keyword to filter the data with, a column to order by, a limit, and a page number.
now I have many tables. The majority "children" tables - a table that has no children - work. However I have a table with a lot of relationships of all kinds .. one to many on both sides, one to one and many to many
to achieve the above operations, I joined all the tables beforehand. filtering and ordering works fine but limit does not give me the wanted result
Join statement:
# Join every related table up front so that the later filter / order_by
# clauses can reference their columns.
records = (
    m.Activity.query
    .join(m.Event, m.Activity.events)
    .join(m.DateLocation, m.Activity.date_locations)
    .join(m.Goal, m.Activity.goals)
    .join(m.Type, m.Activity.type)
)
The filtering and ordering code contains a lot of unnecessary detail; basically it is something like this:
# Filtering, applied when the chosen column is "event": keep only the joined
# rows whose event name equals the keyword.
records = records.filter(m.Event.name == keyword)
# Ordering, applied when the chosen column is "type" and descending order was
# chosen: sort by the joined type name, descending.
records = records.order_by(m.Type.name.desc())
and finally limit and pagination:
# Page through the result: `limit` rows per page, skipping the rows of the
# previous (page - 1) pages.
records = records.limit(limit).offset((page - 1) * limit)
Let me explain limit behavior vs what I want:
limit in this code works as written: since I joined all the tables, it returns the number of joined rows I asked for. For example, if the join resulted in 5 extra rows and I asked for limit 5, it returns the first 5 joined rows regardless of the original table id.
What I want is the limit behavior from before joining. I only joined the tables in order to filter or order by them; after that, when I say limit(5), I want the first 5 results with distinct ids.
I tried the following(one at a time) but didn't work:
records = records.distinct(m.Activity.id).limit(limit)
records = records.group_by(m.Activity.id).limit(limit)
records = records.from_self().limit(limit)
I tried the solution presented here. It does work HOWEVER it limits the set of data BEFORE joining. which doesn't work in my case since I need to limit the filtered data
EDIT: The models:
# Association table pairing activity ids with event ids; used as the
# `secondary` table of Activity.events.
EventsInActivities = db.Table(
    'events_in_activities',
    db.Column('activity_id', db.String, db.ForeignKey('activity.id')),
    db.Column('event_id', db.Integer(), db.ForeignKey('event.id')),
)
class Event(db.Model, BaseMixin):
    """Named event, linked to activities through `events_in_activities`."""

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.String)
class Type(db.Model, BaseMixin):
    """Activity type with a unique name."""

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.String, unique=True)

    # Activities of this type; the backref exposes `Activity.type`.
    activities = db.relationship(
        "Activity",
        backref="type",
        lazy='dynamic',
    )
class Goal(db.Model, BaseMixin):
    """Named goal belonging to one activity."""

    # `id` and `activity_id` are both flagged primary_key, forming a
    # composite primary key.
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    activity_id = db.Column(
        db.String, db.ForeignKey('activity.id'), primary_key=True
    )
    name = db.Column(db.String())
class DateLocation(db.Model, BaseMixin):
    """Start/end date and location entry belonging to one activity."""

    # `id` and `activity_id` are both flagged primary_key, forming a
    # composite primary key.
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    activity_id = db.Column(
        db.String, db.ForeignKey('activity.id'), primary_key=True
    )
    start_date = db.Column(db.DateTime)
    end_date = db.Column(db.DateTime)
    location = db.Column(db.String())
class Activity(db.Model, BaseMixin):
    """Activity with a string id, a type, date/locations, goals and events."""

    id = db.Column(db.String, primary_key=True)
    name = db.Column(db.String())
    type_id = db.Column(db.Integer, db.ForeignKey('type.id'))

    # Date/location rows, ordered by their start date; the backref exposes
    # `DateLocation.activity`.
    date_locations = db.relationship(
        "DateLocation",
        order_by='DateLocation.start_date',
        cascade="all, delete",
        backref="activity",
        lazy='dynamic',
    )
    # Goal rows; the backref exposes `Goal.activity`.
    goals = db.relationship(
        "Goal",
        cascade="all, delete",
        backref="activity",
        lazy='dynamic',
    )
    # Many-to-many link to Event through the association table; the backref
    # exposes a dynamic `Event.activities`.
    events = db.relationship(
        'Event',
        secondary=EventsInActivities,
        backref=db.backref('activities', lazy='dynamic'),
    )

You could replace at least some of the joins for filters with EXISTS subquery expressions, or semijoins in a way. This way your query avoids producing multiple rows for single activity. It is ok to still join against Type, since it's a many to one relationship:
# Filter through EXISTS subqueries (relationship.any()) instead of joins, so
# each activity yields a single row and LIMIT counts activities. Type is still
# joined because it is needed for the ordering. The `...` arguments are
# placeholders for further criteria.
records = (
    m.Activity.query
    .join(m.Activity.type)
    .filter(m.Activity.events.any(name=keyword))
    .filter(m.Activity.goals.any(name=...))
    .filter(...)
    .order_by(m.Type.name.desc())
    .limit(limit)
    .offset((page - 1) * limit)
)
Passing keyword arguments to any() is a similar shorthand as filter_by(). It accepts complex criterion expressions as well, as positional arguments.
The distinct(m.Activity.id), or DISTINCT ON, should've worked as well, as long as you then use the results as a subquery, to which you then apply the ordering and limit:
# Join and filter first, reduce to one row per activity with
# distinct(m.Activity.id), then use that result as a subquery (from_self) and
# apply the ordering and the limit/offset to it. The `...` argument is a
# placeholder for further criteria.
records = (
    m.Activity.query
    .join(m.Activity.events)
    .join(m.Activity.date_locations)
    .join(m.Activity.goals)
    .filter(m.Event.name == keyword)
    .filter(...)
    .distinct(m.Activity.id)
    .from_self()
    .join(m.Activity.type)
    .order_by(m.Type.name.desc())
    .limit(limit)
    .offset((page - 1) * limit)
)

How can I construct a count aggregation over a join with SqlAlchemy?

I have a table of users, a table of groups that those users may belong to, and a join table between users and groups.
This is represented in SQLAlchemy as follows:
class User(Base):
    """User who may belong to any number of groups."""

    __tablename__ = 'user'

    user_id = Column(Integer, primary_key=True)
    name = Column(String(250), nullable=False)
    email = Column(String(250), nullable=False)

    # Groups reached through the `user_group_pair` join table.
    groups = relationship('Group', secondary='user_group_pair')
class Group(Base):
    """Group that users may belong to."""

    __tablename__ = 'group'

    group_id = Column(Integer, primary_key=True)
    name = Column(String(250), nullable=False)
    date_created = Column(String(250), nullable=False)

    # Users reached through the `user_group_pair` join table.
    members = relationship('User', secondary='user_group_pair')
class User_Group_Pair(Base):
    """Join-table row recording that one user belongs to one group."""

    __tablename__ = 'user_group_pair'

    user_group_pair_id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey('user.user_id'))
    group_id = Column(Integer, ForeignKey('group.group_id'))

    # Direct links to both sides; the backrefs expose `User.group_assoc` and
    # `Group.user_assoc`.
    user = relationship(User, backref=backref("group_assoc"))
    group = relationship(Group, backref=backref("user_assoc"))
I'm trying to solve the following simple problem:
I want to write a query that will return a list of users along with the number of groups that each of them belongs to.
This requires data from both User and User_Group_Pair (thus why the title of my question refers to a join), and a count aggregation grouped by user_id.
I'm not sure why this won't work:
# Per-user group count: one row per user_id, with the number of pair rows
# labelled 'count', wrapped as a subquery.
# NOTE: `subq` is an Alias whose columns are exposed under `subq.c`; the
# `subq.user_id` access in the join below is what raises the AttributeError.
subq = session.query(User_Group_Pair.user_id.label('user_id'), func.count(User_Group_Pair.user_group_pair_id).label('count')).\
group_by(User_Group_Pair.user_id).order_by('count ASC').subquery()
result = session.query(User).join(subq, User.user_id == subq.user_id).all()
I get this error:
'Alias' object has no attribute 'user_id'
However, note that I have labelled User_Group_Pair.user_id with the label 'user_id'... Any thoughts?
Thank you

Just change subq.user_id to subq.c.user_id (c stands for columns) to make it work:
result = session.query(User).join(subq, User.user_id == subq.c.user_id).all()
But still you will get only those users which belong to at least one group, and the number of groups is not really returned in the result of the query. The query below is an approach to solve this issue:
# Outer-join users to their groups and count the groups per user, so users
# without any group are still returned.
group_count = func.count(Group.group_id).label("num_groups")
q = (
    session.query(User, group_count)
    .outerjoin(Group, User.groups)
    .group_by(User.user_id)
)
for user, num_groups in q:
    print(user, num_groups)

http://docs.sqlalchemy.org/en/rel_1_0/orm/tutorial.html#using-subqueries
subquery() method on Query produces a SQL expression construct representing a SELECT statement embedded within an alias. The columns on the statement are accessible through an attribute called c.
You can use column names with .c.column_name in your query
result = session.query(User).join(subq, User.user_id == subq.c.user_id).all()

How to sort a table by maximum value of a column using flask/sqlalchemy?

I have a self-referential table with the User class below:
class User(db.Model):
    """User profile with a self-referential friendship relationship."""

    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(50), unique=True)
    password = db.Column(db.String(50))
    email = db.Column(db.String(100), index=True, unique=True)
    age = db.Column(db.SmallInteger())
    about_user = db.Column(db.String(500))
    img_url = db.Column(db.String(120))

    # Self-referential many-to-many through the `friends` table: this side
    # matches friends.user_id, the other side matches friends.friend_id.
    # The backref exposes the reverse direction as the dynamic `User.friends`.
    is_friend = db.relationship(
        'User',
        secondary=friends,
        primaryjoin=(friends.c.user_id == id),
        secondaryjoin=(friends.c.friend_id == id),
        backref=db.backref('friends', lazy='dynamic'),
        lazy='dynamic',
    )
and I would like to sort the query based on the maximum number of is_friend(). The query must be something like the one below:
User.query.order_by(func.max(User.is_friend)).paginate(page, MONKEYS_PER_PAGE, False)
but the above query is not allowed; it fails with AttributeError: 'Table' object has no attribute 'type'
What is the right way to sort the table based on the maximum number of friend that a user have?

# Count the number of friends for each user.
# Friends are users as well, so the friend side needs an alias.
friend = db.aliased(User)
# Subquery: one row per user id with its friend count labelled 'fc'.
# The OUTER join keeps users that have no friends at all; count(friend.id)
# only counts matched rows, so those users get fc == 0 instead of being
# dropped from the listing.
sub = (
    db.session.query(
        User.id,
        db.func.count(friend.id).label('fc'),
    )
    .outerjoin(friend, User.friends)
    .group_by(User.id)
    .subquery()
)
# Query users, join on the subquery to get each user's friend count, and
# order by that count, descending, before paginating.
users = (
    db.session.query(User)
    .join(sub, sub.c.id == User.id)
    .order_by(sub.c.fc.desc())
    .paginate(page, MONKEYS_PER_PAGE, False)
)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

SQL-Alchemy join query - most efficient way to query - python

Related

Filtering by nested object fields when lazy='joined' is set in relationship SQLAlchemy 1.4

How to properly select duplicate field names in a raw JOIN query in SQLAlchemy

sqlalchemy - Limit for joined table as if they where not joined

How can I construct a count aggregation over a join with SqlAlchemy?

How to sort a table by maximum value of a column using flask/sqlalchemy?

Categories

Resources