Select item having maximum from sqlalchemy relationship - python

Given this pair of classes:
class Thing(Base):
id = Column(Integer, primary_key=True)
class ThingInfo(Base):
id = Column(Integer, primary_key=True)
thing_id = Column(Integer, ForeignKey(Thing))
recorded_at = Column(DateTime)
thing = relationship(Thing, backref='all_info')
How can I define a property Thing.newest_info to achieve:
t = s.query(Thing).first()
newest_info = max(t.all_info, key=lambda i: i.recorded_at)
print newest_info
#equivalent to:
t = s.query(Thing).first()
print t.newest_info
I'd like to do this with a column_property or relationship, not a normal property. So far what I have is:
select([ThingInfo])
.group_by(ThingInfo.thing)
.having(func.max(ThingInfo.recorded_at))
But I can't figure out how to attach this as a property of a single Thing object.

Add an order_by clause to the relationship and this becomes trivial:
class ThingInfo(Base):
    """A timestamped record attached to a Thing."""
    id = Column(Integer, primary_key=True)
    thing_id = Column(Integer, ForeignKey(Thing))
    recorded_at = Column(DateTime)
    # The backref is ordered by recorded_at, so Thing.all_info comes back
    # sorted from oldest to newest.
    thing = relationship(
        Thing,
        backref=backref('all_info', order_by='ThingInfo.recorded_at'),
    )


thing = session.query(Thing).get(id)
# Ascending order means the newest record is the last element.
# Note: this raises IndexError if the Thing has no ThingInfo rows.
newest_info = thing.all_info[-1]
or alternatively backref=backref('all_info', order_by='desc(ThingInfo.recorded_at)') and newest_info=thing.all_info[0].

Ok, here's a working attempt:
# Alias of ThingInfo used inside the correlated subquery, so the outer
# ThingInfo row can be compared against its siblings (same thing_id).
t = aliased(ThingInfo)

# True when this row's recorded_at equals the maximum recorded_at among all
# ThingInfo rows sharing its thing_id.
ThingInfo.is_newest = column_property(
    select([
        ThingInfo.recorded_at == func.max(t.recorded_at)
    ])
    .select_from(t)  # was `r`, an undefined name; the alias is `t`
    .where(t.thing_id == ThingInfo.thing_id)
)

# Read-only scalar relationship: the ThingInfo of this Thing flagged as newest.
Thing.newest_info = relationship(
    ThingInfo,
    viewonly=True,
    uselist=False,
    primaryjoin=(ThingInfo.thing_id == Thing.id) & ThingInfo.is_newest
)
Things I dislike about this:
I'm having to specify how to join Things to ThingInfos in a second place
I'm still trying to work out how to write this using a group_by

Related

How to access column values in SQLAlchemy result list after a join a query

I need to access the columns of a query result. I have these models:
class Order(Base):
__tablename__ = "orders"
internal_id = Column(Integer, primary_key=True)
total_cost = Column(Float, nullable=False)
created_at = Column(TIMESTAMP(timezone=True), nullable=False, server_default=text("now()"))
customer_id = Column(Integer, ForeignKey("customers.id", ondelete="CASCADE"), nullable=False)
customer = relationship("Customer")
class Item(Base):
__tablename__ = "items"
id = Column(Integer, primary_key=True, nullable=False)
internal_id = Column(Integer, nullable=False)
price = Column(Float, nullable=False)
description = Column(String, nullable=False)
order_id = Column(Integer, ForeignKey("orders.internal_id", ondelete="CASCADE"), nullable=False)
order = relationship("Order")
Now I run this left join query that gives me all the columns from both tables
result = db.query(Order, Item).join(Item, Item.order_id == Order.internal_id, isouter=True).filter(Item.order_id == order_id).all()
I get back a list of tuples. How do I access a particular column of the result list? Doing something like this
for i in result:
print(i.???) # NOW WHAT?
I get AttributeError: Could not locate column in row for column any time I try to fetch it by the name I declared.
this is the full function where I need to use it
#router.get("/{order_id}")
def get_orders(order_id: int, db: Session = Depends(get_db)):
""" Get one order by id. """
# select * from orders left join items on orders.internal_id = items.order_id where orders.internal_id = {order_id};
result = db.query(Order, Item).join(Item, Item.order_id == Order.internal_id, isouter=True).filter(Item.order_id == order_id).all()
for i in result:
print(i.description) # whatever value i put here it errors out
This is the traceback
...
print(i.description) # whatever value i put here it errors out
AttributeError: Could not locate column in row for column 'description'
At least if I could somehow get the column names... But I just can't get them. I've tried keys(), _metadata.keys, etc. Nothing has worked so far.
If additional implicit queries are not an issue for you, you can do something like this:
class Order(Base):
__tablename__ = "orders"
internal_id = Column(Integer, primary_key=True)
total_cost = Column(Float, nullable=False)
created_at = Column(TIMESTAMP(timezone=True), nullable=False, server_default=text("now()"))
customer_id = Column(Integer, ForeignKey("customers.id", ondelete="CASCADE"), nullable=False)
customer = relationship("Customer")
items = relationship("Item", lazy="dynamic")
# Fetch the order via an outer join on its items; .first() returns None when
# no order matches order_id. The keyword is `isouter` (was misspelled).
order = (
    session.query(Order)
    .join(Item, Order.internal_id == Item.order_id, isouter=True)
    .filter(Order.internal_id == order_id)
    .first()
)
if order:
    # Iterating order.items goes through the relationship declared on Order.
    for i in order.items:
        print(i.description)
    print(order.total_cost)
However, to avoid an additional query when accessing items, you can use the contains_eager option:
from sqlalchemy.orm import contains_eager

# contains_eager tells the ORM to populate Order.items from the rows of the
# explicit join below instead of issuing a separate query.
# Fixes: `isoutrr` -> `isouter`, and the missing ")" closing .options(...).
# .all() returns a list of Order objects.
order = (
    session.query(Order)
    .join(Item, Order.internal_id == Item.order_id, isouter=True)
    .options(contains_eager(Order.items))
    .filter(Order.internal_id == order_id)
    .all()
)
Here you have some examples: https://jorzel.hashnode.dev/an-orm-can-bite-you
OK, so actually the answer is quite simple. You just need to use dot notation, such as i.Order.total_cost, or whichever other field from the Order model:
result = db.query(Order, Item).join(Item, Item.order_id == Order.internal_id, isouter=True).filter(Item.order_id == order_id).all()
for i in result:
print(i.Order.total_cost)
print(i.Item.description)

SQLAlchemy counting all rows, I want specific rows

In words, I'm trying to achieve this goal:
"Get 5 comments where comment.post_id == self.context.id and sort those by the highest number of Comment_Vote.vote_type == 'like' "
Currently the models are:
vote_enum = ENUM('like', 'dislike', name='vote_enum', create_type=False)
class User(Base):
__tablename__='users'
id = Column(Integer, primary_key=True, autoincrement=True)
username = Column(String(65), nullable=False)
comments = relationship('Comment', backref='user')
comment_vote = relationship('Comment_Vote', backref='user')
posts=relationship('Post', backref='user')
class Post(Base):
__tablename__ = 'post'
id = Column(Integer, primary_key=True, autoincrement=True)
body= Column(String(1500))
comments= relationship('Comment',backref='post', order_by='desc(Comment.date_created)', lazy='dynamic')
owner_id= Column(Integer, ForeignKey('users.id'))
class Comment(Base):
__tablename__='comment'
id = Column(Integer, primary_key=True, autoincrement=True)
body= Column(String(500))
parent_id = Column(Integer, ForeignKey('comment.id'))
post_id= Column(Integer, ForeignKey('post.id'))
user_id= Column(Integer, ForeignKey('users.id'))
children = relationship("Comment",
backref=backref('parent', remote_side=[id]),
lazy='dynamic'
)
del_flag= Column(Boolean, default=False)
date_created= Column(DateTime(), default=datetime.datetime.utcnow())
last_edited= Column(DateTime(), default=datetime.datetime.utcnow())
comment_vote= relationship("Comment_Vote", backref="comment", lazy='dynamic')
class Comment_Vote(Base):
__tablename__='comment_vote'
id = Column(Integer, primary_key=True, autoincrement=True)
user_id= Column(Integer, ForeignKey('users.id'))
comment_id= Column(Integer, ForeignKey('comment.id'))
vote_type = Column('vote_enum', vote_enum)
#classmethod
def total_likes(cls, comment_id, session):
return session.query(cls).filter(cls.id == comment_id).first().comment_vote.filter(Comment_Vote.vote_type=='like').count()
My functioning query is:
f = session.query(Comment_Vote.comment_id, funcfilter(func.count(1), Comment_Vote.vote_type == 'like').label('total_likes')).group_by(Comment_Vote.comment_id).subquery()
comments = session.query(Comment, f.c.total_likes).join(f, Comment.id==f.c.comment_id).filter(Comment.post_id == self.context.id).order_by('total_likes DESC').limit(5)
This has the nasty side effect of counting ALL comment_vote 'likes', even for comments that aren't relevant to that post.
I'd be really grateful for a bit of advice on how to rearrange this so it didn't have to count everything first. What I want may not be possible, and I'm working mostly within the ORM.
DB behind the SQLAlchemy is Postgresql.
This could be a nice place to use a lateral subquery. It is the "foreach" of SQL, which is to say that a lateral subquery can reference columns of preceding FROM items. Postgresql supports lateral from versions 9.3 and up, SQLAlchemy from versions 1.1 and up:
from sqlalchemy import true
f = session.query(func.count(1).label('total_likes')).\
filter(Comment_Vote.comment_id == Comment.id, # References Comment
Comment_Vote.vote_type == 'like').\
subquery().\
lateral()
comments = session.query(Comment, f.c.total_likes).\
join(f, true()).\
filter(Comment.post_id == self.context.id).\
order_by(f.c.total_likes.desc()).\
limit(5)
I moved filtering based on vote_type to WHERE clause of the subquery, as it's unnecessary in this case to first fetch all rows and then filter in the aggregate function (which also cannot use indexes).
Of course in this case you could also use a scalar subquery in the SELECT output for same effect:
f = session.query(func.count(1)).\
filter(Comment_Vote.comment_id == Comment.id, # References Comment
Comment_Vote.vote_type == 'like').\
label('total_likes')
comments = session.query(Comment, f).\
filter(Comment.post_id == self.context.id).\
order_by(f.desc()).\
limit(5)

How do you use the `secondary` kwarg to fit this situation in SqlAlchemy?

I have a situation where a user can belong to many courses, and a course can contain many users. I have it modeled in SqlAlchemy like so:
class User(Base):
__tablename__ = 'users'
id = Column(Integer, primary_key=True)
class Course(Base):
__tablename__ = 'courses'
id = Column(Integer, primary_key=True)
archived = Column(DateTime)
class CourseJoin(Base):
__tablename__ = 'course_joins'
id = Column(Integer, primary_key=True)
# Foreign keys
user_id = Column(Integer, ForeignKey('users.id'))
course_id = Column(Integer, ForeignKey('courses.id'))
In the system, we have the ability to "archive" a course. This is marked by a datetime field on the course model. I would like to give the User model a relationship called course_joins that only contains CourseJoins where the respective Course hasn't been archived. I'm trying to use the secondary kwarg to accomplish this like so:
class User(Base):
__tablename__ = 'users'
id = Column(Integer, primary_key=True)
course_joins = relationship('CourseJoin',
secondary='join(Course, CourseJoin.course_id == Course.id)',
primaryjoin='and_(CourseJoin.user_id == User.id,'
'Course.archived == None)',
order_by='CourseJoin.created')
However I'm getting this error:
InvalidRequestError: One or more mappers failed to initialize - can't proceed with initialization of other mappers. Original exception was: FROM expression expected
I believe this is the exact usecase for the secondary kwarg of relationship(), but I'm not sure what's going on.
If you really just have a many-to-many relationship (plus a created column), I think the right way to define the relationship is:
courses = relationship(
'Course',
secondary='course_joins',
primaryjoin='users.c.id == course_joins.c.user_id',
secondaryjoin='and_(courses.c.id == course_joins.c.course_id, courses.c.archived == None)',
order_by='course_joins.c.created',
viewonly=True,
)
and use it like:
u1 = User(courses=[Course()])
session.add(u1)
u2 = User(courses=[Course(archived=datetime.date(2013, 1, 1))])
session.add(u2)
Otherwise, just drop the secondary completely and add your other condition to primaryjoin:
courses = relationship(
'CourseJoin',
primaryjoin=\
'and_(users.c.id == course_joins.c.user_id, '
'courses.c.id == course_joins.c.course_id, '
'courses.c.archived == None)',
order_by='course_joins.c.created',
)

create_or_get entry in a table

I have two related classes as below:
class IP(Base):
__tablename__ = 'ip'
id = Column(Integer, primary_key=True)
value = Column(String, unique=True)
topics = relationship('Topic')
class Topic(Base):
__tablename__ = 'topic'
id = Column(Integer, primary_key=True)
value = Column(String)
ip_id = Column(Integer, ForeignKey('ip.id'))
ip = relationship('IP')
if __name__ == '__main__':
Base.metadata.create_all(engine)
topics = [
Topic(value='t1', ip=IP(value='239.255.48.1')),
Topic(value='t2', ip=IP(value='239.255.48.1')),
Topic(value='t3', ip=IP(value='239.255.48.1'))
]
session.add_all(topics)
The above doesn't work, as it tries to add different IP entries with the same value. Is it possible to create or get the existing one so that I can use it like below?
topics = [
Topic(value='t1', ip=create_or_get(value='239.255.48.1')),
Topic(value='t2', ip=create_or_get(value='239.255.48.1')),
Topic(value='t3', ip=create_or_get(value='239.255.48.1'))
]
Sure, just create the function:
def create_or_get(value):
    """Return the IP whose ``value`` matches, creating one if none exists.

    Looks the IP up through the enclosing-scope ``session``. When no row is
    found, a new ``IP(value=value)`` is added to that session (it is not
    committed here) and returned.
    """
    obj = session.query(IP).filter(IP.value == value).first()
    # .first() returns None when there is no match; test for that explicitly
    # rather than relying on the truthiness of a mapped object.
    if obj is None:
        obj = IP(value=value)
        session.add(obj)
    return obj
Of course, it needs a session, but if you use scoped_session factory, it is straightforward. Alternatively, you might look into events, but it gets too complicated for the problem to solve.

Having issues creating a query with an inner join and an outer join

I have the following model (simplified):
class User(Base):
__tablename__ = 'user'
id = Column(Integer, primary_key=True)
class Thing(Base):
__tablename__ = 'thing'
id = Column(Integer, primary_key=True)
class Relationship(Base):
__tablename__ = 'relationship'
id = Column(Integer, primary_key=True)
parent_id = Column(Integer, ForeignKey('thing.id'))
parent = relationship('Thing', backref='parentrelationships', primaryjoin = "Relationship.parent_id == Thing.id")
child_id = Column(Integer, ForeignKey('thing.id'))
child = relationship('Thing', backref='childrelationships', primaryjoin = "Relationship.child_id == Thing.id")
class Vote(Base):
    """A vote cast by a User on a Relationship."""
    __tablename__ = 'vote'
    id = Column(Integer, primary_key=True)
    # The Relationship row this vote applies to; reachable as Relationship.votes.
    rel_id = Column(Integer, ForeignKey('relationship.id'))
    rel = relationship('Relationship', backref='votes')
    # The User who cast the vote; reachable as User.votes.
    voter_id = Column(Integer, ForeignKey('user.id'))
    voter = relationship('User', backref='votes')
I wanted to query all Relationships with a certain parent, and I also want to query votes made by a certain user on those Relationships. What I've tried:
def get_relationships(thisthing, thisuser):
return DBSession.query(Relationship, Vote).\
filter(Relationship.parent_id == thisthing.id).\
outerjoin(Vote, Relationship.id == Vote.rel_id).\
filter(Vote.voter_id == thisuser.id).\
filter(Vote.rel_id == Relationship.id).\
all()
as well as:
def get_relationships(thisthing, thisuser):
session = DBSession()
rels = session.query(Relationship).\
filter(Relationship.parent_id == thisthing.id).\
subquery()
return session.query(rels, Vote).\
outerjoin(Vote, rels.c.id == Vote.rel_id).\
filter(Vote.voter_id == thisuser.id).\
all()
I get nulls when I do either of these queries. What am I doing wrong?
Just turn on SQL logging (echo=True) and you will see that the resulting SQL query for the first option is something like:
SELECT relationship.id AS relationship_id, relationship.parent_id AS relationship_parent_id, relationship.child_id AS relationship_child_id, vote.id AS vote_id, vote.rel_id AS vote_rel_id, vote.voter_id AS vote_voter_id
FROM relationship LEFT OUTER JOIN vote ON relationship.id = vote.rel_id
WHERE relationship.parent_id = ? AND vote.voter_id = ? AND vote.rel_id = relationship.id
If you examine it, you will notice that the clause vote.rel_id = relationship.id is part of both the JOIN clause and the WHERE clause, which makes the query filter out those Relationship rows that do not have any votes by the requested user.
Solution:
Remove redundant filter(Vote.rel_id == Relationship.id). part from the query.
Edit-1: Also move the user filter, filter(Vote.voter_id == thisuser.id), out of the WHERE clause and into the LEFT JOIN condition: outerjoin(Vote, and_(Relationship.id == Vote.rel_id, Vote.voter_id == thisuser.id)).

Categories

Resources