Grouping totals in SQLAlchemy

Grouping totals in SQLAlchemy - python

I can't seem to find any good documentation on this. I have a list of users and order amounts, and I want to display the users with the top 10 order amount totals. I've been having trouble creating a query that sufficiently extracts this data in SQLAlchemy. Is there a better way to approach this?
customers, amount = DBSession.query(Order.customer, func.sum(Order.amount).label('totalamount')).\
group_by(Order.customer).\
order_by(func.desc(totalamount)).\
limit(10)
for a, b in zip(customers, amount):
print a.name, str(amount)

import random

from sqlalchemy import Column, ForeignKey, Integer, Unicode, create_engine, func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session, relationship

Base = declarative_base()


class Customer(Base):
    """A customer row; ``orders`` holds the related Order objects."""

    __tablename__ = 'customer'

    id = Column(Integer, primary_key=True)
    name = Column(Unicode)
    # The backref also creates an ``Order.customer`` attribute.
    orders = relationship("Order", backref="customer")


class Order(Base):
    """A single order amount belonging to one customer."""

    __tablename__ = "order"

    id = Column(Integer, primary_key=True)
    customer_id = Column(Integer, ForeignKey('customer.id'))
    amount = Column(Integer)


# In-memory SQLite database; echo=True logs every emitted statement.
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

session = Session(e)
# 100 customers, each with 0-5 orders of a random amount between 10 and 100.
session.add_all([
    Customer(name="c%d" % i, orders=[
        Order(amount=random.randint(10, 100))
        for _ in range(random.randint(0, 5))
    ])
    for i in range(100)
])

# Subquery: total per customer_id, restricted to the ten largest totals.
amount_sum = func.sum(Order.amount).label('totalamount')
amount = session.query(Order.customer_id, amount_sum).\
    group_by(Order.customer_id).\
    order_by(amount_sum.desc()).\
    limit(10).\
    subquery()

# Join the totals back to Customer to get full objects next to each total.
# The ORDER BY is repeated on the outer query because the ordering inside a
# subquery is not guaranteed to be preserved by the join.
for a, b in session.query(Customer, amount.c.totalamount).\
        join(amount, amount.c.customer_id == Customer.id).\
        order_by(amount.c.totalamount.desc()):
    print("%s %s" % (a.name, b))
Some guidelines on this pattern are at http://www.sqlalchemy.org/docs/orm/tutorial.html#using-subqueries, but in general, work the query out in plain SQL first and then translate it to SQLAlchemy.

Related

SqlAlchemy "contains_eager" does not appear to load nested relationships

I have a schema as follows:
Thing # Base class for below tables
- id
Ball (Thing)
- color
Bin (Thing)
- ball -> Ball.id
Court (Thing)
- homeBin -> Bin.id
- awayBin -> Bin.id
I'd like to ensure that whenever I load a set of Courts, it includes the latest Ball column values. From what I understand, contains_eager() might be able to help with that:
Indicate that the given attribute should be eagerly loaded from columns stated manually in the query.
I have a test that queries every few seconds for any Courts. I'm finding that, even with contains_eager, I only ever see the same value for Ball.color, even though I've explicitly updated the column's value in the database.
Why does sqlalchemy appear to reuse this old data?
Below is a working example of what's happening:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Thing(Base):
    """Polymorphic base class mapped to the "Things" table."""

    __tablename__ = "Things"

    id = Column(Integer, primary_key=True)
    name = Column(String(256))
    thingType = Column(String(256))

    __mapper_args__ = {
        # '*' makes queries against this class join in every subclass table.
        'with_polymorphic': '*',
        # The thingType column records which subclass a row belongs to.
        'polymorphic_on': "thingType",
        'polymorphic_identity': "thing"
    }
class Ball(Thing):
    """Joined-table subclass of Thing stored in "Balls"."""

    __tablename__ = "Balls"

    # Primary key doubles as the foreign key to the parent "Things" row.
    id = Column('id', Integer, ForeignKey('Things.id'), primary_key=True)
    color = Column('color', String(256))

    __mapper_args__ = {
        'polymorphic_identity': 'ball'
    }
class Bin(Thing):
    """Joined-table subclass of Thing stored in "Bins"; references one Ball."""

    __tablename__ = "Bins"

    # Primary key doubles as the foreign key to the parent "Things" row.
    id = Column('id', Integer, ForeignKey('Things.id'), primary_key=True)
    shape = Column('shape', String(256))
    ballId = Column('ballId', Integer, ForeignKey('Balls.id'))
    # foreign_keys names the column that drives this relationship; the
    # backref adds Ball.outputBins on the other side.
    ball = relationship(Ball, foreign_keys=[ballId], backref="outputBins")

    __mapper_args__ = {
        'polymorphic_identity': 'bin'
    }
class Court(Thing):
    """Joined-table subclass of Thing stored in "Courts"; has two Bins."""

    __tablename__ = "Courts"

    # Primary key doubles as the foreign key to the parent "Things" row.
    id = Column('id', Integer, ForeignKey('Things.id'), primary_key=True)
    homeBinId = Column('homeBinId', Integer, ForeignKey('Bins.id'))
    awayBinId = Column('awayBinId', Integer, ForeignKey('Bins.id'))
    # Two relationships target Bin, so each one names its own foreign key
    # column explicitly.
    homeBin = relationship(Bin, foreign_keys=[homeBinId], backref="homeCourts")
    awayBin = relationship(Bin, foreign_keys=[awayBinId], backref="awayCourts")

    __mapper_args__ = {
        'polymorphic_identity': 'court'
    }
# NOTE(review): this MetaData object is not referenced again in the snippet.
metadata = MetaData()
engine = create_engine("postgresql://localhost:5432/")
Session = sessionmaker(bind=engine)
# A single Session instance, used by courtQuery() for every poll.
session = Session()
def courtQuery():
    """Return all Courts with both bins and their balls eagerly populated.

    Each relationship path is outer-joined explicitly through an alias, and
    contains_eager() tells the ORM to fill the relationship from those
    joined columns.

    NOTE(review): the query runs on the long-lived module-level ``session``.
    Objects already present in a Session are not overwritten by a later
    query unless they have been expired or the query uses
    ``populate_existing()`` -- presumably why Ball.color looks stale; confirm.
    """
    awayBalls = aliased(Ball, name="awayBalls")
    homeBalls = aliased(Ball, name="homeBalls")
    awayBins = aliased(Bin, name="awayBins")
    homeBins = aliased(Bin, name="homeBins")
    query = session.query(Court)\
        .outerjoin(awayBins, Court.awayBinId == awayBins.id)\
        .outerjoin(awayBalls, awayBins.ballId == awayBalls.id)\
        .outerjoin(homeBins, Court.homeBinId == homeBins.id)\
        .outerjoin(homeBalls, homeBins.ballId == homeBalls.id)\
        .options(contains_eager(Court.awayBin, alias=awayBins).contains_eager(awayBins.ball, alias=awayBalls))\
        .options(contains_eager(Court.homeBin, alias=homeBins).contains_eager(homeBins.ball, alias=homeBalls))
    return [r for r in query]
import time

# Poll forever, printing the colour of the first court's home-bin ball.
while True:
    results = courtQuery()
    court = results[0]
    ball = court.homeBin.ball
    print(ball.color) # does not change
    time.sleep(2)
Environment:
Python 2.7.14
SqlAlchemy 1.3.0b1
PostgreSQL 11.3 (though I've seen this on Oracle as well)

Python/SQLAlchemy - Need to convert an inner join query with 3 tables into Python

I need some help converting this query into SQLAlchemy.
select field from table t1 join table t2 on t1.detail_id = t2.id join table t3 on t3.id = t2.rate_id where t2.name = 'fred' and t3.rate_type = 'Custom' and t3.description = 'Default';
I have been able to convert inner join queries with two tables, but need some help with this one.
I appreciate your help. TIA.

If a simple SQL query is enough, you can try:
# Textual SQL with named bind parameters (:name, :rate_type, :description);
# the values are supplied separately in a dict.
statement = (
    "SELECT t1.field AS t1_field "
    "FROM t1 JOIN t2 ON t1.detail_id = t2.id JOIN t3 ON t2.rate_id = t3.id "
    "WHERE t2.name = :name AND t3.rate_type = :rate_type AND t3.description = :description"
)
bind_params = {'name': 'fred', 'rate_type': 'Custom', 'description': 'Default'}
session.execute(statement, bind_params)
But if you want to use SQLAlchemy declarative base then the query would look like:
# ORM form of the three-table join. The filter literals match the SQL in the
# question ('Custom', 'Default'); string comparison may be case-sensitive
# depending on the database.
results = (
    session.query(T1.field)
    .join(T2, T1.detail_id == T2.id)
    .join(T3, T2.rate_id == T3.id)
    .filter(T2.name == 'fred')
    .filter(T3.rate_type == 'Custom')
    .filter(T3.description == 'Default')
    .all()
)
For the following models:
from sqlalchemy import create_engine, Integer, ForeignKey, String, Column
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class T1(Base):
    """Maps table ``t1``; ``detail_id`` points at a row in ``t2``."""

    __tablename__ = 't1'

    id = Column(Integer, primary_key=True)
    field = Column(String)
    # The query joins t1.detail_id = t2.id, so the key references t2.
    detail_id = Column(Integer, ForeignKey("t2.id"))
class T2(Base):
    """Maps table ``t2``; ``rate_id`` points at a row in ``t3``."""

    __tablename__ = 't2'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    # The query joins t2.rate_id = t3.id, so the key references t3.
    rate_id = Column(Integer, ForeignKey("t3.id"))
class T3(Base):
    """Maps table ``t3``, which holds the rate type and description."""

    __tablename__ = 't3'

    id = Column(Integer, primary_key=True)
    rate_type = Column(String)
    description = Column(String)
I hope it helps.

SQLAlchemy provides both an ORM way and a SQL way to work with a database. You can use raw SQL (or the SQLAlchemy SQL Expression Language) to query.
(1) RAW SQL QUERY,Sample code:
# Connection arguments are elided in this sample.
engine = create_engine(...)
# :v_parameters is a named bind parameter, filled by the keyword argument below.
q = 'SELECT foo FROM t_bar WHERE col_name=:v_parameters'
# NOTE(review): needs `import sqlalchemy` and `from sqlalchemy import
# create_engine`, which this sample does not show.
rs = engine.execute(sqlalchemy.text(q), v_parameters=your_actual_value)
Check execute and basic usage. Also take a look at ResultProxy to understand how to work with the returned result.
(2) ORM. If you want to use the ORM, you first have to define your models (mapped classes). Sample code:
from sqlalchemy import Column, ForeignKey
from sqlalchemy.types import String, Integer
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Father(Base):
    """Parent side of the one-to-many Father -> Son relationship."""

    __tablename__ = 'father'

    id = Column(Integer, primary_key=True)
    name = Column(String(31), unique=True, nullable=False)

    # Relationship attributes
    # passive_deletes=True leaves removal of child rows to the database's
    # ON DELETE CASCADE declared on Son.p_id.
    sons = relationship('Son',
                        passive_deletes=True,
                        back_populates='father')


class Son(Base):
    """Child side; each Son row references one Father through ``p_id``."""

    __tablename__ = 'son'

    id = Column(Integer, primary_key=True)
    name = Column(String(31), unique=True, nullable=False)

    # foreign keys
    # ForeignKey takes "<table name>.<column name>", not the class name.
    p_id = Column(Integer, ForeignKey('father.id',
                                      ondelete='CASCADE',
                                      onupdate='CASCADE'))

    # Relationship attributes
    # back_populates must name the attribute on the other class, and vice versa.
    father = relationship('Father',
                          back_populates='sons')
Then you can do ORM query operations
session.query(Father).join(Father.sons).filter(Son.name=='Sam')
is equal to SQL query
SELECT father.id, father.name FROM father JOIN son ON father.id=son.p_id WHERE son.name='Sam'.
Please check ORM mapper and ORM Query for more information.
For your application: if all of your tables are mapped, you can use the ORM way. If you do not need ORM features, you can just use raw SQL queries.
Thanks.

SQLAlchemy adding column with aggregate function to a dynamic loader list (AppenderQuery)

I get an incorrect record set, while adding an aggregate function like func.sum on a dynamic relationship. I have listed out a sample code below to demonstrate this.
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import (
relationship,
scoped_session,
sessionmaker,
backref
)
from sqlalchemy import (
create_engine,
Table,
Column,
Integer,
String,
ForeignKey,
func
)
from zope.sqlalchemy import ZopeTransactionExtension
import transaction
Base = declarative_base()
DBSession = scoped_session(sessionmaker(extension=ZopeTransactionExtension()))
class User(Base):
    """A user; ``article_list`` is a dynamic (query-returning) relationship."""

    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    userid = Column(String(15), unique=True, nullable=False)
    # lazy="dynamic" makes this attribute return a query object rather than
    # a loaded list, so it can be joined and filtered further.
    article_list = relationship("Article", backref="user", lazy="dynamic")
class Tag(Base):
    """A uniquely named tag that can be attached to articles."""

    __tablename__ = 'tags'

    id = Column(Integer, primary_key=True)
    name = Column(String(25), nullable=False, unique=True)
class Article(Base):
    """An article written by one user and tagged with any number of tags."""

    __tablename__ = 'articles'

    id = Column(Integer, primary_key=True)
    title = Column(String(25), nullable=False)
    duration = Column(Integer)
    user_id = Column(Integer, ForeignKey('users.id'), nullable=False)
    # Many-to-many through the "tag_map" table; the backref adds a dynamic
    # Tag.article_list on the other side.
    tags = relationship('Tag', secondary="tag_map",
                        backref=backref("article_list", lazy="dynamic"))
tag_map_table = Table(
'tag_map', Base.metadata,
Column('tag_id', Integer, ForeignKey('tags.id'), nullable=False),
Column('article_id', Integer, ForeignKey('articles.id'), nullable=False))
engine = create_engine('sqlite:///tag_test.sqlite')
DBSession.configure(bind=engine)
Base.metadata.create_all(engine)
# Seed one user with two articles, each carrying both tags.
with transaction.manager:
    t1 = Tag(name='software')
    t2 = Tag(name='hardware')
    john = User(userid='john')
    a1 = Article(title='First article', duration=300)
    a1.user = john
    a1.tags.append(t1)
    a1.tags.append(t2)
    DBSession.add(a1)
    a2 = Article(title='Second article', duration=50)
    a2.user = john
    a2.tags.append(t1)
    a2.tags.append(t2)
    # Add the second article explicitly (the original added a1 twice).
    DBSession.add(a2)
As shown in the code above, I have added two tags to both articles. Now I want to query the articles written by the user 'john', grouped by tag, and also find the total duration for each tag.
john = DBSession.query(User).filter(User.userid=='john').first()
# Start from john's dynamic article query, join to tags, and add the summed
# duration as an extra column, grouped per tag.
res = john.article_list.join(Article.tags).add_column(
    func.sum(Article.duration)).group_by(Tag.id)
for article, tsum in res:
    print ("Article : %s, Sum duration : %d" % (article.title, tsum))
The query generated for res is
SELECT articles.id AS articles_id, articles.title AS articles_title, articles.duration AS articles_duration, articles.user_id AS articles_user_id, sum(articles.duration) AS sum_1
FROM articles JOIN tag_map AS tag_map_1 ON articles.id = tag_map_1.article_id JOIN tags ON tags.id = tag_map_1.tag_id
WHERE :param_1 = articles.user_id GROUP BY tags.id
which when executed directly on the sqlite database yields two rows corresponding to the two tags
2|Second article|50|1|350
2|Second article|50|1|350
Whereas, the results returned by SQLAlchemy reflect only one row
Article : Second article, Sum duration : 350
But, if I add an extra column to contain tag-name in the AppenderQuery object
# Same query as before, with the tag name added as an extra result column.
res = john.article_list.join(Article.tags).add_column(Tag.name).add_column(
    func.sum(Article.duration)).group_by(Tag.id)
for article, tag_name, tsum in res:
    print ("Article : %s, Tag : %s, Sum duration : %d" % (
        article.title, tag_name, tsum))
I get proper results
Article : Second article, Tag : software, Sum duration : 350
Article : Second article, Tag : hardware, Sum duration : 350
So, what is the right way of using aggregate functions on AppenderQuery object in order to get categorized results?

Sqlalchemy exists with joined inheritance and Firebird

I tried to use SQLAlchemy joined table inheritance and ran into a strange problem.
# Excerpt from the asker's models; "..." marks parts that were left out.
class CommonObject(Base):
    __tablename__ = "objects"
    # Attribute "id" is mapped to the column named "objid".
    id = Column("objid", Integer, primary_key=True)
    objname = Column(String(32))
    ...


class GoodsPlacement(Container, Loadable, Dumpable):
    __tablename__ = "goods_placements"
    id = Column("objid", Integer, ForeignKey("containers.objid"), primary_key=True)
    ...


class Departure(CommonObject):
    __tablename__ = "departures"
    id = Column(Integer, ForeignKey("objects.objid"), primary_key=True)
    content_id = Column(Integer, ForeignKey("goods_placements.objid"))
    content = relationship("GoodsPlacement",
                           primaryjoin="Departure.content_id==GoodsPlacement.id",
                           foreign_keys=[content_id],
                           lazy='joined',
                           backref="departures")
    ...
When I write query:
session.query(GoodsPlacement).filter(~GoodsPlacement.departures.any(Departure.status_id < 2))
it generates me something like this:
SELECT
objects.objid AS objects_objid,
goods_placements.objid AS goods_placements_objid,
objects.objname AS objects_objname
FROM objects
JOIN goods_placements ON objects.objid = goods_placements.objid
WHERE NOT (EXISTS (
SELECT 1
FROM (
SELECT
objects.objid AS objects_objid,
objects.objname AS objects_objname,
departures.id AS departures_id,
departures.content_id AS departures_content_id,
departures.status_id AS departures_status_id
FROM objects
JOIN departures ON objects.objid = departures.id)
AS anon_1, objects
WHERE anon_1.departures_content_id = objects.objid
AND anon_1.departures_status_id < :status_id_1)
)
And this doesn't work because the objects table inside the EXISTS clause shadows the outer objects table.
As a workaround I used exists from the SQL expression language directly,
session.query(GoodsPlacement).filter(~exists([1],
and_("departures.status_id<2",
"departures.content_id=goods_placements.objid"),
from_obj="departures"))
but it depends strongly on the column and table names.
How can I specify an alias for the objects table in the EXISTS statement?
Debian wheezy, python-2.7.3rc2, sqlalchemy 0.7.7-1

There's a bug in how the declarative system sets up columns. The "objid" name you're giving the columns, distinct from the "id" attribute name, is the source of the issue here. The test case below approximates your system and shows a workaround until the bug is fixed:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base= declarative_base()
class CommonObject(Base):
    """Root of the joined-table hierarchy, stored in "objects"."""

    __tablename__ = "objects"

    # Attribute "id" is mapped to the column named "objid".
    id = Column("objid", Integer, primary_key=True)
    objname = Column(String(32))
class Container(CommonObject):
    """Joined-table subclass of CommonObject stored in "containers"."""

    __tablename__ = 'containers'

    id = Column("objid", Integer, ForeignKey("objects.objid"), primary_key=True)
class GoodsPlacement(Container):
    """Joined-table subclass of Container stored in "goods_placements"."""

    __tablename__ = "goods_placements"

    id = Column("objid", Integer, ForeignKey("containers.objid"), primary_key=True)
class Departure(CommonObject):
    """Joined-table subclass of CommonObject that references a GoodsPlacement."""

    __tablename__ = "departures"

    id = Column(Integer, ForeignKey("objects.objid"), primary_key=True)
    content_id = Column(Integer, ForeignKey("goods_placements.objid"))
    status_id = Column(Integer)
    # Workaround: build the join from the Table columns directly instead of
    # the mapped "id" attributes. The lambda defers evaluation until both
    # classes exist.
    content = relationship("GoodsPlacement",
        primaryjoin=lambda: Departure.__table__.c.content_id == GoodsPlacement.__table__.c.objid,
        backref="departures"
    )
session = Session()
# Printing a Query shows the SQL it would emit; nothing is executed here.
# The call form of print works on both Python 2 and Python 3.
print(session.query(GoodsPlacement).filter(~GoodsPlacement.departures.any(Departure.status_id < 2)))
output:
SELECT objects.objid AS objects_objid, containers.objid AS containers_objid, goods_placements.objid AS goods_placements_objid, objects.objname AS objects_objname
FROM objects JOIN containers ON objects.objid = containers.objid JOIN goods_placements ON containers.objid = goods_placements.objid
WHERE NOT (EXISTS (SELECT 1
FROM (SELECT objects.objid AS objects_objid, objects.objname AS objects_objname, departures.id AS departures_id, departures.content_id AS departures_content_id, departures.status_id AS departures_status_id
FROM objects JOIN departures ON objects.objid = departures.id) AS anon_1
WHERE anon_1.departures_content_id = goods_placements.objid AND anon_1.departures_status_id < :status_id_1))

Using PostgreSQL array to store many-to-many relationship

Suppose we have a PostgreSQL database with two tables A, B.
table A columns: id, name
table B columns: id, name, array_a
The column array_a in table B contains a variable length array of ids from table A. In SQLAlchemy we have two classes that model those tables, say class A and B.
The following works fine to get all the objects A that are referenced in an object B:
session.query(A).join(B, A.id == func.any(B.array_a)).filter(B.id == <id>).all()
How can we create a relationship in B referencing the objects A corresponding to the array? Tried column comparators using the func.any above but it complains that ANY(array_a) is not a column in the model. Specifying the primaryjoin conditions as above doesn't seem to cut it either.

This anti-pattern is called "Jaywalking", and PostgreSQL's powerful type system makes it very tempting. You should be using another table:
CREATE TABLE table_a (
    id SERIAL PRIMARY KEY,
    name VARCHAR
);
CREATE TABLE table_b (
    id SERIAL PRIMARY KEY,
    name VARCHAR
);
-- Association table: one row per (a, b) pair. A table can have only one
-- primary key, so the pair is declared as a single composite key.
CREATE TABLE a_b (
    a_id INTEGER REFERENCES table_a(id),
    b_id INTEGER REFERENCES table_b(id),
    PRIMARY KEY (a_id, b_id)
);
Which is mapped:
from sqlalchemy import *
from sqlalchemy.dialects import postgresql
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *
Base = declarative_base()
a_b_table = Table("a_b", Base.metadata,
Column("a_id", Integer, ForeignKey("table_a.id"), primary_key=True),
Column("b_id", Integer, ForeignKey("table_b.id"), primary_key=True))
class A(Base):
    """Maps ``table_a``."""

    __tablename__ = "table_a"

    id = Column(Integer, primary_key=True)
    name = Column(String)
class B(Base):
    """Maps ``table_b``; linked to A through the ``a_b`` association table."""

    __tablename__ = "table_b"

    id = Column(Integer, primary_key=True)
    name = Column(String)
    # Many-to-many via a_b_table; the backref adds A.b_set on the other side.
    a_set = relationship(A, secondary=a_b_table, backref="b_set")
example:
>>> print Query(A).filter(A.b_set.any(B.name == "foo"))
SELECT table_a.id AS table_a_id, table_a.name AS table_a_name
FROM table_a
WHERE EXISTS (SELECT 1
FROM a_b, table_b
WHERE table_a.id = a_b.a_id AND table_b.id = a_b.b_id AND table_b.name = :name_1)
If you are stuck with the ARRAY column, your best bet is to use an alternate selectable that "looks" like a proper association table.
from sqlalchemy import *
from sqlalchemy.dialects import postgresql
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *
Base = declarative_base()
class A(Base):
    """Maps ``table_a``."""

    __tablename__ = "table_a"

    id = Column(Integer, primary_key=True)
    name = Column(String)
class B(Base):
    """Maps ``table_b``; ``array_a`` holds ids of A rows in an ARRAY column."""

    __tablename__ = "table_b"

    id = Column(Integer, primary_key=True)
    name = Column(String)
    array_a = Column(postgresql.ARRAY(Integer))
# A SELECT that unnests B.array_a into (a_id, b_id) rows, so it can stand in
# for an association table.
a_b_selectable = select([func.unnest(B.array_a).label("a_id"),
                         B.id.label("b_id")]).alias()

# primaryjoin links the parent class to the "association" selectable and
# secondaryjoin links the selectable to the target class, so the two
# relationships below mirror each other.
A.b_set = relationship(B, secondary=a_b_selectable,
                       primaryjoin=A.id == a_b_selectable.c.a_id,
                       secondaryjoin=a_b_selectable.c.b_id == B.id,
                       viewonly=True)
B.a_set = relationship(A, secondary=a_b_selectable,
                       primaryjoin=B.id == a_b_selectable.c.b_id,
                       secondaryjoin=a_b_selectable.c.a_id == A.id,
                       viewonly=True)
which gives you:
>>> print Query(A).filter(A.b_set.any(B.name == "foo"))
SELECT table_a.id AS table_a_id, table_a.name AS table_a_name
FROM table_a
WHERE EXISTS (SELECT 1
FROM (SELECT unnest(table_b.array_a) AS a_id, table_b.id AS b_id
FROM table_b) AS anon_1, table_b
WHERE table_a.id = anon_1.a_id AND anon_1.b_id = table_b.id AND table_b.name = :name_1)
And obviously, since there's no real table there, viewonly=True is necessary, and you can't get the nice, dynamic objecty goodness you would have had if you had avoided jaywalking.

Alternatively, you can simply define the join explicitly, as below:
class A(Base):
    """Maps ``table_a``."""

    __tablename__ = "table_a"

    id = Column(Integer, primary_key=True)
    name = Column(String)
class B(Base):
    """Maps ``table_b``; ``a_ids`` loads the A rows whose id is in ``array_a``."""

    __tablename__ = "table_b"

    id = Column(Integer, primary_key=True)
    name = Column(String)
    array_a = Column(postgresql.ARRAY(Integer))
    # The join is "A.id = ANY(B.array_a)"; foreign() marks the array column
    # as the referencing side. viewonly=True because the ORM cannot persist
    # changes to this collection back into the array column.
    a_ids = relationship('A',
                         primaryjoin='A.id == any_(foreign(B.array_a))',
                         uselist=True,
                         viewonly=True)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Grouping totals in SQLAlchemy - python

Related

SqlAlchemy "contains_eager" does not appear to load nested relationships

Python/SQLAlchemy - Need to convert an inner join query with 3 tables into Python

SQLAlchemy adding column with aggregate function to a dynamic loader list (AppenderQuery)

Sqlalchemy exists with joined inheritance and Firebird

Using PostgreSQL array to store many-to-many relationship

Categories

Resources