SQLalchemy top 3 results from each category

SQLalchemy top 3 results from each category - python

I have the following models:
class Contract(ContractJsonSerializer, db.Model):
    """A row of the ``contracts`` table; each contract may reference one network."""

    __tablename__ = 'contracts'

    id = db.Column(db.Unicode(32), primary_key=True)
    device_name = db.Column(db.Unicode(256), nullable=False)
    # Fixed-point price: NUMERIC with precision 6 and scale 2.
    monthly_price = db.Column(db.Numeric(precision=6, scale=2))
    # Foreign key to networks.id.
    network_id = db.Column(db.Integer(), db.ForeignKey('networks.id'))
class Network(NetworkJsonSerializer, db.Model):
    """A row of the ``networks`` table; owns a collection of contracts."""

    __tablename__ = 'networks'

    id = db.Column(db.Integer(), primary_key=True)
    name = db.Column(db.Unicode(20), nullable=False)
    # One-to-many to Contract; the backref exposes ``Contract.network``.
    contracts = db.relationship('Contract', backref='network')
How can I get the 3 cheapest contracts from each network?
I came up with the following SQL:
-- MySQL user variables (prefixed with @) number the rows within each network.
set @num := 0, @network := '';
select *
from
(
    select *,
        -- Restart the counter at 1 whenever network_id changes.
        @num := if(@network = network_id, @num + 1, 1) as row_number,
        @network := network_id as dummy
    from contracts
    order by network_id, monthly_price
) as x
-- Keep only the three cheapest rows of every network.
where x.row_number <= 3;
But when I'm trying to execute it I'm getting:
# Run a raw statement through the ORM and load every row as a Contract.
# ``sql`` is presumably the SQL text shown above (TODO confirm).
contracts = Contract.query.from_statement(sql).all()
"ResourceClosedError: This result object does not return rows. It has been closed automatically."
Can I do it in a declarative way? If not, what is the best way to approach this problem?

After a few hours of Googling and reading the SQLAlchemy docs, I came up with this:
# Base query: every contract ordered by monthly price, cheapest first.
base_query = Contract.query.order_by(Contract.monthly_price)

# One "three cheapest" SELECT per network.
# NOTE(review): ``networks`` is not defined in this snippet - presumably a
# query over Network; confirm before use.
queries = [
    base_query.filter(Contract.network_id == n.id).limit(3).subquery().select()
    for n in networks.all()
]

# Combine the per-network selects with UNION ALL and load the rows as
# Contract objects in a single query.
contracts = Contract.query.select_entity_from(union_all(*queries)).all()
It seems to work correctly - returning 3 cheapest contracts for each network in one query.

Related

How to group by func.week, year etc in Association proxy

I have two models (tables):
class AcUsers(db.Model):
    """A row of ``ac_users`` with a scalar link to its ``AcTests`` row."""

    __tablename__ = "ac_users"

    id = Column(Integer, primary_key=True)
    b2c_customer_id = Column(
        Integer, ForeignKey("b2c_customer.id", ondelete="CASCADE")
    )
    # uselist=False: the attribute holds a single AcTests object, not a list.
    ac_tests = db.relationship(
        "AcTests",
        back_populates="ac_user",
        uselist=False,
        lazy="joined",
    )
    # Proxies AcTests.last_test_submit_time through the ac_tests relationship.
    last_test_submit_time = association_proxy(
        "ac_tests", "last_test_submit_time"
    )
class AcTests(db.Model):
    """A row of ``ac_tests`` holding a user's test timestamps."""

    __tablename__ = "ac_tests"

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey("ac_users.id"))
    # Reverse side of AcUsers.ac_tests (paired via back_populates).
    ac_user = db.relationship(
        "AcUsers",
        back_populates="ac_tests",
        uselist=False,
        lazy="joined",
    )
    first_test_started_time = Column(DateTime)
    last_test_submit_time = Column(DateTime)
I have tried to query the database like this:
# Intended to count users by week and year of their last test submission.
# AcUsers.last_test_submit_time is an association proxy onto AcTests, and
# passing it to func.week()/func.year() uses it as a plain column expression,
# which raises the NotImplementedError quoted below.
# NOTE(review): AcUsers.results_sent is not among the columns shown in the
# model above - presumably defined elsewhere; confirm.
ac = (
db.session.query(
func.count(AcUsers.id),
func.week(AcUsers.last_test_submit_time),
func.year(AcUsers.last_test_submit_time),
)
.filter(AcUsers.results_sent)
.filter(AcUsers.last_test_submit_time != None)
.all()
)
I got this error:
NotImplementedError: The association proxy can't be used as a plain column expression; it only works inside of a comparison expression
How do I get past this error so that I can group by week and year and know exactly how many users there are for the current period?
I have tried to get the daily, monthly, and yearly data by getting the data in the AcUsers and looping through it:
# Load every user whose results were sent and who has a last submit time.
# Multiple criteria passed to one filter() call are combined with AND.
ac_users = (
    db.session.query(AcUsers)
    .filter(
        AcUsers.results_sent,
        AcUsers.last_test_submit_time != None,
    )
    .all()
)
daily:
# Count how many users submitted their last test on each calendar date.
# ``daily_data`` must already exist as a dict (it is defined outside this
# snippet).
for user in ac_users:
    actual_date = user.last_test_submit_time.date()
    daily_data[actual_date] = daily_data.get(actual_date, 0) + 1

# Reshape the counts into {"x": date, "y": count} points.
results_sent_daily = [{"x": x, "y": y} for x, y in daily_data.items()]
I did the same for the monthly and yearly.
How do I make the query group by func.year, func.week, func.date, etc.?
Thank you in advance

As stated in the association proxy documentation, the proxies are not meant for ORM querying aside from filtering.
The simplest thing to do is to craft a query using both models, specifying the join between them and the grouping fields.
# Week and year of the last submission, read from the real AcTests column
# rather than from the association proxy on AcUsers.
submit_week = func.week(AcTests.last_test_submit_time)
submit_year = func.year(AcTests.last_test_submit_time)

# Count users per (week, year), joining AcUsers to AcTests explicitly.
ac = (
    s.dbs.query(
        func.count(AcUsers.id).label("count"),
        submit_week.label("week"),
        submit_year.label("year"),
    )
    .join(AcUsers.ac_tests)
    .group_by(submit_week, submit_year)
    .filter(AcTests.last_test_submit_time != None)
    .all()
)

How to combine sub queries, order by and grouping of max records?

I have the following table for recording metrics of sports players per match:
class MatchResult(Base):
    """One match: the ids of both players and the metric recorded for each."""

    __tablename__ = 'match_result'

    id = Column(Integer, primary_key=True)
    # Player 1 and player 2 each get an indexed id and a float metric.
    p1_id = Column(Integer, index=True)
    p1_metric = Column(Float)
    p2_id = Column(Integer, index=True)
    p2_metric = Column(Float)
I want to get the maximum metric for every unique player id (whether they were p1 or p2) and ordered by metric.
This is as far as I have got:
# Player-1 side of every match, relabelled to a common (player_id, metric) shape.
p1 = session.query(
MatchResult.p1_id.label("player_id"),
MatchResult.p1_metric.label("metric")
).subquery()
# Player-2 side, using the same labels so the two sets can be combined.
p2 = session.query(
MatchResult.p2_id.label("player_id"),
MatchResult.p2_metric.label("metric")
).subquery()
# p1 and p2 are subqueries here, and this .union() call is what raises the
# AttributeError quoted below ('Alias' object has no attribute 'union').
union = p1.union(p2)
# Maximum metric per player over the combined rows.
upset = session.query(
union.c.player_id,
func.max(union.c.metric).label("max_metric")
)
upset = upset.group_by(union.c.player_id)
# NOTE(review): "max_metric" is a label defined on this query, not on
# ``union`` - presumably the ordering should use that label; confirm.
upset = upset.order_by(union.c.max_metric)
But I'm hitting an issue that I can't union two sub queries so I get the error:
AttributeError: 'Alias' object has no attribute 'union'
How would I solve this issue?

SQLAlchemy subquery comparison

I have the following set of tables:
class Job(db.Model):
    """A row of the ``jobs`` table."""

    __tablename__ = 'jobs'

    id = db.Column(db.Integer, primary_key=True)
class Informant(db.Model):
    """A row of ``informants``: belongs to a job and has a student capacity."""

    __tablename__ = 'informants'

    id = db.Column(db.Integer, primary_key=True)
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Capacity that is summed per job in the query below.
    max_students = db.Column(db.Integer)
class Student(db.Model):
    """A row of the ``students`` table."""

    __tablename__ = 'students'

    id = db.Column(db.Integer, primary_key=True)
# Association table linking students to jobs; the pair
# (student_id, job_id) forms the composite primary key.
queues = db.Table(
    'queues',
    db.Column('student_id', db.Integer, db.ForeignKey('students.id')),
    db.Column('job_id', db.Integer, db.ForeignKey('jobs.id')),
    PrimaryKeyConstraint('student_id', 'job_id'),
)
Now I need to obtain something like:
-- Jobs whose total informant capacity is <= the number of queued students.
SELECT jobs.id
FROM jobs
WHERE (
    -- total capacity of the job's informants
    SELECT SUM(informants.max_students)
    FROM informants
    WHERE informants.job_id = jobs.id
) <= (
    -- number of students queued for the job
    SELECT COUNT(1)
    FROM queues
    WHERE queues.job_id = jobs.id
)
So basically I am searching for the jobs whose number of queued students meets or exceeds the maximum capacity, i.e. the sum of the related informants' capacities. Is there a clean way to do this in SQLAlchemy? I tried the following:
# Attempt to select jobs whose summed informant capacity is <= the number of
# queued students. The two inner queries are compared directly as Query
# objects (no .as_scalar()), and the result is reported to render as
# "WHERE 0 = 1".
db.session.query(Job.id).filter( \
db.session.query(db.func.sum(Informant.max_students)). \
filter(Informant.job_id == Job.id) <= \
db.session.query(db.func.count(1)).select_from(queues). \
filter(queues.c.job_id == Job.id))
This yields something like SELECT jobs.id FROM jobs WHERE 0 = 1. Is there something I'm missing? I have successfully used similar queries before. Or am I better off using db.engine.execute to execute the raw SQL?

I was close to having the right answer. The piece that was missing is an as_scalar method on both subqueries. So the final query is:
# Scalar subquery: total capacity of the informants attached to the job.
capacity = (
    db.session.query(db.func.sum(Informant.max_students))
    .filter(Informant.job_id == Job.id)
    .as_scalar()
)
# Scalar subquery: number of students queued for the job.
queued = (
    db.session.query(db.func.count(1))
    .select_from(queues)
    .filter(queues.c.job_id == Job.id)
    .as_scalar()
)
# Jobs whose capacity is less than or equal to the number of queued students.
db.session.query(Job.id).filter(capacity <= queued)

How to join 3 tables and perform func.sum

How do I join 3 tables, Clients, Orders and Deposits and perform func.sum on Orders.total and Deposits.total for each Client.id that exists in the database ? The query result should include columns Clients.email, func.sum(Orders.total) and func.sum(Deposits.total).
So far, I've tried different queries along the lines of :
# Attempt to sum order and deposit totals per client in one query.
# Clients.orders / Clients.deposits are relationship attributes, so reading
# ``.total`` on them raises the AttributeError quoted below.
# NOTE(review): ``Client`` (singular) in group_by is not defined in the models
# shown - presumably ``Clients`` was meant; confirm.
listeclients = db.session.query(Clients,func.sum(Clients.orders.total).\
label("ctotal"),func.sum((Clients.deposits.total).\
label("dtotal"))).group_by(Client.id).all()
giving me different errors such as:
AttributeError: Neither 'InstrumentedAttribute' object nor 'Comparator' object associated with Clients.orders has an attribute 'total'
I would like to see how one does this in sqlalchemy, but I'd also settle for hints behind the logic of such a query ...
Are my mappings correct? What is the syntax of such a join? Should I use eagerload somewhere? I've had success with simpler queries, but one like this is over my head for now ! Any help is welcome, even just the logic of it in raw SQL. I'm stuck w/ this ...
class Clients(db.Model):
    """A row of the ``clients`` table, with its orders and deposits."""

    __tablename__ = 'clients'

    id = db.Column(db.Integer, primary_key=True)
    email = db.Column(db.String(60), index=True, unique=True)
    adresse = db.Column(db.String(64), index=True)
    telephone = db.Column(db.String(10), index=True)
    confirmed = db.Column(db.Boolean, default=False)
    # Collections of related Orders / Deposits rows (joined via client_id).
    orders = db.relationship('Orders')
    deposits = db.relationship('Deposits')
class Orders(db.Model):
    """A row of the ``orders`` table: one order total for one client."""

    __tablename__ = 'orders'

    id = db.Column(db.Integer, primary_key=True)
    client_id = db.Column(db.Integer, db.ForeignKey('clients.id'))
    total = db.Column(db.Float)
    # The callable datetime.now is passed (not called), so it runs per insert.
    date = db.Column(db.DateTime, index=True, default=datetime.now)
    client = db.relationship('Clients')
class Deposits(db.Model):
    """A row of the ``deposits`` table: one deposit total for one client."""

    __tablename__ = 'deposits'

    id = db.Column(db.Integer, primary_key=True)
    # The callable datetime.now is passed (not called), so it runs per insert.
    date = db.Column(db.DateTime, index=True, default=datetime.now)
    client_id = db.Column(db.Integer, db.ForeignKey('clients.id'))
    total = db.Column(db.Float)
    cheque = db.Column(db.Boolean)
    client = db.relationship('Clients')

Update: the query below has been updated to handle the sums properly:
# Per-client order total, aggregated on its own so the later joins cannot
# duplicate rows before summing.
sq1 = (
    db.session.query(
        Orders.client_id,
        func.sum(Orders.total).label("ctotal"),
    )
    .group_by(Orders.client_id)
    .subquery("sub1")
)

# Per-client deposit total, aggregated the same way.
sq2 = (
    db.session.query(
        Deposits.client_id,
        func.sum(Deposits.total).label("dtotal"),
    )
    .group_by(Deposits.client_id)
    .subquery("sub2")
)

# Outer joins keep clients that have no orders and/or no deposits.
q = (
    db.session.query(Clients, sq1.c.ctotal, sq2.c.dtotal)
    .outerjoin(sq1, sq1.c.client_id == Clients.id)
    .outerjoin(sq2, sq2.c.client_id == Clients.id)
)
Also, instead of defining relationships twice (which might actually fail on some versions of sqlalchemy), you can simply use backref:
# Sketch only: the remaining columns of each model are elided with ``...``.
class Clients(db.Model):
    ...
    # backref='client' creates the ``client`` attribute on the other side.
    orders = db.relationship('Orders', backref='client')
    deposits = db.relationship('Deposits', backref='client')


class Orders(db.Model):
    ...
    # client = db.relationship('Clients')


class Deposits(db.Model):
    ...
    # client = db.relationship('Clients')

In sql it is straightforward:
-- Sketch: per-client totals of orders and deposits; the join conditions are
-- elided ("on ...") in the original.
-- NOTE(review): joining both Orders and Deposits to Clients in one SELECT
-- presumably multiplies rows for clients that have several of each, which
-- would inflate both sums - verify, or aggregate each table separately first.
select c.email, sum(o.total), sum(d.total)
from Clients c
left join Orders o
on ...
left join Deposits d
on ...
group by c.email

SQLalchemy duplicate entry error for unknown value

I am writing a script to synchronize AdWords accounts and a local database with SQLAlchemy.
I am following the object hierarchy of the Adwords API, so my first table is 'campaigns' and the second is 'adgroups'
here is how I define the two:
class Campaign(Base):
    """A row of ``aw_campaigns`` mirroring one AdWords campaign."""

    __tablename__ = 'aw_campaigns'

    id = Column(Integer, primary_key=True)
    name = Column(String(99))
    impressions = Column(Integer)
    serving_status = Column(String(99))
    start_date = Column(String(99))
    status = Column(String(99))

    def __init__(self, id, name, impressions, serving_status, start_date, status):
        """Store each positional argument on the attribute of the same name."""
        self.id = id
        self.name = name
        self.impressions = impressions
        self.serving_status = serving_status
        self.start_date = start_date
        self.status = status
class Adgroup(Base):
    """A row of ``aw_adgroups`` mirroring one AdWords ad group."""

    __tablename__ = 'aw_adgroups'

    # NOTE: plain Integer primary key; the answer below attributes the
    # duplicate-key error to values that do not fit in 32 bits.
    id = Column(Integer, primary_key=True)
    name = Column(String(99))
    camp_id = Column(Integer, ForeignKey('aw_campaigns.id'))
    camp_name = Column(String(99))
    ctr = Column(Float)
    cost = Column(Float)
    impressions = Column(Integer)
    clicks = Column(Integer)
    status = Column(String(99))

    def __init__(self, id, name, camp_id, camp_name, ctr, cost, impressions, clicks, status):
        """Store each positional argument on the attribute of the same name."""
        self.id = id
        self.name = name
        self.camp_id = camp_id
        self.camp_name = camp_name
        self.ctr = ctr
        self.cost = cost
        self.impressions = impressions
        self.clicks = clicks
        self.status = status
I query the API, and then build the list of objects for the lines in the Adgroup table:
# Query sent to the ad group service.
adgr_query = 'SELECT CampaignId, CampaignName, Clicks, Cost, Impressions, Ctr, Id, KeywordMaxCpc, Name, Settings, Status'
# Entries of the first element of the service response.
adgr_page = ad_group_serv.Query(adgr_query)[0]['entries']
# Build an Adgroup for every entry whose integer id is not in adgr_exist.
adgr_ins = [
    Adgroup(
        entry['id'],
        entry['name'],
        entry['campaignId'],
        entry['campaignName'],
        entry['stats']['ctr'],
        entry['stats']['cost']['microAmount'],
        entry['stats']['impressions'],
        entry['stats']['clicks'],
        entry['status'],
    )
    for entry in adgr_page
    if int(entry['id']) not in adgr_exist
]
but when I commit I get the error:
(IntegrityError) (1062, "Duplicate entry '2147483647' for key 'PRIMARY'")
The problem is that I have no idea where that value is from.
'2147483647' in [i['id'] for i in adgr_page]
>>> False
'2147483647' in str(adgr_page)
>>> False
I am really stuck on this.

Looks like you have integer overflow somewhere.
The symptom: 2147483647 is 2**31-1 - indicates that 32 bits were used to store the number.
AdGroup.Id field has type xsd:long which has 64 bits length.
Python itself has no limit on the size of an integer value, but the database may have such a limit.
Short solution:
Try the BigInteger SQL type, i.e. id = Column(BigInteger, primary_key=True), and do the same for camp_id and the rest of the xsd:long values coming from the AdWords API. There is a good chance that SQLAlchemy will pick the database-specific big-integer column type. Alternatively, you can use String(64) as the type for id, but in that case you'll need an extra step to generate the primary key.
How many entries does your query to the AdWords API return? Are there more than 2**32 records? I doubt it - it is unlikely that your database would be able to handle ~4.3 billion records.
Solution 2 - long term
That said, I would suggest not trusting an external source for primary-key integrity. Instead, let the database generate the primary key using autoincrement, and let SQLAlchemy populate the foreign keys from the database-generated primary keys:
class Adgroup(Base):
    """Ad group keyed by a database-generated id; the AdWords id is kept as text."""

    __tablename__ = 'aw_adgroups'

    # Surrogate key produced by the database sequence.
    id = Column(Integer, Sequence('adgroup_seq'), primary_key=True)
    # External AdWords identifier, stored as a string.
    adGroupId = Column(String(64))
    # References the surrogate key of Campaign, not the AdWords campaign id.
    campaignId = Column(Integer, ForeignKey('aw_campaigns.id'))
    campaign = relationship("Campaign", backref="adgroup")
    ...


class Campaign(Base):
    """Campaign keyed by a database-generated id; the AdWords id is kept as text."""

    __tablename__ = 'aw_campaigns'

    # Uses its own sequence instead of reusing the ad group one.
    id = Column(Integer, Sequence('campaign_seq'), primary_key=True)
    # External AdWords identifier, stored as a string.
    campaignId = Column(String(64))
    ...
It also looks like you may need to look rows up by campaignId and adGroupId, so you can add indexes on them.
Then you create your Campaign and Adgroup objects and just add relations between them. The code will depend on the type of relationship you want to use - one-to-many or many-to-many. Check the SQLAlchemy relationship manual for more details.
# Build both objects and link them through the relationship; the foreign key
# is then populated by SQLAlchemy rather than set by hand.
# The class is declared as ``Adgroup``, so that exact spelling is used here.
ag = Adgroup(**kwargs)
camp = Campaign(**kwargs)
ag.campaign = camp
session.add(ag)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

SQLalchemy top 3 results from each category - python

Related

How to group by func.week, year etc in Association proxy

How to combine sub queries, order by and grouping of max records?

SQLAlchemy subquery comparison

How to join 3 tables and perform func.sum

SQLalchemy duplicate entry error for unknown value

Categories

Resources