Creating a query with few related tables in Pyramid with SQLAlchemy - python

I have defined few tables in Pyramid like this:
# coding: utf-8
from sqlalchemy.ext.declarative import declarative_base
# Fixed: added SmallInteger, which the Types model below uses but the
# original import list omitted.
from sqlalchemy import Integer, SmallInteger, Float, DateTime, ForeignKey, ForeignKeyConstraint, String, Column
# Fixed: the original line ended with a dangling comma after "backref",
# which is a SyntaxError.
from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref
from zope.sqlalchemy import ZopeTransactionExtension

# Thread-local session registry; ZopeTransactionExtension ties the session
# lifecycle to Pyramid's transaction manager.
DBSession = scoped_session(sessionmaker(extension=ZopeTransactionExtension()))
# Declarative base shared by every model in this module.
Base = declarative_base()
class Codes(Base):
    """Location-code lookup table ('Locations'.'Code'); one row per code_str."""
    __tablename__ = 'Code'
    __table_args__ = {u'schema': 'Locations'}

    id = Column(Integer, nullable=False)
    code_str = Column(String(9), primary_key=True)
    name = Column(String(100))

    # Voyages whose 'call' column points at this code.
    # Fixed: a string primaryjoin is evaluated against mapped *class* names,
    # and the mapped class below is named "Voyages" (its table is "Voyage"),
    # so 'Voyage.call' would fail to resolve.
    incoming = relationship(u'Voyages', primaryjoin='Voyages.call == Codes.code_str', backref=backref('Code'))
class Locations(Base):
    """Position report of a unit ('Locations'.'Location').

    Composite primary key (unit_id, timestamp): one fix per unit per moment.
    """
    __tablename__ = 'Location'
    __table_args__ = {u'schema': 'Locations'}

    # FK to Structure.Definition: deleting a definition is blocked while
    # location rows exist; key updates cascade.
    unit_id = Column(ForeignKey(u'Structure.Definition.unit_id', ondelete=u'RESTRICT', onupdate=u'CASCADE'), primary_key=True, nullable=False)
    timestamp = Column(DateTime, primary_key=True, nullable=False)
    longitude = Column(Float)
    latitude = Column(Float)
class Voyages(Base):
    """One voyage of a unit ('Locations'.'Voyage')."""
    __tablename__ = 'Voyage'
    # Fixed: the constrained-column list must name the columns declared on
    # this table; the original said 'Voyage_id' while the column is
    # 'voyage_id'.
    # NOTE(review): the referenced table 'Locations.Voyages' does not appear
    # in this module (tables here are Code/Location/Voyage) -- confirm the
    # real target of this composite FK.
    __table_args__ = (ForeignKeyConstraint(['unit_id', 'voyage_id'], [u'Locations.Voyages.unit_id', u'Locations.Voyages.voyage_id'], ondelete=u'RESTRICT', onupdate=u'CASCADE'), {u'schema': 'Locations'}
    )

    uid = Column(Integer, primary_key=True)
    unit_id = Column(Integer)
    voyage_id = Column(Integer)
    departure = Column(ForeignKey(u'Locations.Code.code_str', ondelete=u'RESTRICT', onupdate=u'CASCADE'))
    call = Column(ForeignKey(u'Locations.Code.code_str', ondelete=u'RESTRICT', onupdate=u'CASCADE'))
    departure_date = Column(DateTime)

    # Fixed: primaryjoin strings resolve mapped *class* names; the class is
    # "Voyages", not "Voyage".
    voyage_departure = relationship(u'Codes', primaryjoin='Voyages.departure == Codes.code_str')
    voyage_call = relationship(u'Codes', primaryjoin='Voyages.call == Codes.code_str')
class Definitions(Base):
    """Unit master record ('Structure'.'Definition'); hub of the schema."""
    __tablename__ = 'Definition'
    __table_args__ = {u'schema': 'Structure'}

    unit_id = Column(Integer, primary_key=True)
    name = Column(String(90))
    type = Column(ForeignKey(u'Structure.Type.id', ondelete=u'RESTRICT', onupdate=u'CASCADE'))

    # Related collections; each target class gains a 'Definition' backref.
    locations = relationship(u'Locations', backref=backref('Definition'))
    dimensions = relationship(u'Dimensions', backref=backref('Definition'))
    # NOTE(review): Types and Voyages carry no FK to Definition in this
    # snippet -- confirm these two relationships actually resolve a join
    # condition at mapper-configuration time.
    types = relationship(u'Types', backref=backref('Definition'))
    voyages = relationship(u'Voyages', backref=backref('Definition'))
class Dimensions(Base):
    """Physical dimensions of a unit ('Structure'.'Dimension'); keyed 1:1 to Definition."""
    __tablename__ = 'Dimension'
    __table_args__ = {u'schema': 'Structure'}

    unit_id = Column(ForeignKey(u'Structure.Definition.unit_id', ondelete=u'RESTRICT', onupdate=u'CASCADE'), primary_key=True, nullable=False)
    length = Column(Float)
class Types(Base):
    """Unit-type lookup ('Structure'.'Type')."""
    __tablename__ = 'Type'
    __table_args__ = {u'schema': 'Structure'}

    # NOTE(review): SmallInteger must be imported from sqlalchemy -- verify
    # the module's import list actually includes it.
    id = Column(SmallInteger, primary_key=True)
    type_name = Column(String(255))
    type_description = Column(String(255))
What I am trying to do here is to find a specific row from Codes table (filter it by code_str) and get all related tables in return, but under the condition that Location table returns only the last row by timestamp, Voyage table must return only the last row by departure, and it must have all information from Definitions table.
I started to create a query from the scratch and came across something like this:
string_to_search = request.matchdict.get('code')
# Latest departure per (unit_id, call) for voyages calling at the requested
# code.  NOTE(review): requires "from sqlalchemy import func".
# Fixed: the mapped class defined above is "Voyages"; the bare name "Voyage"
# is undefined here and would raise NameError.
sub_dest = DBSession.query(func.max(Voyages.departure).label('latest_voyage_timestamp'), Voyages.unit_id, Voyages.call.label('destination_call')).\
    filter(Voyages.call == string_to_search).\
    group_by(Voyages.unit_id, Voyages.call).\
    subquery()
query = DBSession.query(Codes, Voyages).\
    join(sub_dest, sub_dest.c.destination_call == Codes.code_str).\
    outerjoin(Voyages, sub_dest.c.latest_voyage_timestamp == Voyages.departure_date)
but I have noticed that when I iterate through my results (like for code, voyage in query) I am actually iterating over every Voyage I get in return. In theory this is not a big problem for me, but I am trying to construct a JSON response with basic information from the Codes table which would include all possible Voyages (if there are any at all).
For example:
code_data = {}
all_units = []
for code, voyage in query:
if code_data is not {}:
code_data = {
'code_id': code.id,
'code_str': code.code_str,
'code_name': code.name,
}
single_unit = {
'unit_id': voyage.unit_id,
'unit_departure': str(voyage.departure_date) if voyage.departure_date else None,
}
all_units.append(single_unit)
return {
'code_data': exception.message if exception else code_data,
'voyages': exception.message if exception else all_units,
}
Now, this seems a bit wrong because I don't like rewriting this code_data in each loop, so I put if code_data is not {} line here, but I suppose it would be much better (logical) to iterate in a way similar to this:
# Desired iteration shape: a single Codes row, with its voyages reached
# through the relationship rather than as tuple members.
# NOTE(review): all_units and exception are not defined in this fragment --
# presumably initialized in the enclosing view as in the earlier snippet.
for code in query:
    code_data = {
        'code_id': code.id,
        'code_str': code.code_str,
        'code_name': code.name,
    }
    for voyage in code.voyages:
        single_unit = {
            'unit_id': voyage.unit_id,
            'unit_departure': str(voyage.departure) if voyage.departure else None,
        }
        all_units.append(single_unit)
return {
    'code_data': exception.message if exception else code_data,
}
So, to get only single Code in return (since I queried the db for that specific Code) which would then have all Voyages related to it as a nested value, and of course, in each Voyage all other information related to Definition of the particular Unit...
Is my approach good at all in the first place, and how could I construct my query in order to iterate it in this second way?
I'm using Python 2.7.6, SQLAlchemy 0.9.7 and Pyramid 1.5.1 with Postgres database.
Thanks!

Try changing the outer query like so:
# Query only the Codes entity and populate its 'incoming' collection from the
# already-joined Voyage rows (contains_eager); iterating then yields Codes
# instances instead of (Codes, Voyage) tuples.
query = DBSession.query(Codes).options(contains_eager('incoming')).\
    join(sub_dest, sub_dest.c.destination_call == Codes.code_str).\
    outerjoin(Voyage, sub_dest.c.latest_voyage_timestamp == Voyage.departure_date)
In case of problems, try calling the options(...) part like so:
(...) .options(contains_eager(Codes.incoming)). (...)
This should result in a single Codes instance being returned with Voyages objects accessible via the relationship you've defined (incoming), so you could proceed with:
# Materialize and walk the Codes rows; voyages hang off code.incoming.
results = query.all()
for code in results:
    print code
    # do something with code.incoming
    # actually, you should get only one code so if it proves to work, you should
    # use query.one() so that in case something else than a single Code is returned,
    # an exception is thrown
of course you need an import, e.g.: from sqlalchemy.orm import contains_eager

Related

SqlAlchemy "contains_eager" does not appear to load nested relationships

I have a schema as follows:
Thing # Base class for below tables
- id
Ball (Thing)
- color
Bin (Thing)
- ball -> Ball.id
Court (Thing)
- homeBin -> Bin.id
- awayBin -> Bin.id
I'd like to ensure that whenever I load a set of Courts, it includes the latest Ball column values. From what I understand, contains_eager() might be able to help with that:
Indicate that the given attribute should be eagerly loaded from columns stated manually in the query.
I have a test that queries every few seconds for any Courts. I'm finding that, even with contains_eager, I only ever see the same value for Ball.color, even though I've explicitly updated the column's value in the database.
Why does sqlalchemy appear to reuse this old data?
Below is a working example of what's happening:
# NOTE(review): wildcard imports pull in everything (Column, ForeignKey,
# aliased, contains_eager, ...); explicit imports would be clearer.
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base

# Shared declarative base for the polymorphic Thing hierarchy below.
Base = declarative_base()
class Thing(Base):
    """Joined-table-inheritance base; thingType discriminates the subclass."""
    __tablename__ = "Things"

    id = Column(Integer, primary_key=True)
    name = Column(String(256))
    thingType = Column(String(256))

    __mapper_args__ = {
        # Eagerly include all subclass tables when querying Thing.
        'with_polymorphic':'*',
        'polymorphic_on':"thingType",
        'polymorphic_identity':"thing"
    }
class Ball(Thing):
    """Leaf subtype: a colored ball."""
    __tablename__ = "Balls"

    # Shares its primary key with Things.id (joined-table inheritance).
    id = Column('id', Integer, ForeignKey('Things.id'), primary_key=True)
    color = Column('color', String(256))

    __mapper_args__ = {
        'polymorphic_identity':'ball'
    }
class Bin(Thing):
    """Subtype holding a reference to (at most) one Ball."""
    __tablename__ = "Bins"

    id = Column('id', Integer, ForeignKey('Things.id'), primary_key=True)
    shape = Column('shape', String(256))
    ballId = Column('ballId', Integer, ForeignKey('Balls.id'))
    # foreign_keys is explicit because both the inheritance key and ballId
    # are FKs, so the join condition must be disambiguated.
    ball = relationship(Ball, foreign_keys=[ballId], backref="outputBins")

    __mapper_args__ = {
        'polymorphic_identity':'bin'
    }
    # Fixed: removed a redundant trailing "pass" -- the class body is
    # non-empty, so it had no effect.
class Court(Thing):
    """Subtype with two Bin references (home and away)."""
    __tablename__ = "Courts"

    id = Column('id', Integer, ForeignKey('Things.id'), primary_key=True)
    homeBinId = Column('homeBinId', Integer, ForeignKey('Bins.id'))
    awayBinId = Column('awayBinId', Integer, ForeignKey('Bins.id'))
    # foreign_keys disambiguates the two FKs onto the same target table.
    homeBin = relationship(Bin, foreign_keys=[homeBinId], backref="homeCourts")
    awayBin = relationship(Bin, foreign_keys=[awayBinId], backref="awayCourts")

    __mapper_args__ = {
        'polymorphic_identity':'court'
    }
# Fixed: removed "metadata = MetaData()" -- it was never used anywhere (the
# mapped tables live on Base.metadata) and only invited confusion.
# NOTE(review): the URL names no database; confirm the intended target.
engine = create_engine("postgresql://localhost:5432/")
Session = sessionmaker(bind=engine)
session = Session()
def courtQuery():
    """Return all Courts with homeBin/awayBin and their Balls eagerly loaded.

    The eager loads use contains_eager against explicit aliases because the
    same tables (Bins, Balls) are joined twice, once per side.
    """
    awayBalls = aliased(Ball, name="awayBalls")
    homeBalls = aliased(Ball, name="homeBalls")
    awayBins = aliased(Bin, name="awayBins")
    homeBins = aliased(Bin, name="homeBins")
    # Fixed: .populate_existing() forces the fetched rows to overwrite
    # attribute state of objects already present in the Session's identity
    # map.  Without it, a long-lived session keeps handing back the values it
    # loaded first -- exactly the "ball.color never changes" symptom this
    # polling loop exhibits.
    query = session.query(Court)\
        .outerjoin(awayBins, Court.awayBinId == awayBins.id)\
        .outerjoin(awayBalls, awayBins.ballId == awayBalls.id)\
        .outerjoin(homeBins, Court.homeBinId == homeBins.id)\
        .outerjoin(homeBalls, homeBins.ballId == homeBalls.id)\
        .options(contains_eager(Court.awayBin, alias=awayBins).contains_eager(awayBins.ball, alias=awayBalls))\
        .options(contains_eager(Court.homeBin, alias=homeBins).contains_eager(homeBins.ball, alias=homeBalls))\
        .populate_existing()
    return [r for r in query]
import time

# Poll loop demonstrating the stale read: the same Session instance is
# reused across iterations, and the printed color never changes --
# presumably because already-loaded objects are served from the session
# rather than refreshed (confirm against Session/identity-map semantics).
while(True):
    results = courtQuery()
    court = results[0]
    ball = court.homeBin.ball
    print(ball.color) # does not change
    time.sleep(2)
Environment:
Python 2.7.14
SqlAlchemy 1.3.0b1
PostGres 11.3 (though I've seen this
on Oracle as well)

SQLAlchemy query not iterating over all results from a query

I'm experiencing some odd behavior with SQLAlchemy not iterating over all of the results from a query.
For example, I have the following python code:
engine = create_engine(<connection string>)
Session = sessionmaker(bind=engine)
session = Session()
columns = session.query(Column)
counter = 1
for c in columns
print(counter)
counter = counter + 1
print('count: ' + str(columns.count()))
where Column is a class that I've defined and mapped in the usual SQLAlchemy way:
from base import Base
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Boolean

# NOTE(review): naming the model "Column" shadows sqlalchemy's Column
# imported just above; it works only because the class body consumes the
# imported name before the rebinding, but it is fragile.
class Column(Base):
    """INFORMATION_SCHEMA.COLUMNS mapping (buggy version).

    column_name alone is not unique, so using it as the sole primary key
    makes the ORM collapse rows that share a key -- the cause of the
    missing-rows discrepancy described in this question.
    """
    __tablename__ = 'COLUMNS'
    __table_args__ = {'schema' : 'INFORMATION_SCHEMA'}
    table_catalog = Column(String)
    table_schema = Column(String)
    table_name = Column(String)
    column_name = Column(String, primary_key=True)
    data_type = Column(String)
From my query, I'm expecting 7034 rows to be returned and this is what the final print statement prints out (for columns.count()), but the for loop only ever gets up to 2951 printing out counter.
If I do anything else with the returned data in the for loop, only the 2951 get processed, not all 7034.
Does anyone know why I'm experiencing this discrepancy, and how can I iterate over all 7034 rows, not just the 2951?
I've figured out why I wasn't getting the results I was expecting (I did something silly).
The 'column_name' field in the table the Column class maps to isn't unique; therefore picking it as the primary key filtered the results down to unique values only — which, since there are duplicates, resulted in fewer rows being returned than I expected.
I fixed it by updating the definition of the Column mapping to:
from base import Base
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Boolean

class Column(Base):
    """INFORMATION_SCHEMA.COLUMNS mapping (corrected version).

    The composite key (catalog, schema, table, column) uniquely identifies a
    row, so no rows are collapsed by the identity map.
    """
    __tablename__ = 'COLUMNS'
    __table_args__ = {'schema' : 'INFORMATION_SCHEMA'}
    table_catalog = Column(String, primary_key=True)
    table_schema = Column(String, primary_key=True)
    table_name = Column(String, primary_key=True)
    column_name = Column(String, primary_key=True)
    data_type = Column(String)

SQLAlchemy: dynamically apply filter on relationship column ahead of time

I have two SQLAlchemy classes representing a many-to-one relationship, say:
# Fixed: "class Person" was missing its colon (SyntaxError).  These sketches
# deliberately omit the declarative Base / __tablename__ boilerplate.
class Person:
    id = Column(Integer(), primary_key=True)
    name = Column(String(30))
    known_addresses = relationship('Address', backref='person')

class Address:
    id = Column(Integer(), primary_key=True)
    person_id = Column(Integer(), ForeignKey(Person.id, ondelete='cascade'))
    city = Column(String(30))
    zip_code = Column(String(10))
Now, say I have a function that returns a Person queryset (a Select object) filtered by zip codes:
def get_persons_in_zip_code(zip_code):
    """Return a Person query restricted to people with an address in *zip_code*."""
    # Fixed: Query has no .where() in the SQLAlchemy versions this question
    # targets (it became a synonym of .filter() only in 1.4); .filter() is
    # the portable spelling, and what the accepted answer uses.
    return session.query(Person).\
        join(Address).\
        filter(Address.zip_code == zip_code)
Once I return the query set, I have no control over it and it is expected that this will encapsulate all the data that the framework I'm using (in my case Django/DRF) to render a list of persons along with their addresses (so the code iterates the query set, calling .addresses for each person and rendering that as well).
Here's the catch: I want to ensure that calling .addresses will return only the addresses matched in the original, zip_code filtered query - not all addresses related to that person.
Is there a way to achieve this in SQLAlchemy without accessing Person objects returned at later stages? That is, I can only modify my get_persons_in_zip_code function, or the original SQLAlchemy classes, but have no access to the Person objects returned from the query, as that happens deep inside the framework rendering code.
EDIT: It's also important that calling count() on the returned query object yields the number of expected Person objects, not the number of Address objects.
It seems like what you are looking for is contains_eager.
EDIT: An updated version that monkeypatches the .count() function to return only the distinct Person count.
from sqlalchemy import Integer, Column, String, ForeignKey
from sqlalchemy import create_engine, func, distinct
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker, contains_eager
from types import MethodType

# Throwaway in-memory database; echo=True logs the emitted SQL for the demo.
engine = create_engine('sqlite:///:memory:', echo=True)
Session = sessionmaker(bind=engine)
session = Session()
Base = declarative_base()
class Person(Base):
    """Demo person; known_addresses is the one side of Address.person."""
    __tablename__ = "person"

    id = Column(Integer(), primary_key=True)
    name = Column(String(30))
    known_addresses = relationship('Address', backref='person')

    def __repr__(self):
        return "<Person {}>".format(self.name)
class Address(Base):
    """Demo address; many-to-one to Person via person_id."""
    __tablename__ = "address"

    id = Column(Integer(), primary_key=True)
    person_id = Column(Integer(), ForeignKey(Person.id, ondelete='cascade'))
    city = Column(String(30))
    zip_code = Column(String(10))

    def __repr__(self):
        return "<Address {}>".format(self.zip_code)
Base.metadata.create_all(engine)

# Fixture data: P1 owns zip codes 123, 345, 123; P2 owns 123.
p1 = Person(name="P1")
session.add(p1)
p2 = Person(name="P2")
session.add(p2)
# Commit first so the Person rows have ids before Addresses reference them.
session.commit()
a1 = Address(person_id=p1.id, zip_code="123")
session.add(a1)
a2 = Address(person_id=p1.id, zip_code="345")
session.add(a2)
a3 = Address(person_id=p2.id, zip_code="123")
session.add(a3)
a4 = Address(person_id=p1.id, zip_code="123")
session.add(a4)
session.commit()
def get_persons_in_zip_code(zip_code):
    """Persons with an address in *zip_code*; known_addresses is restricted
    to exactly the matched Address rows via contains_eager."""
    return session.query(Person).\
        join(Person.known_addresses).\
        filter(Address.zip_code == zip_code).\
        options(contains_eager(Person.known_addresses))
def distinct_person_count(q):
    """Count distinct Person rows of query *q* (drop-in for Query.count,
    which would otherwise count joined person/address rows)."""
    stmt = q.statement.with_only_columns([func.count(distinct(Person.id))])
    result = q.session.execute(stmt)
    return result.scalar()
results = get_persons_in_zip_code("123")
# Monkeypatch .count() on this one query instance so it reports distinct
# persons rather than joined (person, address) rows.
results.count = MethodType(distinct_person_count, results)
print(results.count())
for person in results:
    print(person)
    for address in person.known_addresses:
        print(address)
Output:
2
<Person P1>
<Address 123>
<Address 123>
<Person P2>
<Address 123>

SQLAlchemy adding column with aggregate function to a dynamic loader list (AppenderQuery)

I get an incorrect record set, while adding an aggregate function like func.sum on a dynamic relationship. I have listed out a sample code below to demonstrate this.
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import (
    relationship,
    scoped_session,
    sessionmaker,
    backref
)
from sqlalchemy import (
    create_engine,
    Table,
    Column,
    Integer,
    String,
    ForeignKey,
    func
)
from zope.sqlalchemy import ZopeTransactionExtension
import transaction

Base = declarative_base()
# Session registry tied to the zope transaction manager (see the
# "with transaction.manager:" block further down).
DBSession = scoped_session(sessionmaker(extension=ZopeTransactionExtension()))
class User(Base):
    """Author; article_list is a dynamic (query-returning) relationship."""
    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    userid = Column(String(15), unique=True, nullable=False)
    # lazy="dynamic" makes user.article_list an AppenderQuery that can be
    # further joined/filtered before execution.
    article_list = relationship("Article", backref="user", lazy="dynamic")
class Tag(Base):
    """Tag lookup; articles are reachable via the Article.tags backref."""
    __tablename__ = 'tags'

    id = Column(Integer, primary_key=True)
    name = Column(String(25), nullable=False, unique=True)
class Article(Base):
    """Article row; many-to-many with Tag through the tag_map table."""
    __tablename__ = 'articles'

    id = Column(Integer, primary_key=True)
    title = Column(String(25), nullable=False)
    duration = Column(Integer)
    user_id = Column(Integer, ForeignKey('users.id'), nullable=False)
    # The backref gives Tag a dynamic article_list of its own.
    tags = relationship('Tag', secondary="tag_map",
                        backref=backref("article_list", lazy="dynamic"))
# Association table backing the Article<->Tag many-to-many.
tag_map_table = Table(
    'tag_map', Base.metadata,
    Column('tag_id', Integer, ForeignKey('tags.id'), nullable=False),
    Column('article_id', Integer, ForeignKey('articles.id'), nullable=False))
engine = create_engine('sqlite:///tag_test.sqlite')
DBSession.configure(bind=engine)
Base.metadata.create_all(engine)

# Fixture data: two tags shared by both of john's articles.
with transaction.manager:
    t1 = Tag(name='software')
    t2 = Tag(name='hardware')
    john = User(userid='john')
    a1 = Article(title='First article', duration=300)
    a1.user = john
    a1.tags.append(t1)
    a1.tags.append(t2)
    DBSession.add(a1)
    a2 = Article(title='Second article', duration=50)
    a2.user = john
    a2.tags.append(t1)
    a2.tags.append(t2)
    # Fixed: the original re-added a1 here; the freshly built a2 is the
    # object this statement was clearly meant to add (a2 would still reach
    # the DB via the a2.user relationship cascade, but the duplicate
    # add(a1) was a typo).
    DBSession.add(a2)
As we see above in the code, I have added two tags for both the articles. Now I want to query the articles written by the user 'John' grouped by tags along with it I want to find the sum of each tag duration.
john = DBSession.query(User).filter(User.userid=='john').first()
# Sum of durations grouped by tag, built on the dynamic article_list.
# NOTE(review): the missing-row behavior described below appears to come
# from the ORM de-duplicating result rows that carry the same Article
# identity; adding a distinguishing column (Tag.name, as done further down)
# makes each row unique -- confirm against Query result-dedup semantics.
res = john.article_list.join(Article.tags).add_column(
    func.sum(Article.duration)).group_by(Tag.id)
for article, tsum in res:
    print ("Article : %s, Sum duration : %d" % (article.title, tsum))
The query generated for res is
SELECT articles.id AS articles_id, articles.title AS articles_title, articles.duration AS articles_duration, articles.user_id AS articles_user_id, sum(articles.duration) AS sum_1
FROM articles JOIN tag_map AS tag_map_1 ON articles.id = tag_map_1.article_id JOIN tags ON tags.id = tag_map_1.tag_id
WHERE :param_1 = articles.user_id GROUP BY tags.id
which when executed directly on the sqlite database yields two rows corresponding to the two tags
2|Second article|50|1|350
2|Second article|50|1|350
Whereas, the results returned by SQLAlchemy reflect only one row
Article : Second article, Sum duration : 350
But, if I add an extra column to contain tag-name in the AppenderQuery object
# Same aggregate, but with Tag.name in the select list; each row is now
# distinct, so both tag rows are yielded.
res = john.article_list.join(Article.tags).add_column(Tag.name).add_column(
    func.sum(Article.duration)).group_by(Tag.id)
for article, tag_name, tsum in res:
    print ("Article : %s, Tag : %s, Sum duration : %d" % (
        article.title, tag_name, tsum))
I get proper results
Article : Second article, Tag : software, Sum duration : 350
Article : Second article, Tag : hardware, Sum duration : 350
So, what is the right way of using aggregate functions on AppenderQuery object in order to get categorized results?

SQL Alchemy NULL identity key

I've defined the model's id field of the table like this:
# NOTE(review): Integer(15, unsigned=True) is MySQL-dialect syntax; the
# generic sqlalchemy.Integer takes no such arguments -- presumably this
# relies on sqlalchemy.dialects.mysql.INTEGER.  Confirm which type is
# actually imported in the full model.
id = Column(Integer(15, unsigned=True),
            nullable=False,
            server_default='0',
            primary_key=True,
            unique=True,
            autoincrement=True)
and altered the database(MySQL) table accordingly but still when I create the model
and try to commit it (Im using SQLalchemy 0.7.8)
m = MyModel(values without defining the id)
session.add(m)
session.commit()
I get this error
FlushError: Instance <MyModel at 0x4566990> has a NULL identity key.
If this is an auto-generated value, check that the database table
allows generation of new primary key values, and that the mapped
Column object is configured to expect these generated values. Ensure
also that this flush() is not occurring at an inappropriate time, such
as within a load() event.
I use Postgres 13, and the ID column's type is the UUID data type. I ran into the same issue.
I have solved it by applying server_default.
class TrafficLightController(Base):
    """Traffic-light controller row; id is generated server-side."""
    __tablename__ = 'Tlc'

    # NOTE(review): a plain-string server_default is normally rendered as a
    # quoted SQL literal; emitting the function call itself usually needs
    # text('uuid_generate_v4()') -- confirm the DDL this actually produces.
    # UUID / JSONB here come from sqlalchemy.dialects.postgresql (imports not
    # shown in this snippet).
    id = Column(UUID, primary_key=True, server_default='uuid_generate_v4()')
    type_id = Column('type_id', UUID)
    title = Column('title', String(100))
    gps_x = Column('gps_x', Float)
    gps_y = Column('gps_y', Float)
    hardware_config = Column('hardware_config', JSONB)
    lcu_id = Column('lcu_id', UUID)
    signal_id = Column('signal_id', UUID)

    def __init__(self, type_id, title, gps_x, gps_y, hardware_config, lcu_id, signal_id):
        # id is deliberately not assigned: the server default generates it.
        self.type_id = type_id
        self.title = title
        self.gps_x = gps_x
        self.gps_y = gps_y
        self.hardware_config = hardware_config
        self.lcu_id = lcu_id
        self.signal_id = signal_id
if __name__ == "__main__":
    dbschema = 'asudd'
    # Fixed: the libpq options flag was malformed ('- csearch_path=...');
    # the correct spelling is '-csearch_path=<schema>'.
    engine = create_engine(DB_CONNECTION_STR, connect_args={'options': '-csearch_path={}'.format(dbschema)})
    Session = sessionmaker(bind=engine)
    # Fixed: the original re-bound "Base = declarative_base()" here, creating
    # a *new*, empty metadata, so create_all() emitted no DDL for
    # TrafficLightController (declared against the original Base above).
    Base.metadata.create_all(engine)
    session = Session()
    tlc_obj = TrafficLightController("b0322313-0995-40ac-889c-c65702e1841e", "test DK", 35, 45, "{}", None, None)
    session.add(tlc_obj)
    # Fixed: without a commit the INSERT was never flushed or persisted.
    session.commit()
I solved it by removing the server_default value

Categories

Resources