SQLAlchemy mapping a table union to a class - python

I'm using SQLAlchemy to query a number of similar tables and union the results. The tables hold rows of customer information, but our current database structures it so that different groups of customers are in their own tables, e.g. client_group1, client_group2, client_group3:
client_group1:

| id | name | email               |
| 1  | john | johnsmith@gmail.com |
| 2  | greg | gregjones@gmail.com |
Each of the other tables has identical columns. If I'm using SQLAlchemy declarative_base, I can have a class for client_group1 like the following:
class ClientGroup1(Base):
    __tablename__ = 'client_group1'
    __table_args__ = {u'schema': 'clients'}

    id = Column(Integer, primary_key=True)
    name = Column(String(32))
    email = Column(String(32))
Then I can do queries such as:
session.query(ClientGroup1.name)
However, if I use union_all to combine a bunch of client tables into a viewport, such as:
query1 = session.query(ClientGroup1.name)
query2 = session.query(ClientGroup2.name)
viewport = union_all(query1, query2)
then I'm not sure how to map a viewport to an object, and instead I have to access viewport columns using:
viewport.c.name
Is there any way to map the viewport to a specific table structure, especially considering the fact that each class points to a different __tablename__?

Read the Concrete Table Inheritance documentation for the idea behind this approach. The code below is a runnable example:
from sqlalchemy import create_engine, Column, String, Integer
from sqlalchemy.orm import sessionmaker, configure_mappers
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.declarative import AbstractConcreteBase

engine = create_engine('sqlite:///:memory:', echo=True)
Session = sessionmaker(bind=engine)
session = Session()
Base = declarative_base(engine)

class ClientGroupBase(AbstractConcreteBase, Base):
    pass

class ClientGroup1(ClientGroupBase):
    __tablename__ = 'client_group1'
    # __table_args__ = {'schema': 'clients'}
    __mapper_args__ = {
        'polymorphic_identity': 'client_group1',
        'concrete': True,
    }
    id = Column(Integer, primary_key=True)
    name = Column(String(32))
    email = Column(String(32))

class ClientGroup2(ClientGroupBase):
    __tablename__ = 'client_group2'
    # __table_args__ = {'schema': 'clients'}
    __mapper_args__ = {
        'polymorphic_identity': 'client_group2',
        'concrete': True,
    }
    id = Column(Integer, primary_key=True)
    name = Column(String(32))
    email = Column(String(32))
def _test_model():
    # create tables for all mapped classes
    Base.metadata.create_all()
    print('-' * 80)
    # configure mappers, which builds the polymorphic union (see documentation)
    configure_mappers()
    print('-' * 80)
    # add some test data
    session.add(ClientGroup1(name="name1"))
    session.add(ClientGroup1(name="name1"))
    session.add(ClientGroup2(name="name1"))
    session.add(ClientGroup2(name="name1"))
    session.commit()
    print('-' * 80)
    # perform a polymorphic query across all client tables
    q = session.query(ClientGroupBase).all()
    for r in q:
        print(r)

if __name__ == '__main__':
    _test_model()
The above example has the added benefit that you can also create new objects and query only some of the tables.
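For instance (a short illustrative snippet reusing the classes above; the data values are made up):

# query a single concrete table
group1_names = session.query(ClientGroup1.name).all()
# insert new rows through the concrete classes as usual
session.add(ClientGroup2(name="name2", email="name2@example.com"))
session.commit()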
You could do it by mapping an SQL VIEW to a class, but you need to specify a primary key explicitly (see Is possible to mapping view with class using mapper in SqlAlchemy?). In your case, I am afraid, this might not work because of the same PK value appearing in multiple tables, and using a multi-column PK might not be the best idea.
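For illustration, here is a minimal sketch of that approach using a classical mapper over a union selectable. The AllClients name is mine, and the sketch assumes id values never collide across the client tables (which, as noted above, is unlikely to hold here):

from sqlalchemy import select, union_all
from sqlalchemy.orm import mapper

# a selectable that unions the concrete tables, aliased so it can be selected from
clients_union = union_all(
    select([ClientGroup1.__table__]),
    select([ClientGroup2.__table__]),
).alias('clients_union')

class AllClients(object):
    pass

# a union has no primary key of its own, so it must be given explicitly
mapper(AllClients, clients_union, primary_key=[clients_union.c.id])

# usage: session.query(AllClients).filter(AllClients.name == 'john').all()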

Related

Create one-to-one and one-to-many between two tables in sqlalchemy

What I'm trying to do seems simple. I'd like to have a parent Organization which has child Groups. However, one group will be the "main_group". Each Organization should have a main group. Each Group should have a reference to the Organization in which it belongs.
I've been able to create_all and use these Models but in my tests when I do a drop_all, I get a
sqlalchemy.exc.CircularDependencyError: Can't sort tables for DROP; an unresolvable foreign key dependency exists between tables: groups, organizations. Please ensure that the ForeignKey and ForeignKeyConstraint objects involved in the cycle have names so that they can be dropped using DROP CONSTRAINT.
Here is a minimal example of what I'm trying to do. I've left some commented lines in to show what I've tried.
from sqlalchemy.orm import relationship
from sqlalchemy.sql.schema import ForeignKey
from sqlalchemy.sql.sqltypes import Boolean, Date, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Integer, ForeignKey, String, Column

Base = declarative_base()

class Organization(Base):
    __tablename__ = "organizations"
    id = Column(Integer(), primary_key=True)
    name = Column(String(100))

    ### Define one-to-one with Group
    main_group_id = Column(Integer, ForeignKey("groups.id"))
    main_group = relationship(
        "Group",
        # uselist=False,
        # back_populates="organization",
        foreign_keys=[main_group_id],
        primaryjoin="Group.id==Organization.main_group_id",
        post_update=True,
        # backref="organization"
    )
    ####

    ## Defines the one-to-many collection with Group
    # groups = relationship(
    #     "Group",
    #     uselist=True,
    #     # foreign_keys="Group.organization_id",
    #     primaryjoin="Group.id==Organization.main_group_id",
    #     # back_populates="organization",
    # )

class Group(Base):
    __tablename__ = "groups"
    id = Column(Integer, primary_key=True)
    name = Column(String(100))
    organization_id = Column(Integer, ForeignKey("organizations.id"), nullable=False)
    organization = relationship(
        "Organization",
        uselist=False,
        # backref="groups",
        foreign_keys=[organization_id],
        primaryjoin="Group.organization_id==Organization.id",
        # primaryjoin="Group.id==Organization.main_group_id",
        post_update=True,
    )

from sqlalchemy import create_engine
from sqlalchemy.orm.session import sessionmaker

conn_string = "sqlite:///example.sql"
engine = create_engine(conn_string)
Base.metadata.create_all(engine)  # here we create all tables

Session = sessionmaker(bind=engine)
session = Session()

new_org = Organization(name="my new org")
print(new_org)
session.add(new_org)
org = session.query(Organization).get(1)
new_group = Group(name="my main group", organization=org, organization_id=org.id)
new_org.main_group = new_group
session.commit()
Base.metadata.drop_all(engine)
From The Docs
There is actually an example pretty similar to what you want here, which uses an explicitly named foreign key, fk_favorite_entry:
rows-that-point-to-themselves-mutually-dependent-rows
That doesn't fully solve the drop error, which seems to be dialect-dependent. You can fully solve it with use_alter=True, which is explained here:
creating-dropping-foreign-key-constraints-via-alter
Likely Solution
Best case would be to name the atypical constraint and to set use_alter=True on it as well.
class Organization(Base):
    # ...
    main_group_id = Column(
        Integer, ForeignKey("groups.id", name="fk_main_group", use_alter=True)
    )
    # ...
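For reference, a minimal self-contained sketch of that fix (the fk_main_group name is illustrative); with the constraint named and marked use_alter, both create_all and drop_all complete without the CircularDependencyError:

from sqlalchemy import create_engine, Column, ForeignKey, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship

Base = declarative_base()

class Organization(Base):
    __tablename__ = "organizations"
    id = Column(Integer, primary_key=True)
    name = Column(String(100))
    # naming the constraint and setting use_alter=True breaks the DROP cycle
    main_group_id = Column(Integer, ForeignKey("groups.id", name="fk_main_group", use_alter=True))
    main_group = relationship(
        "Group",
        foreign_keys=[main_group_id],
        primaryjoin="Group.id==Organization.main_group_id",
        post_update=True,
    )

class Group(Base):
    __tablename__ = "groups"
    id = Column(Integer, primary_key=True)
    name = Column(String(100))
    organization_id = Column(Integer, ForeignKey("organizations.id"), nullable=False)

engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
Base.metadata.drop_all(engine)  # no CircularDependencyError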

SQLAlchemy: dynamically apply filter on relationship column ahead of time

I have two SQLAlchemy classes representing a many-to-one relationship, say:
class Person(Base):
    __tablename__ = 'person'
    id = Column(Integer(), primary_key=True)
    name = Column(String(30))
    known_addresses = relationship('Address', backref='person')

class Address(Base):
    __tablename__ = 'address'
    id = Column(Integer(), primary_key=True)
    person_id = Column(Integer(), ForeignKey(Person.id, ondelete='cascade'))
    city = Column(String(30))
    zip_code = Column(String(10))
Now, say I have a function that returns a Person query (a Query object) filtered by zip code:

def get_persons_in_zip_code(zip_code):
    return session.query(Person).\
        join(Address).\
        filter(Address.zip_code == zip_code)
Once I return the query, I have no control over it; it is expected to encapsulate all the data that the framework I'm using (in my case Django/DRF) needs to render a list of persons along with their addresses (so the code iterates over the query, calling .known_addresses for each person and rendering that as well).
Here's the catch: I want to ensure that calling .known_addresses returns only the addresses matched in the original, zip_code-filtered query - not all addresses related to that person.
Is there a way to achieve this in SQLAlchemy without accessing Person objects returned at later stages? That is, I can only modify my get_persons_in_zip_code function, or the original SQLAlchemy classes, but have no access to the Person objects returned from the query, as that happens deep inside the framework rendering code.
EDIT: It's also important that calling count() on the returned query object yields the number of expected Person objects, not the number of Address objects.
It seems like what you are looking for is contains_eager.
EDIT: An updated version that monkeypatches the .count() function to return only the distinct Person count.
from sqlalchemy import Integer, Column, String, ForeignKey
from sqlalchemy import create_engine, func, distinct
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker, contains_eager
from types import MethodType

engine = create_engine('sqlite:///:memory:', echo=True)
Session = sessionmaker(bind=engine)
session = Session()
Base = declarative_base()

class Person(Base):
    __tablename__ = "person"
    id = Column(Integer(), primary_key=True)
    name = Column(String(30))
    known_addresses = relationship('Address', backref='person')

    def __repr__(self):
        return "<Person {}>".format(self.name)

class Address(Base):
    __tablename__ = "address"
    id = Column(Integer(), primary_key=True)
    person_id = Column(Integer(), ForeignKey(Person.id, ondelete='cascade'))
    city = Column(String(30))
    zip_code = Column(String(10))

    def __repr__(self):
        return "<Address {}>".format(self.zip_code)

Base.metadata.create_all(engine)

p1 = Person(name="P1")
session.add(p1)
p2 = Person(name="P2")
session.add(p2)
session.commit()

a1 = Address(person_id=p1.id, zip_code="123")
session.add(a1)
a2 = Address(person_id=p1.id, zip_code="345")
session.add(a2)
a3 = Address(person_id=p2.id, zip_code="123")
session.add(a3)
a4 = Address(person_id=p1.id, zip_code="123")
session.add(a4)
session.commit()

def get_persons_in_zip_code(zip_code):
    # join and filter on Address, then tell the ORM to populate
    # Person.known_addresses from the already-joined (filtered) rows
    return session.query(Person).\
        join(Person.known_addresses).\
        filter(Address.zip_code == zip_code).\
        options(contains_eager(Person.known_addresses))

def distinct_person_count(q):
    # count distinct Person ids instead of joined Person/Address rows
    count_q = q.statement.with_only_columns([func.count(distinct(Person.id))])
    return q.session.execute(count_q).scalar()

results = get_persons_in_zip_code("123")
results.count = MethodType(distinct_person_count, results)
print(results.count())
for person in results:
    print(person)
    for address in person.known_addresses:
        print(address)
Output:
2
<Person P1>
<Address 123>
<Address 123>
<Person P2>
<Address 123>

How to use SQLAlchemy table schema to load data

I have two scripts, schema.py and load_data.py. In schema.py, I define the schema for over 20 tables using the SQLAlchemy declarative Base. Two of the tables look like:
schema.py
Base = declarative_base()
meta = MetaData()

class Table1(Base):
    __tablename__ = 'table1'
    id = Column(Integer, primary_key=True)
    name = Column(String)

class Table2(Base):
    __tablename__ = 'table2'
    id = Column(Integer, primary_key=True)
    bdate = Column(Date)

...

class Table20(Base):
    __tablename__ = 'table20'
    id = Column(Integer, primary_key=True)
    bdate = Column(Date)
I want to use load_data.py to copy those ~20 tables from one database to another. My question is: how do I create the tables in load_data.py using the schema I defined in schema.py? Following the examples in the Introductory Tutorial of Python's SQLAlchemy, I use import to load all the table schema classes, but I find it too messy. Is there any better way to handle this situation? I am new to SQLAlchemy, so please bear with me if this question seems too naive.
load_data.py
from schema import Base, Table1, Table2, Table3, Table4, Table5, Table6, Table7, Table8, Table9, Table10, ..., Table20

src_engine = create_engine('sqlite:///sqlite_test.db')
dst_engine = create_engine('postgresql:///postgresql_test.db')
Base.metadata.create_all(dst_engine)

tables = Base.metadata.tables
for tbl in tables:
    data = src_engine.execute(tables[tbl].select()).fetchall()
    for a in data:
        print(a)
    if data:
        dst_engine.execute(tables[tbl].insert(), data)
Try from schema import *, which imports all members of a module. See also these answers on the difference between import schema and from schema import x. A sketch of an alternative that avoids importing the classes by name follows below.
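Alternatively - a minimal sketch, assuming all the table classes live in schema.py and share the same Base - you can avoid naming the classes at all, because every declaratively mapped table is registered on Base.metadata:

# load_data.py
from sqlalchemy import create_engine
from schema import Base  # importing schema registers all 20 tables on Base.metadata

src_engine = create_engine('sqlite:///sqlite_test.db')
dst_engine = create_engine('postgresql:///postgresql_test.db')

Base.metadata.create_all(dst_engine)

# sorted_tables yields tables in foreign-key dependency order
for table in Base.metadata.sorted_tables:
    rows = src_engine.execute(table.select()).fetchall()
    if rows:
        dst_engine.execute(table.insert(), rows)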

Creating a query with few related tables in Pyramid with SQLAlchemy

I have defined few tables in Pyramid like this:
# coding: utf-8
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Integer, SmallInteger, Float, DateTime, ForeignKey, ForeignKeyConstraint, String, Column
from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref
from zope.sqlalchemy import ZopeTransactionExtension

DBSession = scoped_session(sessionmaker(extension=ZopeTransactionExtension()))
Base = declarative_base()

class Codes(Base):
    __tablename__ = 'Code'
    __table_args__ = {u'schema': 'Locations'}

    id = Column(Integer, nullable=False)
    code_str = Column(String(9), primary_key=True)
    name = Column(String(100))

    incoming = relationship(u'Voyages', primaryjoin='Voyages.call == Codes.code_str', backref=backref('Code'))

class Locations(Base):
    __tablename__ = 'Location'
    __table_args__ = {u'schema': 'Locations'}

    unit_id = Column(ForeignKey(u'Structure.Definition.unit_id', ondelete=u'RESTRICT', onupdate=u'CASCADE'), primary_key=True, nullable=False)
    timestamp = Column(DateTime, primary_key=True, nullable=False)
    longitude = Column(Float)
    latitude = Column(Float)

class Voyages(Base):
    __tablename__ = 'Voyage'
    __table_args__ = (
        ForeignKeyConstraint(
            ['unit_id', 'voyage_id'],
            [u'Locations.Voyages.unit_id', u'Locations.Voyages.voyage_id'],
            ondelete=u'RESTRICT', onupdate=u'CASCADE'),
        {u'schema': 'Locations'},
    )

    uid = Column(Integer, primary_key=True)
    unit_id = Column(Integer)
    voyage_id = Column(Integer)
    departure = Column(ForeignKey(u'Locations.Code.code_str', ondelete=u'RESTRICT', onupdate=u'CASCADE'))
    call = Column(ForeignKey(u'Locations.Code.code_str', ondelete=u'RESTRICT', onupdate=u'CASCADE'))
    departure_date = Column(DateTime)

    voyage_departure = relationship(u'Codes', primaryjoin='Voyages.departure == Codes.code_str')
    voyage_call = relationship(u'Codes', primaryjoin='Voyages.call == Codes.code_str')

class Definitions(Base):
    __tablename__ = 'Definition'
    __table_args__ = {u'schema': 'Structure'}

    unit_id = Column(Integer, primary_key=True)
    name = Column(String(90))
    type = Column(ForeignKey(u'Structure.Type.id', ondelete=u'RESTRICT', onupdate=u'CASCADE'))

    locations = relationship(u'Locations', backref=backref('Definition'))
    dimensions = relationship(u'Dimensions', backref=backref('Definition'))
    types = relationship(u'Types', backref=backref('Definition'))
    voyages = relationship(u'Voyages', backref=backref('Definition'))

class Dimensions(Base):
    __tablename__ = 'Dimension'
    __table_args__ = {u'schema': 'Structure'}

    unit_id = Column(ForeignKey(u'Structure.Definition.unit_id', ondelete=u'RESTRICT', onupdate=u'CASCADE'), primary_key=True, nullable=False)
    length = Column(Float)

class Types(Base):
    __tablename__ = 'Type'
    __table_args__ = {u'schema': 'Structure'}

    id = Column(SmallInteger, primary_key=True)
    type_name = Column(String(255))
    type_description = Column(String(255))
What I am trying to do here is find a specific row in the Codes table (filtered by code_str) and get all related tables in return, under the condition that the Location table returns only its last row by timestamp, the Voyage table returns only its last row by departure, and all information from the Definitions table is included.
I started to create a query from scratch and came up with something like this:
string_to_search = request.matchdict.get('code')

sub_dest = DBSession.query(
        func.max(Voyages.departure).label('latest_voyage_timestamp'),
        Voyages.unit_id,
        Voyages.call.label('destination_call')).\
    filter(Voyages.call == string_to_search).\
    group_by(Voyages.unit_id, Voyages.call).\
    subquery()

query = DBSession.query(Codes, Voyages).\
    join(sub_dest, sub_dest.c.destination_call == Codes.code_str).\
    outerjoin(Voyages, sub_dest.c.latest_voyage_timestamp == Voyages.departure_date)
but I have noticed that when I iterate over my results (like for code, voyage in query) I am actually iterating over every Voyage returned. In theory that is not a big problem for me, but I am trying to construct a JSON response with basic information from the Codes table which would include all related Voyages (if there are any at all).
For example:
code_data = {}
all_units = []

for code, voyage in query:
    if code_data is not {}:
        code_data = {
            'code_id': code.id,
            'code_str': code.code_str,
            'code_name': code.name,
        }
    single_unit = {
        'unit_id': voyage.unit_id,
        'unit_departure': str(voyage.departure_date) if voyage.departure_date else None,
    }
    all_units.append(single_unit)

return {
    'code_data': exception.message if exception else code_data,
    'voyages': exception.message if exception else all_units,
}
Now, this seems a bit wrong because I don't like rewriting code_data on each iteration, which is why I put the if code_data is not {} line there, but I suppose it would be much more logical to iterate in a way similar to this:
for code in query:
    code_data = {
        'code_id': code.id,
        'code_str': code.code_str,
        'code_name': code.name,
    }
    for voyage in code.voyages:
        single_unit = {
            'unit_id': voyage.unit_id,
            'unit_departure': str(voyage.departure) if voyage.departure else None,
        }
        all_units.append(single_unit)

return {
    'code_data': exception.message if exception else code_data,
}
So I would get only a single Code in return (since I queried the db for that specific Code), which would then have all Voyages related to it as a nested value and, of course, in each Voyage, all other information related to the Definition of the particular Unit...
Is my approach good at all in the first place, and how could I construct my query in order to iterate over it in this second way?
I'm using Python 2.7.6, SQLAlchemy 0.9.7 and Pyramid 1.5.1 with Postgres database.
Thanks!
Try changing the outer query like so:
query = DBSession.query(Codes).options(contains_eager('incoming')).\
    join(sub_dest, sub_dest.c.destination_call == Codes.code_str).\
    outerjoin(Voyages, sub_dest.c.latest_voyage_timestamp == Voyages.departure_date)
In case of problems, try calling the options(...) part like so:
(...) .options(contains_eager(Codes.incoming)). (...)
This should result in a single Codes instance being returned with Voyages objects accessible via the relationship you've defined (incoming), so you could proceed with:
results = query.all()
for code in results:
    print code
    # do something with code.incoming

# actually, you should get only one code, so if this proves to work you should
# use query.one() so that an exception is thrown in case anything other than
# a single Code is returned
of course you need an import, e.g.: from sqlalchemy.orm import contains_eager

Why isn't SQLAlchemy creating serial columns?

SQLAlchemy is generating, but not enabling, sequences for columns in postgresql. I suspect I may be doing something wrong in engine setup.
Using an example from the SQLAlchemy tutorial (http://docs.sqlalchemy.org/en/rel_0_9/orm/tutorial.html):
#!/usr/bin/env python
from sqlalchemy import create_engine, Column, Integer, String, Sequence
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class User(Base):
    __tablename__ = 'users'
    id = Column(Integer, Sequence('user_id_seq'), primary_key=True)
    name = Column(String(50))
    fullname = Column(String(50))
    password = Column(String(12))

    def __repr__(self):
        return "<User(name='%s', fullname='%s', password='%s')>" % (
            self.name, self.fullname, self.password)

db_url = 'postgresql://localhost/serial'
engine = create_engine(db_url, echo=True)
Base.metadata.create_all(engine)
With this script, the following table is generated:
serial=# \d+ users
Table "public.users"
Column | Type | Modifiers | Storage | Stats target | Description
----------+-----------------------+-----------+----------+--------------+-------------
id | integer | not null | plain | |
name | character varying(50) | | extended | |
fullname | character varying(50) | | extended | |
password | character varying(12) | | extended | |
Indexes:
"users_pkey" PRIMARY KEY, btree (id)
Has OIDs: no
However, a sequence was created:
serial=# select sequence_schema,sequence_name,data_type from information_schema.sequences ;
sequence_schema | sequence_name | data_type
-----------------+---------------+-----------
public | user_id_seq | bigint
SQLAlchemy 0.9.1, Python 2.7.5+, Postgresql 9.3.1, Ubuntu 13.10
-Reece
This is because you provided an explicit Sequence. The SERIAL datatype in PostgreSQL generates its own sequence, which SQLAlchemy knows how to locate - so if you omit the Sequence, SQLAlchemy will render SERIAL, assuming the intent is that the column is auto-incrementing (which is determined by the autoincrement argument in conjunction with an Integer primary_key; it defaults to True). But when a Sequence is passed, SQLAlchemy sees the intent that you don't want the sequence implicitly created by SERIAL, but instead the one you are specifying:
from sqlalchemy import create_engine, Column, Integer, String, Sequence
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class T1(Base):
    __tablename__ = 't1'
    # emits CREATE SEQUENCE + INTEGER
    id = Column(Integer, Sequence('user_id_seq'), primary_key=True)

class T2(Base):
    __tablename__ = 't2'
    # emits SERIAL
    id = Column(Integer, primary_key=True)

class T3(Base):
    __tablename__ = 't3'
    # emits INTEGER
    id = Column(Integer, autoincrement=False, primary_key=True)

engine = create_engine("postgresql://scott:tiger@localhost/test", echo=True)
Base.metadata.create_all(engine)
output:
CREATE SEQUENCE user_id_seq

CREATE TABLE t1 (
    id INTEGER NOT NULL,
    PRIMARY KEY (id)
)

CREATE TABLE t2 (
    id SERIAL NOT NULL,
    PRIMARY KEY (id)
)

CREATE TABLE t3 (
    id INTEGER NOT NULL,
    PRIMARY KEY (id)
)
If you need to create the sequence explicitly for some reason, like setting a start value, and still want the same default value behavior as when using the Column(Integer, primary_key=True) notation, it can be accomplished with the following code:
#!/usr/bin/env python
from sqlalchemy import create_engine, Column, Integer, String, Sequence
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

USER_ID_SEQ = Sequence('user_id_seq')  # define sequence explicitly

class User(Base):
    __tablename__ = 'users'
    # use the sequence in the column definition, and pass .next_value() as server_default
    id = Column(Integer, USER_ID_SEQ, primary_key=True, server_default=USER_ID_SEQ.next_value())
    name = Column(String(50))
    fullname = Column(String(50))
    password = Column(String(12))

    def __repr__(self):
        return "<User(name='%s', fullname='%s', password='%s')>" % (
            self.name, self.fullname, self.password)

db_url = 'postgresql://localhost/serial'
engine = create_engine(db_url, echo=True)
Base.metadata.create_all(engine)
Reece
I also used that tutorial as a model, and just could not get it to work with any Postgres tables that already existed and had key ID columns with serial sequences generating the new key ID values.
Like David, I found the Sequence had to be defined separately from the class. For anyone using the "db.Model" approach, here's one example.
from flask.ext.sqlalchemy import SQLAlchemy
from sqlalchemy import Sequence

db = SQLAlchemy()

pageimpression_imp_id_seq = Sequence('pageimpression_imp_id_seq')

class PageImpression(db.Model):
    __tablename__ = 'pageimpression'
    imp_id = db.Column(db.Integer,
                       pageimpression_imp_id_seq,
                       server_default=pageimpression_imp_id_seq.next_value(),
                       primary_key=True)
    logdate = db.Column(db.DateTime)
    sessionid = db.Column(db.String)
    path = db.Column(db.String)
    referrer = db.Column(db.String)

    def __init__(self, imp_id, logdate, sessionid, path, referrer):
        self.imp_id = imp_id
        self.logdate = logdate
        self.sessionid = sessionid
        self.path = path
        self.referrer = referrer

    def __repr__(self):
        return "<PageImpression(imp_id='%s', logdate='%s', sessionid='%s', path='%s', referrer='%s')>" % (
            self.imp_id, self.logdate, self.sessionid, self.path, self.referrer)
def PageImpressionAdd(sessionid):
    sessionid = 0  # dummy value for unit testing
    current_time = datetime.now().isoformat()
    if CurrentConfig.IMPRESSION_LOGGING_ON == True:
        path = request.path
        if request.environ.get('HTTP_REFERER') and not request.environ.get('HTTP_REFERER').isspace():
            referrer = request.environ.get('HTTP_REFERER')  # the string is not empty
        else:
            referrer = ''  # the string is empty
        from website.models import PageImpression
        thisPageImpression = PageImpression(None, current_time, sessionid, path, referrer)
        db.session.add(thisPageImpression)
        db.session.commit()
        # get the values created by the Postgres table defaults
        imp_id = thisPageImpression.imp_id
        logdate = thisPageImpression.logdate
    return current_time
You can also change a Sequence without any SQL script via the pgAdmin GUI, as below:
select your DB -> Schemas -> public -> Sequences -> right-click the sequence -> Properties -> Definition -> Current value.
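For completeness, the same can be done from code; here is a minimal sketch using PostgreSQL's setval() (the sequence name and start value are illustrative, reusing user_id_seq from the examples above):

from sqlalchemy import create_engine, text

engine = create_engine('postgresql://localhost/serial')
with engine.connect() as conn:
    # after this, the next nextval('user_id_seq') returns 1001;
    # setval() changes are non-transactional, so no commit is needed
    conn.execute(text("SELECT setval('user_id_seq', 1000)"))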
