SQLAlchemy doesn't autoload all my columns - python

I have an issue where I use the following:
class Docs(Base):
    __tablename__ = "docs"
    __table__ = Table(__tablename__, Base.metadata,
                      Column("dID", Integer, primary_key=True),
                      Column("d_type", Integer, primary_key=True),
                      Column("d_category", Integer, primary_key=True),
                      autoload_with=get_global_db_engine())
    __table_args__ = (UniqueConstraint("dID", "d_type", "d_category"),)

    # Class Globals
    COLUMNS = __table__.columns.keys()
The problem is that when I loop through COLUMNS, it doesn't list all the columns of __table__; it holds only the three columns I pre-defined inside the Table.
How do I get COLUMNS to return all of them?

You'll have to set extend_existing to extend your defined table with reflected columns.
From the documentation:
Table.extend_existing will also work in conjunction with Table.autoload to run a new reflection operation against the database, even if a Table of the same name is already present in the target MetaData; newly reflected Column objects and other options will be added into the state of the Table, potentially overwriting existing columns and options of the same name.
So,
__table__ = Table(__tablename__, Base.metadata,
                  Column("dID", Integer, primary_key=True),
                  Column("d_type", Integer, primary_key=True),
                  Column("d_category", Integer, primary_key=True),
                  extend_existing=True,
                  autoload_with=get_global_db_engine())
should do the trick.
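As a quick sanity check, here is a small sketch (reusing the question's get_global_db_engine() setup) of how to confirm the reflection picked everything up:
# after reflection with extend_existing=True, the keys should include every
# reflected column, not just the three declared explicitly
print(Docs.COLUMNS)
print(Docs.__table__.columns.keys())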

Related

SQLAlchemy: Creating two classes (models) for the same table

I am interacting with a database with a star schema. I have a fact table that depends on a dimension table in two different ways.
When writing SQL by hand, I join the dimension table twice under different names. I want to do the same thing in SQLAlchemy.
After reading the docs and this thread, what I have is:
from sqlalchemy import create_engine, Column, Integer, String, TIMESTAMP, Float, ForeignKey, Boolean
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relation, sessionmaker, synonym, aliased

Base = declarative_base()

class RBase(Base):
    __tablename__ = "regions"
    # id
    id = Column("id", Integer, primary_key=True, nullable=True)
    # Dimensions
    region = Column("region", String, nullable=True)
    _all_dimensions = ()

class Regions(RBase):
    # Dimensions
    __mapper_args__ = {
        'polymorphic_identity': 'Regions'
    }

class OtherRegions(RBase):
    # Dimensions
    otherregion = synonym("region")
    __mapper_args__ = {
        'polymorphic_identity': 'OtherRegions'
    }

class FactTable(Base):
    __tablename__ = "somefact"
    # Dimension ids
    sourceregionid = Column(
        "sourceregionid", Integer, ForeignKey("regions.id"), primary_key=True, nullable=True
    )
    targetregionid = Column(
        "targetregionid", Integer, ForeignKey("regions.id"), primary_key=True, nullable=True
    )
    # Facts
    examplefact = Column("examplefact", Float, nullable=True)
    # Relations
    region = relation(Regions, innerjoin=True, foreign_keys=[sourceregionid])
    otherregion = relation(OtherRegions, innerjoin=True, foreign_keys=[targetregionid])
The statement I'm getting from this is
SELECT regions.region, somefact.examplefact
FROM somefact
JOIN regions ON regions.id = somefact.sourceregionid
JOIN regions ON regions.id = somefact.targetregionid
WHERE regions.region = %(region_1)s
which causes an error. What I want is
SELECT regions.region, otherregions.region as otherregion, somefact.examplefact
FROM somefact
JOIN regions ON regions.id = somefact.sourceregionid
JOIN regions as otherregions ON otherregions.id = somefact.targetregionid
WHERE regions.region = %(region_1)s
I have spent a while going in circles about this, so any help would be much appreciated.
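For reference, a minimal sketch of the aliased-join approach hinted at by the aliased import above; session is assumed to be a configured Session and "some region" is a placeholder value:
other_regions = aliased(Regions)  # second, independently aliased join target

query = (
    session.query(Regions.region,
                  other_regions.region.label("otherregion"),
                  FactTable.examplefact)
    .select_from(FactTable)
    .join(Regions, Regions.id == FactTable.sourceregionid)
    .join(other_regions, other_regions.id == FactTable.targetregionid)
    .filter(Regions.region == "some region")
)
This renders the second join against regions with a generated alias name (e.g. regions_1 rather than the hand-written otherregions), but the shape of the query matches the desired SQL.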

sqlalchemy relationship through an intermediate table

I have three related classes: Parent, Child, SubChild. The relationship is one-many in both cases. I've set it up so child.parent references correctly and of course so that sub_child.child.parent works as well.
The thing is that I never actually need to know sub_child.child, but I do need to know the sub_child's ultimate parent. I'd like to set up a relationship such that sub_child.parent would return a reference to the ultimate Parent object.
Is this possible, or is it just a bad idea? I've read the documentation, but don't see much that looks promising.
I'm using python2 and sqlalchemy orm on mysql as a backend.
Have a look at http://docs.sqlalchemy.org/en/latest/orm/nonstandard_mappings.html
With this method you should be able to create a mapping over the three tables you mentioned and assign the columns of the participating tables as attributes to the mapping class.
from sqlalchemy import MetaData, Table, Column, Integer, String, ForeignKey, join
from sqlalchemy.orm import column_property
from sqlalchemy.ext.declarative import declarative_base

metadata = MetaData()

parent = Table('parent', metadata,
    Column('id', Integer, primary_key=True),
    Column('child', Integer, ForeignKey('child.id')),
)
child = Table('child', metadata,
    Column('id', Integer, primary_key=True),
    Column('subchild', Integer, ForeignKey('subchild.id')),
)
subchild = Table('subchild', metadata,
    Column('id', Integer, primary_key=True),
    Column('some_column', String),
)

# chain the joins explicitly; join() only takes two selectables at a time
joined = join(parent, child).join(subchild)

Base = declarative_base()

class Parent(Base):
    __table__ = joined
    # fold the three id columns into a single mapped attribute
    id = column_property(parent.c.id, child.c.id, subchild.c.id)
    subchild_attr = subchild.c.some_column
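A hedged usage sketch (session setup assumed; engine is whatever engine the tables live on, not shown in the original answer):
from sqlalchemy.orm import sessionmaker

Session = sessionmaker(bind=engine)  # 'engine' is assumed to exist already
session = Session()

p = session.query(Parent).first()
# the subchild's column is reachable directly on the joined mapping,
# without going through intermediate objects
print(p.subchild_attr)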

Strange error in SqlAlchemy-migrate on column.copy() with column type BigInteger

The situation is a little bit simplified. I have two migration files for sqlalchemy-migrate:
In First I create the table volume_usage_cache, then autoload it, create a copy of its columns, and print them:
from sqlalchemy import Column, DateTime
from sqlalchemy import Boolean, BigInteger, MetaData, Integer, String, Table

def upgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine

    # Create new table
    volume_usage_cache = Table('volume_usage_cache', meta,
        Column('deleted', Boolean(create_constraint=True, name=None)),
        Column('id', Integer(), primary_key=True, nullable=False),
        Column('curr_write_bytes', BigInteger(), default=0),
        mysql_engine='InnoDB',
        mysql_charset='utf8'
    )
    volume_usage_cache.create()

    volume_usage_cache = Table('volume_usage_cache', meta, autoload=True)
    columns = []
    [columns.append(column.copy()) for column in volume_usage_cache.columns]
    print columns
And I get in log what I expected:
[Column('deleted', Boolean(), table=None), Column('id', Integer(), table=None,
primary_key=True, nullable=False), Column('curr_write_bytes', BigInteger(),
table=None, default=ColumnDefault(0))]
But if I make a copy of the columns in the Second migration file (which is run after First):
from sqlalchemy import MetaData, String, Integer, Boolean, Table, Column, Index

def upgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine

    table = Table("volume_usage_cache", meta, autoload=True)
    columns = []
    for column in table.columns:
        columns.append(column.copy())
    print columns
I get a different result:
[Column('deleted', INTEGER(), table=None, default=ColumnDefault(0)),
Column(u'id', INTEGER(), table=None, primary_key=True, nullable=False),
Column(u'curr_write_bytes', NullType(), table=None)]
Why does the curr_write_bytes column have NullType?
There are two problems:
First:
In the First file we are using the old metadata, which already contains all the columns with the needed types.
If we create a new MetaData instance there, SQLAlchemy will load the table information from the database and we will get the same result as in the Second file.
Second:
SQLAlchemy's SQLite dialect has no support for the BigInteger column type, and SQLite itself does not enforce column types at all. So we can create the table with a BigInteger column (and it will work), but after autoload the type of such a column is converted to NullType.
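If keeping the BigInteger type across reflection matters, one possible workaround (a sketch, not from the original answer) is to declare that column explicitly when autoloading, since explicitly listed columns take precedence over the reflected ones:
from sqlalchemy import BigInteger

# declare the problematic column explicitly so the copy keeps
# BigInteger instead of the reflected NullType
table = Table("volume_usage_cache", meta,
              Column("curr_write_bytes", BigInteger(), default=0),
              autoload=True)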

Is there a convenient way to alias only conflicting columns when joining tables in SQLAlchemy?

Sometimes it is useful to map a class against a join instead of a single table when using SQLAlchemy's declarative extension. When column names collide, usually in a one-to-many because all primary keys are named id by default, you can use .alias() to prefix every column with its table name. That is inconvenient if you've already written code that assumes your mapped class has non-prefixed names.
For example:
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Table, Column, Integer, ForeignKeyConstraint

Base = declarative_base()

t1 = Table('t1',
    Base.metadata,
    Column('id', Integer, primary_key=True))

t2 = Table('t2',
    Base.metadata,
    Column('id', Integer, primary_key=True),
    Column('fkey', Integer),
    ForeignKeyConstraint(['fkey'], [t1.c.id]))

class ST(Base):
    __table__ = t1.join(t2)

class ST2(Base):
    __table__ = t1.join(t2).alias()
ST has id and fkey properties; each name maps to the first table in the join that defines it, so the mapped class does not expose t2's primary key. ST2 has t1_id, t2_id and t2_fkey properties.
Is there a convenient way to alias only some of the columns from each table in the join so the mapped class exposes the more convenient non-prefixed property names for most mapped columns?
You can create an alias for each column separately with its label() method. So something similar to the following should be possible (not tested):
from sqlalchemy import select

def alias_dups(join):
    # column keys that appear on both sides of the join
    dups = set(col.key for col in join.left.columns) & \
           set(col.key for col in join.right.columns)
    columns = []
    for col in join.columns:
        if col.key in dups:
            # only colliding columns get the table-name prefix
            col = col.label('%s_%s' % (col.table.name, col.key))
        columns.append(col)
    return select(columns, from_obj=[join]).alias()
class ST2(Base):
    __table__ = alias_dups(t1.join(t2))
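If that works as intended, inspecting the mapped selectable should show prefixed names only for the colliding columns (a rough expectation, not verified against a live database):
print(ST2.__table__.columns.keys())
# roughly: ['t1_id', 't2_id', 'fkey'] - only the duplicated 'id' key is prefixed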

sqlalchemy backref slow

Hi, I have the following tables:
nfiletable = Table(
    'NFILE', base.metadata,
    Column('fileid', Integer, primary_key=True),
    Column('path', String(300)),
    Column('filename', String(50)),
    Column('filesize', Integer),
    schema='NATIVEFILES')  # ,autoload=True,autoload_with=engine)

sheetnames_table = Table(
    'SHEETNAMES', base.metadata, schema='NATIVEFILES',
    autoload=True, autoload_with=engine)

nfile_sheet_table = Table(
    'NFILE_SHEETNAME', base.metadata,
    Column('fileid', Integer, ForeignKey(nfiletable.c.fileid)),
    Column('sheetid', Integer, ForeignKey(sheetnames_table.c.sheet_id)),
    schema='NATIVEFILES')
and mappers:
nfile_mapper = mapper(Nfile, nfiletable)

mapper(Sheet, sheetnames_table, properties={
    'files': relation(
        Nfile, secondary=nfile_sheet_table,
        primaryjoin=(sheetnames_table.c.sheet_id == nfile_sheet_table.c.sheetid),
        secondaryjoin=(nfile_sheet_table.c.fileid == nfiletable.c.fileid),
        foreign_keys=[nfile_sheet_table.c.sheetid, nfile_sheet_table.c.fileid],
        backref='sheets')
})
When I do the following:
upl = Session.query(Nfile).filter_by(fileid=k).one()
sheetdb = []
for sheet in sheetstoadd:
    s = sheetcache[sheetname]
    sheetdb.append(s)
upl.sheets = sheetdb
Session.save(upl)
Session.flush()
the line upl.sheets = sheetdb takes forever.
It seems that all files for each sheet in sheetdb are loaded from the db.
How can I prevent this?
If Nfile.sheets references a huge collection, put lazy='dynamic' on the backref:
from sqlalchemy.orm import backref

mapper(Sheet, sheetnames_table, properties={
    'files': relation(
        Nfile, secondary=nfile_sheet_table,
        backref=backref('sheets', lazy='dynamic'))
})
All the primaryjoin/secondaryjoin/foreign_keys stuff is also not needed since your nfile_sheet_table has ForeignKey constructs on it.
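With the dynamic backref, Nfile.sheets returns a query rather than loading the whole collection up front; a small hedged sketch (k and some_sheet stand in for values from the question's context):
upl = Session.query(Nfile).filter_by(fileid=k).one()
print(upl.sheets.count())      # issues a SELECT COUNT, no full load
upl.sheets.append(some_sheet)  # appends without fetching existing rows first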
