sqlalchemy orm mysql dataframe create using two databases - python

app = Flask(__name__)
# BUG FIX: the config key was misspelled 'SQLALCHEMY_DATABASE+URI' (Flask
# would silently ignore it), and both URLs used '#' where the '@' separating
# credentials from host belongs; the second URL also had a stray space.
app.config['SQLALCHEMY_DATABASE_URI'] = \
    "mariadb+mariadbconnector://harold:password@localhost:3306/movie?charset=utf8mb4"
# Extra bind 'two' points at the tvshow database on the same server.
app.config['SQLALCHEMY_BINDS'] = \
    {'two': "mariadb+mariadbconnector://harold:password@localhost:3306/tvshow?charset=utf8mb4"}
db = SQLAlchemy(app)
class Movie(db.Model):
    """Declarative model for the 'movie' table on the default bind."""
    __tablename__ = "movie"
    # 25-char string primary key.
    movie_id = Column(VARCHAR(length=25), primary_key=True, nullable=False)
    title = Column(VARCHAR(length=255), nullable=False)
    series_id = Column(VARCHAR(length=25), nullable=False)
    # NOTE(review): release date stored as a string, not a DATE — confirm intended.
    rel_date = Column(VARCHAR(length=25), nullable=False)
class TVShow(db.Model):
    """Declarative model for the 'tvshow' table; __bind_key__ routes it to
    the 'two' entry of SQLALCHEMY_BINDS (the second database)."""
    __bind_key__ = 'two'
    __tablename__ = "tvshow"
    tv_id = Column(VARCHAR(length=25), primary_key=True, nullable=False)
    name = Column(VARCHAR(length=255), nullable=False)
    seasons = Column(INTEGER, nullable=False)
    episodes = Column(INTEGER, nullable=False)
def set_df():
    """Load the movie and tvshow tables into DataFrames and return them.

    Returns:
        tuple[DataFrame, DataFrame]: (movies, tvshows).

    BUG FIX: the original built both DataFrames and discarded them (no
    return), so callers could never see the result.
    NOTE(review): engine1/engine2 are not defined in this snippet — they
    must be created elsewhere, one per database.
    """
    main1_df = pd.read_sql_table('movie', engine1)
    main2_df = pd.read_sql_table('tvshow', engine2)
    return main1_df, main2_df
So, how do I specify which database for the con/engine? I need to make dataframe from both tables.
I have tried using the complete SQLALCHEMY_DATABASE_URI; it did not work.
I also tried using the 'two' bind key for the second database; that did not work either. Do I have to create engines? If so, how?

With MySQL/MariaDB, "database" and "schema" mean the same thing, so if both databases are on the same server we can just use schema= to specify the database name.
import pandas as pd
import sqlalchemy as sa
# make default database some other one (just for illustration)
# BUG FIX: the scraped URL had '#' where the '@' host separator belongs.
engine = sa.create_engine("mysql://scott:tiger@localhost:3307/mydb", echo=True)
# One engine is enough: with MySQL/MariaDB, schema= selects the database.
main1_df = pd.read_sql_table("movie", engine, schema="movie")
"""
SELECT movie.movie.id, … FROM movie.movie
"""
main2_df = pd.read_sql_table("tvshow", engine, schema="tvshow")
"""
SELECT tvshow.tvshow.id, … FROM tvshow.tvshow
"""

Related

How can I have two tables A and B where B has all columns of A?

I am currently building a CMS. I want to have a page table and a page_revision table, where page_revision has all columns of page + rev_id + rev_parent_id.
MVCE
from flask_sqlalchemy import SQLAlchemy
db = SQLAlchemy()

class Page(db.Model):
    """A CMS page; the page_revision model is meant to mirror these columns."""
    __tablename__ = 'pages'
    page_id = db.Column(db.Integer, primary_key=True)
    content = db.Column(db.Text(), default='', nullable=False)
class PageRevision(Page):
    # NOTE(review): this is the failing example under discussion — kept
    # as-is. Subclassing Page makes SQLAlchemy treat the pair as
    # inheriting mapped classes and look for a FK between the two tables,
    # hence the NoForeignKeysError quoted below.
    __tablename__ = 'pages_revisions'
    rev_id = db.Column(db.Integer, primary_key=True)
    rev_parent_id = db.Column(db.Integer, nullable=True)
The error I get when I try flask db migrate:
sqlalchemy.exc.NoForeignKeysError: Can't find any foreign key relationships between 'pages' and 'pages_revisions'.
I was not sure what should happen. What I'm trying to do is to copy the structure, but I don't want inheritance in the OO sense.
Is there a way to copy all columns (make page_id NOT the primary key) without simply copy-and-paste?
This feels like a dirty hack and I'm not quite sure if it even works as expected but it might give some inspiration ;)
The idea is to create the PageRevision class dynamically by copying the fields of Page.
from flask_sqlalchemy import SQLAlchemy
db = SQLAlchemy()

class Page(db.Model):
    """Source model whose columns get copied into the dynamically built
    PageRevision class below."""
    __tablename__ = 'pages'
    page_id = db.Column(db.Integer, primary_key=True)
    content = db.Column(db.Text(), default='', nullable=False)
def columns_of_table(table_type):
    """Yield (name, Column) pairs copying every column of *table_type*'s table.

    Each copy keeps the original type/default/nullable but forces
    primary_key=False, so the copies can coexist with a new primary key.

    BUG FIX: the original ignored the table_type parameter and iterated
    Page.__table__ directly; behavior is unchanged for the existing
    columns_of_table(Page) call site, but the function now works for any
    mapped class.
    """
    for col in table_type.__table__.columns:
        yield col.key, db.Column(col.type, default=col.default,
                                 nullable=col.nullable, primary_key=False)
# Build PageRevision dynamically: its own revision columns plus a copy of
# every Page column. page_id loses its PK status because columns_of_table
# yields copies with primary_key=False; if Page ever grew a column named
# rev_id, the **-merge would overwrite the revision column (it comes last).
PageRevision = type('PageRevision', (db.Model, ), {
    'rev_id': db.Column(db.Integer, primary_key=True),
    'rev_parent_id': db.Column(db.Integer, nullable=True),
    **{key: col for key, col in columns_of_table(Page)}
})

Multiple DB Instance connection with SqlAlchemy Core

I'm trying to make multiple database connections. Right now I'm able to generate a single-engine connection for multiple databases. This works by having my main (shared) database defined in mainschema.py (alias dbm):
mymetadata = MetaData()
Base = declarative_base(metadata=mymetadata)

# Deferred-reflection base: column details are filled in later by
# newbase.prepare(engine); __abstract__ keeps this class itself unmapped.
class newbase(DeferredReflection, Base):
    __abstract__ = True

# Shared __table_args__ for tables in the main ('smv') schema.
table_args = {
    'mysql_engine': dict_constants['sql_mainDB_engine'],
    'mysql_charset': dict_constants['sql_mainDB_collation'],
    'schema': 'smv',
}
class Company(newbase):
    """Row in smv.companies: one client company."""
    __tablename__ = 'companies'
    __table_args__ = table_args
    id = Column(INTEGER(unsigned=True), primary_key=True)
    company = Column(String(255))
    # Presumably the name of this client's dedicated database — verify.
    db = Column(String(255))
    enabled = Column(BOOLEAN, nullable=False)
Then for each client i will have a specific database defined in dbschema.py (alias dbc)
mymetadata = MetaData()
Base = declarative_base()
# Per-client schema module: no 'schema' key here, so tables resolve
# against whatever database the client engine connects to.
table_args = {
    'mysql_engine': dict_constants['sql_mainDB_engine']
}
# __abstract__ causes declarative to skip the production of a table or mapper for the class entirely
class newbase(DeferredReflection, Base):
    __abstract__ = True
# Creating Base Classes
class ObjectGroup(newbase):
    """Self-referential group tree in the per-client database."""
    __tablename__ = 'objectgroups'
    __table_args__ = table_args
    id = Column(INTEGER(unsigned=True), primary_key=True, autoincrement=True)
    name = Column(String(30), unique=True, nullable=False)
    # FK back to this same table's id — forms the group hierarchy.
    parentobjectgroup_id = Column(INTEGER(unsigned=True), ForeignKey(id, onupdate="CASCADE", ondelete="RESTRICT"), nullable=False)
    created_at = Column(DATETIME, nullable=False)
    updated_at = Column(DATETIME, nullable=False)
    enabled = Column(BOOLEAN, nullable=False)
After that i create the engine:
# Build the per-client connection URL.
# NOTE(review): the '#' in this template is almost certainly a mangled '@'
# (user:pass@host) from the page scrape — as written everything after it
# would be parsed as a URL fragment. Confirm against the original source.
self.clientconnection = '%s://%s:%s#%s:%s/%s?charset=%s&use_unicode=0' % (self.dict_constants['sql_clientDB_driver'],
    self.dict_constants['sql_clientDB_user'], self.dict_constants['sql_clientDB_passwd'],
    host, port, db, self.dict_constants['sql_mainDB_collation'])
# One small pool per client database.
self.engine = create_engine(self.clientconnection, echo=False, pool_size=1, pool_timeout=30)
self.base = dbc.newbase
# opening several threads on the same time produces error in prepare, only one at a time
with sema_clientengine:
    try:
        self.base.prepare(self.engine)
        # (snippet truncated here — the except clause is not shown)
example of a query:
# Example query: the newest valid position row for one object.
position_table = dbc.Position.__table__
position = alias(position_table, name='position', flat=True)
# NOTE(review): 'filter' shadows the builtin; select([...]) is the legacy
# (pre-1.4) SQLAlchemy Core calling form.
filter = and_(position.c.object_id == objectid, position.c.validpos == 1)
query = select([position]).where(filter).order_by(position.c.moment_id.desc()).limit(1)
This is how i read:
# Execute the query under a read semaphore.
with sema_reads:
    debug_starttime = time()
    try:
        # NOTE(review): dispose() throws away the whole connection pool on
        # every read — presumably a stale-connection workaround; confirm
        # it is intentional, it defeats pooling.
        self.engine.dispose()
        with self.engine.connect() as connection:
            proxyobject = connection.execute(query)
            # (snippet truncated here — the except clause is not shown)
Okay, so basically I have, on the same instance, one main database called 'smv' and an independent DB for each client.
So when I create an engine and run a connect, depending on the class it will go to the defined schema. This works great — I can even make joins between databases. Now I need to put the main database 'smv' on a different DB instance so I can scale up, and this is where everything stops working, because the shared engine only connects to the clients' instance and the smv schema no longer exists there.
Any ideas?
Added on 2019-02-05:
Okay so after some testing i could make it work with sessions, notice that Company table is not just in a different database than Objects, its on a different database instance, different IP and Port:
# BUG FIX: both URL templates used '#' where the '@' separating credentials
# from host belongs (a scrape artifact) — as written the host/port/database
# would land in the URL fragment and the connection would fail.
config_1 = {
    "sqlalchemy.url": "%s://%s:%s@%s:%s/%s" % (dict_constants['sql_mainDB_driver'], dict_constants['sql_mainDB_user'],
                                               dict_constants['sql_mainDB_passwd'], dict_constants['sql_mainDB_host'],
                                               dict_constants['sql_mainDB_port'], dict_constants['sql_mainDB_name']),
    "sqlalchemy.echo": False,
}
# Second instance: same host, port 3307, database 'clientdb'.
config_2 = {
    "sqlalchemy.url": "%s://%s:%s@%s:3307/clientdb" % (dict_constants['sql_mainDB_driver'], dict_constants['sql_mainDB_user'],
                                                       dict_constants['sql_mainDB_passwd'], dict_constants['sql_mainDB_host']),
    "sqlalchemy.echo": False,
}
engine_1 = engine_from_config(config_1)
engine_2 = engine_from_config(config_2)
# One declarative base per database instance, so the session's binds map
# below can route each class to its own engine.
Base = declarative_base()
Base2 = declarative_base()

class Company(Base):
    """'companies' table on instance 1 (bound to engine_1)."""
    __tablename__ = 'companies'
    id = Column(INTEGER(unsigned=True), primary_key=True)
    company = Column(String(255))
    db = Column(String(255))
    enabled = Column(BOOLEAN, nullable=False)
class Object(Base2):
    """'objects' table on instance 2 (bound to engine_2)."""
    __tablename__ = 'objects'
    id = Column(INTEGER(unsigned=True), primary_key=True)
    name = Column(String(30), unique=True, nullable=False)
    objectgroup_id = Column(INTEGER(unsigned=True), nullable=False)
    objectaction_id = Column(SMALLINT(unsigned=True), nullable=False)
    created_at = Column(DATETIME, nullable=False)
    updated_at = Column(DATETIME, nullable=False)
    enabled = Column(BOOLEAN, nullable=False)
# session.configure(bind={Company: engine_1})
# Route each declarative base to its own engine; the Session resolves the
# engine from the queried class's base, so cross-instance queries work
# without naming an engine per query.
binds = {Base: engine_1, Base2: engine_2}
Session = sessionmaker(binds=binds)
session = Session()
# BUG FIX: 'print x' is Python 2 syntax — use the print() function.
print(session.query(Company).all())
print(session.query(Object).all())
So this works, but I need to run more complex queries, and it only works when I specify the engine. Is there any way the session can detect the engine automatically depending on the table class? Also, is it possible to run a join between tables that live on different database instances?
# Core-style select ordered by company name, executed against engine_1
# explicitly via bind= (the question: can the session pick it automatically?).
company = alias(Company.__table__, name='company', flat=True)
query = select([Company]).order_by(company.c.company)
proxyobject = session.execute(query, bind=engine_1)
rows = proxyobject.fetchall()
# BUG FIX: 'print rows' is Python 2 syntax — use the print() function.
print(rows)

Can you copy/move data between columns in a DB migration file?

I have a SqlAlchemy/Flask application. In it, I have an existing model named MyModelA. This is what it looks like:
class MyModelA(db.Model):
    """Existing parent model; MyModelB rows reference a_id."""
    a_id = db.Column(db.Integer, nullable=False, primary_key=True)
    my_field1 = db.Column(db.String(1024), nullable=True)
Now, I am adding a child model MyModelB. This is what it looks like:
class MyModelB(db.Model):
    """New child model, linked to MyModelA via the a_id foreign key."""
    b_id = db.Column(db.Integer, nullable=False, primary_key=True)
    a_id = db.Column(db.Integer, db.ForeignKey(MyModelA.a_id), nullable=False)
    my_field2 = db.Column(db.String(1024), nullable=True)
Then I run python manage.py migrate. This is what shows up in the migration file:
def upgrade():
    """Create the my_model_b table (child of my_model_a via the a_id FK)."""
    op.create_table('my_model_b',
                    sa.Column('b_id', sa.Integer(), nullable=False),
                    sa.Column('a_id', sa.Integer(), nullable=False),
                    sa.Column('my_field2', sa.String(length=1024), nullable=True),
                    sa.ForeignKeyConstraint(['a_id'], [u'my_model_a.a_id'], ),
                    sa.PrimaryKeyConstraint('b_id')
                    )

def downgrade():
    """Drop the table created by upgrade().

    BUG FIX: the original dropped 'my_table_b', which does not exist —
    the table created in upgrade() is named 'my_model_b', so downgrade
    would always fail.
    """
    op.drop_table('my_model_b')
I want to edit this migration such that it for every instance of MyModelA, a child record of instance MyModelB should be created with MyModelB.my_field2 set to MyModelA.my_field1. How can I do it?
Please show the code for upgrade and downgrade.
Edit:
You can do something like this for the one time migration:
db.engine.execute("INSERT INTO model_b (a_id) select a_id from model_a");
Or, if you really want SQLAlchemy code:
# BUG FIX: the original was missing the loop colon (a syntax error), used
# db.query (Flask-SQLAlchemy exposes queries via db.session.query), and
# read model.id although the model's primary key is a_id.
for model in db.session.query(ModelA).all():
    db.session.add(ModelB(a_id=model.a_id))
db.session.commit()
Previous answer:
What you are describing is not something you typically do in migrations. Migrations change/create the structure of your database. If you need it to happen every time a new MyModelA is created, this sounds more like events: http://docs.sqlalchemy.org/en/latest/orm/events.html#session-events
class MyModelA(db.Model):
    ...

# BUG FIX: the decorator line was mangled to '#sqlalchemy.event...' — a
# comment — so the listener was never registered; the leading character
# must be '@'.
@sqlalchemy.event.listens_for(SignallingSession, 'before_flush')
def insert_model_b(session, transaction, instances):
    """Before each flush, add a MyModelB child for every new MyModelA."""
    for instance in session.new:
        if isinstance(instance, MyModelA):
            model_b = MyModelB(a=instance)
            session.add(model_b)
Also, your schema needs to show that relationship (not just the foreign key) so you can assign the yet uninserted model_a to model_b.a:
class MyModelB(db.Model):
    b_id = db.Column(db.Integer, nullable=False, primary_key=True)
    a_id = db.Column(db.Integer, db.ForeignKey(MyModelA.a_id), nullable=False)
    # The relationship (not just the FK) lets a not-yet-inserted MyModelA
    # be assigned to model_b.a inside the before_flush listener.
    a = relationship("MyModelA")
    my_field2 = db.Column(db.String(1024), nullable=True)
Full code example:
import sqlalchemy
from sqlalchemy.orm import relationship
from flask import Flask
from flask.ext.sqlalchemy import SQLAlchemy
from flask.ext.sqlalchemy import SignallingSession
# In-memory SQLite app with SQL echo, for a self-contained demonstration.
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///:memory:'
app.config['SQLALCHEMY_ECHO'] = True
db = SQLAlchemy(app)
class MyModelA(db.Model):
    """Parent model of the full example ('model_a' table)."""
    __tablename__ = 'model_a'
    a_id = db.Column(db.Integer, nullable=False, primary_key=True)
    my_field1 = db.Column(db.String(1024), nullable=True)
class MyModelB(db.Model):
    """Child model ('model_b'); 'a' lets the listener attach an
    uninserted MyModelA instance before the flush."""
    __tablename__ = 'model_b'
    b_id = db.Column(db.Integer, nullable=False, primary_key=True)
    a_id = db.Column(db.Integer, db.ForeignKey(MyModelA.a_id), nullable=False)
    a = relationship(MyModelA)
    my_field2 = db.Column(db.String(1024), nullable=True)
# BUG FIX: the '@' of the decorator was mangled to '#' in the scrape,
# turning the registration into a comment — restore the decorator.
@sqlalchemy.event.listens_for(SignallingSession, 'before_flush')
def insert_model_b(session, transaction, instances):
    """Before each flush, add a MyModelB child for every new MyModelA."""
    for instance in session.new:
        if isinstance(instance, MyModelA):
            model_b = MyModelB(a=instance)
            session.add(model_b)
# Create the tables and insert one MyModelA; the before_flush listener is
# expected to add the matching MyModelB during the same commit.
db.create_all()
model_a = MyModelA()
db.session.add(model_a)
db.session.commit()

Generated queries contain redundant products?

I have rather simple models like these:
# Association table for the TableA <-> TableB many-to-many relationship.
TableA2TableB = Table('TableA2TableB', Base.metadata,
                      Column('tablea_id', BigInteger, ForeignKey('TableA.id')),
                      Column('tableb_id', Integer, ForeignKey('TableB.id')))
class TableA(Base):
    """One side of the many-to-many; 'tablebs' also installs the
    'tableas' backref on TableB."""
    __tablename__ = 'TableA'
    id = Column(BigInteger, primary_key=True)
    infohash = Column(String, unique=True)
    url = Column(String)
    tablebs = relationship('TableB', secondary=TableA2TableB, backref='tableas')
class TableB(Base):
    """Other side of the many-to-many (gains 'tableas' via the backref)."""
    __tablename__ = 'TableB'
    id = Column(Integer, primary_key=True)
    url = Column(String, unique=True)
However, sqla generates queries like
SELECT "TableB".id, "TableB".url AS "TableB_url" FROM "TableB", "TableA2TableB"
WHERE "TableA2TableB".tableb_id = "TableB".id AND "TableA2TableB".tablea_id = 408997;
But why is there a cartesian product in the query when the attributes selected are those in TableB? TableA2TableB shouldn't be needed.
Thanks
As it is right now, there is a backref relationship in TableB (tableas) and it's loaded because the default loading mode is set to select.
You may want to change the TableA.tablebs to
tablebs = relationship('TableB', secondary=TableA2TableB, backref='tableas', lazy="dynamic")

Outer Join with sqlalchemy (ORM)

I am trying to do a relationship() with an OUTER JOIN so that it joins the second table if there is something to join it with. I am currently stuck on how to do this though, I cannot seem to figure out the right combination of options(), relationship() and outerjoin().
I have the following tables and I am trying to join AppLike to Application if a row exists with the Application ID AND the artistID (which is provided by the function)
Happy to provide any additional information, I already have one of my joins working as you can see below, but there will always be a row to match for that one.
from sqlalchemy import Column
from . import Base
from . import DBSession
from sqlalchemy.dialects.mysql import (
INTEGER,
VARCHAR,
TEXT,
TINYINT,
)
from sqlalchemy.sql import and_
from sqlalchemy import ForeignKey
from sqlalchemy.orm import relationship, joinedload
import time
# 0 = new
# 1 = Denied
# 2 = Accepted
def getNewApplications(artistID):
    """Return un-reviewed applications (approved == 0) with 'pieces' and
    'vote' relationships eagerly joined.

    NOTE(review): filtering on AppLike.artist_id without joining AppLike
    implicitly cross-joins app_likes — this is the problem the question
    is about; the commented-out outerjoin is the attempted fix discussed
    in the answer below.
    """
    query = DBSession.query(Application).\
        options(joinedload('pieces')).\
        options(joinedload('vote')).\
        filter(AppLike.artist_id == artistID).\
        filter(Application.approved == 0)
    # join(AppPiece, Application.app_id == AppPiece.app_id).\
    # outerjoin(AppLike, and_(Application.app_id == AppLike.app_id,
    #                         AppLike.artist_id == artistID)).\
    # BUG FIX: removed a leftover 'import pdb; pdb.set_trace()' debugger
    # breakpoint that would halt every call to this function.
    return query.all()
class Application(Base):
    """SQLAlchemy declarative model for an artist application.

    (The original docstring said 'FileFavorite object' — a copy-paste
    leftover repeated on the other models in this file.)
    """
    __tablename__ = 'applications'
    __table_args__ = {
        'mysql_engine': 'InnoDB',
        'mysql_charset': 'utf8'
    }
    app_id = Column(INTEGER(11), autoincrement=True, primary_key=True, nullable=False)
    name = Column(VARCHAR(64), nullable=False)
    nickname = Column(VARCHAR(64), nullable=False)
    email = Column(VARCHAR(255), nullable=False)
    description = Column(TEXT(), nullable=False)
    profile_link = Column(VARCHAR(128), nullable=False)
    location = Column(VARCHAR(64), nullable=False)
    # 0 = new, 1 = denied, 2 = accepted (legend near the top of the snippet)
    approved = Column(TINYINT(4), nullable=False)
    # Both relationships are eagerly joined on every load.
    pieces = relationship("AppPiece", lazy='joined')
    vote = relationship("AppLike", lazy='joined')

    def __init__(self, name, nickname, email, desc, profileLink,
                 location, approved):
        self.name = name
        self.nickname = nickname
        self.email = email
        self.description = desc
        self.profile_link = profileLink
        self.location = location
        self.approved = approved
class AppPiece(Base):
    """A piece (a link) attached to an application."""
    __tablename__ = 'app_pieces'
    __table_args__ = {
        'mysql_engine': 'InnoDB',
        'mysql_charset': 'utf8'
    }
    app_piece_id = Column(INTEGER(11), autoincrement=True, primary_key=True, nullable=False)
    app_id = Column(INTEGER(11), ForeignKey('applications.app_id'))
    link = Column(VARCHAR(128), nullable=False)

    def __init__(self, appID, link):
        self.app_id = appID
        self.link = link
class AppLike(Base):
    """An artist's vote on an application.

    NOTE(review): only artist_id is marked primary_key, so the mapping
    allows one row per artist overall rather than per (app, artist) pair —
    confirm whether app_id was meant to be part of the primary key.
    """
    __tablename__ = 'app_likes'
    __table_args__ = {
        'mysql_engine': 'InnoDB',
        'mysql_charset': 'utf8'
    }
    app_id = Column(INTEGER(11), ForeignKey('applications.app_id'))
    artist_id = Column(INTEGER(11), primary_key=True, nullable=False)
    vote = Column(TINYINT(4), nullable=False)

    def __init__(self, appID, artistID, vote):
        self.app_id = appID
        self.artist_id = artistID
        self.vote = vote
You definitely don't need options(joinedload('pieces')), it is already defined in your models (lazy='joined'). The join condition is the tricky part here and needs to be done using subquery, since we want to filter there as well. So, the final query should look something like this:
# We do the filtering on AppLike in the subquery and later join
# Application to it.
applike_subq = DBSession.query(AppLike).\
    filter(AppLike.artist_id == artistID).subquery()
# NOTE(review): despite its name, 'query' holds the executed result list
# from .all(), not a Query object.
query = DBSession.query(Application).\
    outerjoin(applike_subq, Application.vote).\
    filter(Application.approved == 0).all()

Categories

Resources