MySQL multiple table UPDATE query using sqlalchemy core?

I want to update two tables using one query.
I'd like something along the lines of this:
UPDATE tblReceipt, tblReturn
SET tblReceipt.ReturnedDate = tblReturn.CreatedDate,
    tblReturn.ReturnerName = tblReceipt.Name
WHERE tblReturn.Id = tblReceipt.ReturnId
  AND tblReceipt.returned = TRUE
I currently have the following, but I'm not sure how to add the second table reference. Is there a simple way to do this?
update(Receipt)
    .values(ReturnedDate=Return.CreatedDate, ReturnName=Receipt.Name)
    .where(Return.Id == Receipt.ReturnId)
    .where(Receipt.Returned == True)

From the documentation for update():
The keys within values can be either Column objects or their string
identifiers (specifically the “key” of the Column, normally but not
necessarily equivalent to its “name”). Normally, the Column objects
used here are expected to be part of the target Table that is the
table to be updated. However when using MySQL, a multiple-table UPDATE
statement can refer to columns from any of the tables referred to in
the WHERE clause.
Emphasis mine.
I've created an example core pattern based on your example above; it isn't exact, but it should be enough to work with:
from sqlalchemy_app import engine  # the answerer's local helper that creates the engine
import sqlalchemy as sa
metadata = sa.MetaData()
receipt = sa.Table(
    "receipt",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("something", sa.Integer),
)

returns = sa.Table(
    "returns",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("receipt_id", sa.Integer, sa.ForeignKey('receipt.id')),
    sa.Column("somethingelse", sa.Integer),
)
if __name__ == "__main__":
    metadata.drop_all(engine)
    metadata.create_all(engine)
    with engine.connect() as conn:
        conn.execute(
            receipt.insert(
                values=[{"id": 1, "something": 1}]
            )
        )
        conn.execute(
            returns.insert(
                values=[{"id": 1, "receipt_id": 1, "somethingelse": 99}]
            )
        )
        conn.execute(
            sa.update(receipt)
            .values({receipt.c.something: 3, returns.c.somethingelse: 4})
            .where(sa.and_(receipt.c.id == 1, returns.c.id == 1))
        )
As per the documentation, the parent tables of the two columns referenced in .values() are both referenced in the .where() clause.
Here is the update statement it generates:
2019-08-17 11:27:39,573 INFO sqlalchemy.engine.base.Engine UPDATE receipt, returns SET returns.somethingelse=%(returns_somethingelse)s, receipt.something=%(something)s WHERE receipt.id = %(id_1)s AND returns.id = %(id_2)s
2019-08-17 11:27:39,582 INFO sqlalchemy.engine.base.Engine {'returns_somethingelse': 4, 'something': 3, 'id_1': 1, 'id_2': 1}
Note that if you just print the query out to inspect it, you'll get something that looks like this:
UPDATE receipt SET somethingelse=:returns_somethingelse, something=:something FROM returns WHERE receipt.id = :id_1 AND returns.id = :id_2
...as this is MySQL-specific behavior, you have to compile it using the MySQL dialect:
from sqlalchemy.dialects import mysql
print(statement.compile(dialect=mysql.dialect()))
will print:
UPDATE receipt, returns SET returns.somethingelse=%s, receipt.something=%s WHERE receipt.id = %s AND returns.id = %s
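As an aside (not part of the original answer), if you also want the bound values rendered inline while inspecting, you can pass literal_binds through compile_kwargs:

print(statement.compile(
    dialect=mysql.dialect(),
    compile_kwargs={"literal_binds": True},
))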

Related

Increase speed of SQLAlchemy Insert.execute()

Consider the following working code, which copies a source SQLite database to a target SQLite database:
# Create two databases.
import sqlite3
import pandas as pd
import time

cn_src = sqlite3.connect('source.db')
df = pd.DataFrame({"x": [1, 2], "y": [2.0, 3.0]})
df.to_sql("A", cn_src, if_exists="replace", index=False)
cn_tgt = sqlite3.connect('target.db')
cn_src.close()
cn_tgt.close()
from sqlalchemy import create_engine, MetaData, event
from sqlalchemy.sql import sqltypes

# create sqlalchemy connection
src_engine = create_engine("sqlite:///source.db")
src_metadata = MetaData(bind=src_engine)
exclude_tables = ('sqlite_master', 'sqlite_sequence', 'sqlite_temp_master')
tgt_engine = create_engine("sqlite:///target.db")
tgt_metadata = MetaData(bind=tgt_engine)

@event.listens_for(src_metadata, "column_reflect")
def genericize_datatypes(inspector, tablename, column_dict):
    column_dict["type"] = column_dict["type"].as_generic(allow_nulltype=True)

tgt_conn = tgt_engine.connect()
tgt_metadata.reflect()
# delete tables in target database.
for table in reversed(tgt_metadata.sorted_tables):
    if table.name not in exclude_tables:
        print('dropping table =', table.name)
        table.drop()
tgt_metadata.clear()
tgt_metadata.reflect()
src_metadata.reflect()

# copy table
for table in src_metadata.sorted_tables:
    if table.name not in exclude_tables:
        table.create(bind=tgt_engine)

# Update meta information
tgt_metadata.clear()
tgt_metadata.reflect()

# Copy data
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert()
    for index, row in enumerate(src_table.select().execute()):
        print("table =", table.name, "Inserting row", index)
        start = time.time()
        stmt.execute(row._asdict())
        end = time.time()
        print(end - start)
The code was mainly borrowed from another source. The problem is that end - start is about 0.017 seconds on my machine, which is too slow. Is there any way to speed it up? I have tried setting isolation_level=None in create_engine, but no luck.
It seems that the Insert object has no executemany method, so we can't use bulk inserting.
SQLAlchemy does not implement separate execute() and executemany() methods. Its execute() method looks at the parameters it receives and
if they consist of a single dict object (i.e., a single row) then it calls execute() at the driver level, or
if they consist of a list of dict objects (i.e., multiple rows) then it calls executemany() at the driver level.
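For illustration, a sketch (assuming conn is a Connection and stmt is an insert() for a two-column table):

conn.execute(stmt, {"id": 1, "txt": "foo"})    # single dict: execute() at the driver level
conn.execute(stmt, [{"id": 1, "txt": "foo"},
                    {"id": 2, "txt": "bar"}])  # list of dicts: executemany() at the driver level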
Note also that you are using deprecated usage patterns, specifically MetaData(bind=…). You should be doing something more like this:
import sqlalchemy as sa

engine = sa.create_engine("sqlite://")
tbl = sa.Table(
    "tbl",
    sa.MetaData(),
    sa.Column("id", sa.Integer, primary_key=True, autoincrement=False),
    sa.Column("txt", sa.String),
)
tbl.create(engine)

with engine.begin() as conn:
    stmt = sa.insert(tbl)
    params = [
        dict(id=1, txt="foo"),
        dict(id=2, txt="bar"),
    ]
    conn.execute(stmt, params)

# check results
with engine.begin() as conn:
    print(conn.exec_driver_sql("SELECT * FROM tbl").all())
    # [(1, 'foo'), (2, 'bar')]
I came up with a solution using a transaction:
# Copy data
trans = tgt_conn.begin()
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert().execution_options(autocommit=False)
    for index, row in enumerate(src_table.select().execute()):
        tgt_conn.execute(stmt, row._asdict())  # must use tgt_conn.execute(), not stmt.execute()
trans.commit()
tgt_conn.close()
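Building on the answer above: since a list of dicts makes SQLAlchemy call executemany() at the driver level, the copy loop can also be batched per table instead of executed row by row. A sketch under the same variable names, assuming each table fits in memory:

# Copy data: one executemany() per table instead of one execute() per row
trans = tgt_conn.begin()
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    rows = [row._asdict() for row in src_table.select().execute()]
    if rows:  # skip empty tables
        tgt_conn.execute(table.insert(), rows)
trans.commit()
tgt_conn.close()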

How can I Insert the value of CURRENT TIMESTAMP using SQLAlchemy's connection.execute

I want to insert multiple rows using connection.execute, and one of the columns must be set to the result of the database's CURRENT_TIMESTAMP function.
For example, given this table:
import sqlalchemy as sa
metadata = sa.MetaData()
foo = sa.Table('foo', metadata,
               sa.Column('id', sa.Integer, primary_key=True),
               sa.Column('ts', sa.TIMESTAMP))
# I'm using Sqlite for this example, but this question
# is database-agnostic.
engine = sa.create_engine('sqlite://', echo=True)
metadata.create_all(engine)
I can insert a single row like this:
conn = engine.connect()
with conn.begin():
    ins = foo.insert().values(ts=sa.func.current_timestamp())
    conn.execute(ins)
However when I try to insert multiple rows:
with conn.begin():
    ins = foo.insert()
    conn.execute(ins, [{'ts': sa.func.current_timestamp()}])
a TypeError is raised:
sqlalchemy.exc.StatementError: (builtins.TypeError) SQLite DateTime type only accepts Python datetime and date objects as input.
[SQL: INSERT INTO foo (ts) VALUES (?)]
[parameters: [{'ts': <sqlalchemy.sql.functions.current_timestamp at 0x7f3607e21070; current_timestamp>}]
Replacing the function with the string "CURRENT_TIMESTAMP" results in a similar error.
Is there a way to get the database to set the column to CURRENT_TIMESTAMP using connection.execute?
I'm aware that I can work around this by querying for the value of CURRENT_TIMESTAMP within the same transaction and using that value in the INSERT values, or by executing an UPDATE after the INSERT. I'm specifically asking whether this can be done in connection.execute's *multiparams argument.
It's a hack for sure, but this appears to work for SQLite at least:
from datetime import datetime
from pprint import pprint
import sqlalchemy as sa

engine = sa.create_engine("sqlite:///:memory:")
metadata = sa.MetaData()
foo = sa.Table(
    "foo",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True, autoincrement=True),
    sa.Column("ts", sa.TIMESTAMP),
    sa.Column("txt", sa.String(50)),
)
foo.create(engine)

with engine.begin() as conn:
    ins_query = str(foo.insert().compile()).replace(
        " :ts, ", " CURRENT_TIMESTAMP, "
    )
    print(ins_query)
    # INSERT INTO foo (id, ts, txt) VALUES (:id, CURRENT_TIMESTAMP, :txt)

    data = [{"id": None, "txt": "Alfa"}, {"id": None, "txt": "Bravo"}]
    conn.execute(sa.text(ins_query), data)

    print(datetime.now())
    # 2021-03-06 17:41:35.743452
    # (local time here is UTC-07:00)

    results = conn.execute(sa.text("SELECT * FROM foo")).fetchall()
    pprint(results, width=60)
    """
    [(1, '2021-03-07 00:41:35', 'Alfa'),
     (2, '2021-03-07 00:41:35', 'Bravo')]
    """

Returning Number of affected rows from SQL Server with SQLAlchemy

I use sqlalchemy to make changes to a table in a SQL Server database and would like to get back the number of affected rows.
I know there is the .rowcount attribute on ResultProxy, but, as for example this answer demonstrates, .rowcount is not necessarily the same as the number of affected rows.
SQL Server uses @@ROWCOUNT to access the number of affected rows from the previous statement execution.
Is there a way to modify an sqlalchemy expression that uses an insert / update statement to end with SELECT @@ROWCOUNT?
For example, given:
from sqlalchemy import (Table, Column, Integer, String, MetaData,
                        create_engine, select, text)

url = 'mssql+pyodbc://dsn'
engine = create_engine(url)
metadata = MetaData()
users = Table('users', metadata,
              Column('id', Integer, primary_key=True),
              Column('name', String),
              Column('fullname', String),
              )
ins = users.insert().values(name='jack', fullname='Jack Jones')
upd1 = users.update().values(fullname='Jack Doe').where(users.c.name == 'jack')
upd2 = users.update().values(fullname='Jack Doe').where(users.c.name == 'jack')
I could prepend SELECT @@ROWCOUNT to an update statement:
sel = select([text('@@ROWCOUNT')])
sql1 = sel.suffix_with(upd2)
print(sql1.compile(engine, compile_kwargs={"literal_binds": True}))
Yielding the "wrong" query:
SELECT @@ROWCOUNT UPDATE users SET fullname='Jack Doe' WHERE users.name = 'jack'
Trying to do the "right" thing:
sql2 = upd2.suffix_with(sel)
Raises AttributeError since 'Update' object has no attribute 'suffix_with'.
So is there a way to get the desired SQL query:
UPDATE users SET fullname='Jack Doe' WHERE users.name = 'jack';
SELECT @@ROWCOUNT
using the SQL expression language, without fully textual constructs?
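No accepted answer is preserved here, but one pragmatic workaround (an assumption on my part, and it needs SQLAlchemy 1.4+ for exec_driver_sql()) is to render the update with literal binds and send it together with SELECT @@ROWCOUNT as a single batch:

# Sketch: run the UPDATE and SELECT @@ROWCOUNT as one pyodbc batch.
# SET NOCOUNT ON keeps the UPDATE's count message from arriving
# ahead of the SELECT's result set.
compiled = str(upd2.compile(engine, compile_kwargs={"literal_binds": True}))
with engine.begin() as conn:
    affected = conn.exec_driver_sql(
        f"SET NOCOUNT ON; {compiled}; SELECT @@ROWCOUNT"
    ).scalar()
print(affected)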

Load records from arbitrary table by primary key passing a list as arguments

I'm using SQLAlchemy core functionality without the ORM, and I need to load records from an arbitrary table by primary key, passing a list as arguments.
Currently I'm doing it like:
records = select([arbitrary_table], list(arbitrary_table.primary_key.columns._all_cols)[0].in_([1, 2, 3]))
arbitrary_table is passed as a parameter and could be any table, with the restriction that the primary key is an integer and not composite, though the primary key column may have different names.
I have a few questions about it:
Q1:
Is there any way to optimize it?
I'm fairly sure that list(self.arbitrary_table.primary_key.columns._all_cols)[0] is NOT the best way to get the primary key column.
Q2:
How to do the same with ORM query?
The example from the SA site has my_user = session.query(User).get(5), but it takes only one ID as an argument and has no override that takes a list of IDs.
I will be very thankful for your suggestions.
the "primary_key" attribute is an instance of PrimaryKeyConstraint, which is iterable in place, so list(table.primary_key)[0] is all you need to get at that column.
The example below illustrates two composite-compatible techniques (edit: oh, you said "not composite". Well, it's cool code anyway... use select_by_single_pk()): one requires tuple support (PostgreSQL, maybe MySQL), the other strings out AND/OR. There is also a single-PK approach. Then it shows query.get(), which accepts a tuple:
from sqlalchemy import tuple_, or_, and_

def select_by_composite_pk(table, values):
    "works only in a high-capability database like postgresql"
    return table.select().where(tuple_(*table.primary_key).in_(values))

def select_by_composite_pk_no_tuples(table, values):
    "works in any database"
    return table.select().where(
        or_(
            *[
                and_(*[col == v for col, v in zip(table.primary_key, vals)])
                for vals in values
            ]
        )
    )

def select_by_single_pk(table, values):
    "works in any database"
    return table.select().where(list(table.primary_key)[0].in_(values))
if __name__ == '__main__':
    from sqlalchemy import create_engine, Table, Column, Integer, MetaData

    eng = create_engine("postgresql://scott:tiger@localhost/test", echo=True)
    conn = eng.connect()
    trans = conn.begin()

    m = MetaData()

    # single PK column
    a = Table('a', m, Column('x', Integer, primary_key=True),
              Column('y', Integer))

    # composite PK columns
    b = Table('b', m, Column('x', Integer, primary_key=True),
              Column('y', Integer, primary_key=True))

    m.create_all(conn)

    conn.execute(a.insert(), [
        {'x': i, 'y': i * 2} for i in range(10)
    ])
    conn.execute(b.insert(), [
        {'x': i, 'y': i * 2} for i in range(10)
    ])

    print(conn.execute(
        select_by_composite_pk(a, [tuple_(3, ), tuple_(5, ), tuple_(9, )])
    ).fetchall())
    print(conn.execute(
        select_by_composite_pk(b, [tuple_(3, 6), tuple_(5, 10), tuple_(9, 18)])
    ).fetchall())
    print(conn.execute(
        select_by_composite_pk_no_tuples(b, [(3, 6), (5, 10), (9, 18)])
    ).fetchall())
    print(conn.execute(
        select_by_single_pk(b, [3, 5, 9])
    ).fetchall())

    # ORM query version
    from sqlalchemy.orm import Session
    from sqlalchemy.ext.declarative import declarative_base

    Base = declarative_base()

    class MyCompositeClass(Base):
        __table__ = b

    # get() accepts a tuple for a composite primary key
    print(Session(conn).query(MyCompositeClass).get((5, 10)))
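A present-day footnote (not part of the original answer): Query.get() is considered legacy as of SQLAlchemy 1.4; the modern spelling is Session.get(), which likewise accepts a tuple for composite keys:

# SQLAlchemy 1.4+ equivalent of query.get((5, 10))
print(Session(conn).get(MyCompositeClass, (5, 10)))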

creating a temporary table from a query using sqlalchemy orm

I can create a temporary table this way:
session.execute("CREATE TABLE temptable SELECT existingtable.id, "
"existingtable.column2 FROM existingtable WHERE existingtable.id<100000")
but the new table is unreadable because it says it has no primary key. existingtable.id is the primary key of existingtable, so I expected it to get the same treatment in the temp table.
However, I would rather find some ORM way of doing this anyway. Given:
temp_table = Table('temptable', metadata,
                   Column('id', Integer, primary_key=True),
                   Column('column2', Integer),
                   useexisting=True)

class TempTable(object):
    pass

mapper(TempTable, temp_table)
temp_table.create(bind=session.bind, checkfirst=True)
if session.query(TempTable).delete():  # make sure it's empty
    session.commit()
How can I populate temp_table with some selected contents of existingtable without doing 100000 session.query.add(TempTable(...)) commands? Or is there a way of creating the table from a query similar to the plain SQL version above?
It's not exactly ORM, but to create the table initially, I'd clone the table structure (see cloneTable in the example below). For copying the data, I then would use the InsertFromSelect example.
Edit: Since version 0.8.3, SQLAlchemy supports Insert.from_select() out of the box. Hence the InsertFromSelect class and the respective visitor in the example below can be directly replaced and are no longer needed. I leave the original example unchanged for historic reasons.
Here is a working example:
from sqlalchemy import Table
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.expression import UpdateBase

class InsertFromSelect(UpdateBase):
    def __init__(self, table, select):
        self.table = table
        self.select = select

@compiles(InsertFromSelect)
def visit_insert_from_select(element, compiler, **kw):
    return "INSERT INTO %s %s" % (
        compiler.process(element.table, asfrom=True),
        compiler.process(element.select)
    )

def cloneTable(name, table, metadata):
    cols = [c.copy() for c in table.columns]
    constraints = [c.copy() for c in table.constraints]
    return Table(name, metadata, *(cols + constraints))

# test data
from sqlalchemy import MetaData, Column, Integer
from sqlalchemy.engine import create_engine

e = create_engine('sqlite://')
m = MetaData(e)
t = Table('t', m, Column('id', Integer, primary_key=True),
          Column('number', Integer))
t.create()
e.execute(t.insert().values(id=1, number=3))
e.execute(t.insert().values(id=9, number=-3))

# create temp table
temp = cloneTable('temp', t, m)
temp.create()

# copy data
ins = InsertFromSelect(temp, t.select().where(t.c.id > 5))
e.execute(ins)

# print result
for r in e.execute(temp.select()):
    print(r)
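And, as mentioned in the edit above, on SQLAlchemy 0.8.3+ the custom construct can be swapped for the built-in Insert.from_select(); a minimal sketch reusing temp and t from the example:

# copy data with the built-in construct instead of InsertFromSelect
ins = temp.insert().from_select(
    [c.name for c in t.columns],   # target column names
    t.select().where(t.c.id > 5),  # source SELECT
)
e.execute(ins)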
