SQLAlchemy Python select and insert

I have the following:
from sqlalchemy import create_engine
engine1 = create_engine('mysql://user:password@host1/schema', echo=True)
engine2 = create_engine('mysql://user:password@host2/schema')
connection1 = engine1.connect()
connection2 = engine2.connect()
table1 = connection1.execute("select * from table1")
table2 = connection2.execute("select * from table2")
Now I want to insert all entries from table1 into an identical empty table table2 on connection2.
How can I achieve that?
I could also create a dict out of table1 and then insert it into table2. As I learned from the SQLAlchemy documentation, there is a way to do that, but the examples there assume that you create a whole new table in order to insert into it with new_table.insert(). That doesn't work for my existing tables.
Thanks
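One way to do this (a minimal sketch, not from the original thread, using the SQLAlchemy 1.4+ API and assuming the two tables really are identical): reflect the existing table2 from engine2 instead of defining a new table, then pass the fetched rows to a single insert as a list of dicts.
import sqlalchemy as sa

engine1 = sa.create_engine('mysql://user:password@host1/schema')
engine2 = sa.create_engine('mysql://user:password@host2/schema')

# Reflect the existing target table; no new table definition required
table2 = sa.Table('table2', sa.MetaData(), autoload_with=engine2)

with engine1.connect() as conn1, engine2.begin() as conn2:
    # .mappings() yields dict-like rows keyed by column name
    rows = [dict(row) for row in conn1.execute(sa.text('select * from table1')).mappings()]
    if rows:
        # a list of dicts lets SQLAlchemy use executemany() at the driver level
        conn2.execute(table2.insert(), rows)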

Increase speed of SQLAlchemy Insert.execute()

Consider the following working code, which copies a source SQLite database to a target SQLite database:
# Create two databases.
import sqlite3
import pandas as pd
import time
cn_src = sqlite3.connect('source.db')
df=pd.DataFrame({"x":[1,2],"y":[2.0,3.0]})
df.to_sql("A", cn_src, if_exists="replace", index=False)
cn_tgt = sqlite3.connect('target.db')
cn_src.close()
cn_tgt.close()
from sqlalchemy import create_engine, MetaData, event
from sqlalchemy.sql import sqltypes
# create sqlalchemy connections
src_engine = create_engine("sqlite:///source.db")
src_metadata = MetaData(bind=src_engine)
exclude_tables = ('sqlite_master', 'sqlite_sequence', 'sqlite_temp_master')
tgt_engine = create_engine("sqlite:///target.db")
tgt_metadata = MetaData(bind=tgt_engine)

@event.listens_for(src_metadata, "column_reflect")
def genericize_datatypes(inspector, tablename, column_dict):
    column_dict["type"] = column_dict["type"].as_generic(allow_nulltype=True)

tgt_conn = tgt_engine.connect()
tgt_metadata.reflect()
# delete tables in target database.
for table in reversed(tgt_metadata.sorted_tables):
    if table.name not in exclude_tables:
        print('dropping table =', table.name)
        table.drop()
tgt_metadata.clear()
tgt_metadata.reflect()
src_metadata.reflect()
# copy table definitions
for table in src_metadata.sorted_tables:
    if table.name not in exclude_tables:
        table.create(bind=tgt_engine)
# Update meta information
tgt_metadata.clear()
tgt_metadata.reflect()
# Copy data
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert()
    for index, row in enumerate(src_table.select().execute()):
        print("table =", table.name, "Inserting row", index)
        start = time.time()
        stmt.execute(row._asdict())
        end = time.time()
        print(end - start)
The code was mainly borrowed from another source. The problem is that the per-row time end-start is about 0.017 s on my computer, which is too slow. Is there any way to speed it up? I have tried setting isolation_level=None in create_engine, but no luck.
It seems that the Insert object has no executemany method, so we can't use bulk inserting.
SQLAlchemy does not implement separate execute() and executemany() methods. Its execute() method looks at the parameters it receives and
if they consist of a single dict object (i.e., a single row), then it calls execute() at the driver level, or
if they consist of a list of dict objects (i.e., multiple rows), then it calls executemany() at the driver level.
Note also that you are using deprecated usage patterns, specifically MetaData(bind=…). You should be doing something more like this:
import sqlalchemy as sa

engine = sa.create_engine("sqlite://")
tbl = sa.Table(
    "tbl",
    sa.MetaData(),
    sa.Column("id", sa.Integer, primary_key=True, autoincrement=False),
    sa.Column("txt", sa.String),
)
tbl.create(engine)
with engine.begin() as conn:
    stmt = sa.insert(tbl)
    params = [
        dict(id=1, txt="foo"),
        dict(id=2, txt="bar"),
    ]
    conn.execute(stmt, params)
# check results
with engine.begin() as conn:
    print(conn.exec_driver_sql("SELECT * FROM tbl").all())
    # [(1, 'foo'), (2, 'bar')]
I came up with a solution using a transaction:
# Copy data
trans = tgt_conn.begin()
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert().execution_options(autocommit=False)
    for index, row in enumerate(src_table.select().execute()):
        tgt_conn.execute(stmt, row._asdict())  # must use tgt_conn.execute(), not stmt.execute()
trans.commit()
tgt_conn.close()
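Following the executemany() point above, the per-row loop can also be collapsed into one execute() per table by passing all rows as a list of dicts (a sketch under the same setup, assuming each table fits in memory):
# Copy data with one executemany() per table instead of one INSERT per row
with tgt_engine.begin() as conn:
    for table in tgt_metadata.sorted_tables:
        src_table = src_metadata.tables[table.name]
        rows = [row._asdict() for row in src_table.select().execute()]
        if rows:
            # the list of dicts is dispatched to executemany() at the driver level
            conn.execute(table.insert(), rows)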

How to create index for a SQLite3 database using SQLAlchemy?

I have multiple SQLite3 databases for which the models are not available.
def index_db(name, tempdb):
    print(f'{name.ljust(padding)} Indexing file: {tempdb}')
    if tempdb.endswith('primary.sqlite'):
        conn = sqlite3.connect(tempdb)
        conn.execute('CREATE INDEX packageSource ON packages (rpm_sourcerpm)')
        conn.commit()
        conn.close()
How can I perform the same operation using SQLAlchemy?
I can come up with two ways to add that index through SQLAlchemy:
if you do not reflect, execute the SQL statement directly;
if you reflect your table/model, add an index to it.
Firstly, let's create the table to work on.
import sqlite3
con = sqlite3.connect("/tmp/73526761.db")
con.execute("CREATE TABLE t73526761 (id INT PRIMARY KEY, name VARCHAR)")
con.commit()
con.close()
Then, without reflecting, you can execute your raw SQL with the following.
import sqlalchemy as sa

engine = sa.create_engine("sqlite:////tmp/73526761.db", future=True)
with engine.begin() as con:
    con.execute(sa.text("CREATE INDEX t73526761_name_idx ON t73526761 (name)"))
    # no explicit commit needed: engine.begin() commits on exit
Or if you reflect the table only (SQLAlchemy core):
import sqlalchemy as sa
metadata_obj = sa.MetaData()
engine = sa.create_engine("sqlite:////tmp/73526761.db", future=True)
t73526761 = sa.Table("t73526761", metadata_obj, autoload_with=engine)
t73526761_name_idx = sa.Index("t73526761_name_idx", t73526761.c.name)
t73526761_name_idx.create(bind=engine) # emits CREATE INDEX t73526761_name_idx ON t73526761 (name)
Or if you reflect the model (SQLAlchemy orm):
import sqlalchemy as sa
from sqlalchemy import orm
Base = orm.declarative_base()
engine = sa.create_engine("sqlite:////tmp/73526761.db", future=True)
class K73526761(Base):
    __table__ = sa.Table("t73526761", Base.metadata, autoload_with=engine)
t73526761_name_idx = sa.Index("t73526761_name_idx", K73526761.name)
t73526761_name_idx.create(bind=engine) # emits CREATE INDEX t73526761_name_idx ON t73526761 (name)
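To double-check that the index exists afterwards, you can reflect it back with the inspector (a small usage sketch):
import sqlalchemy as sa

engine = sa.create_engine("sqlite:////tmp/73526761.db", future=True)
# lists the indexes SQLite reports for the table
print(sa.inspect(engine).get_indexes("t73526761"))
# e.g. [{'name': 't73526761_name_idx', 'column_names': ['name'], 'unique': 0}]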

SQLite3 + Python (I need to check whether data from a table in database 1 exists in a table in database 2)

I have a question about Python and sqlite3.
import sqlite3
conna= sqlite3.connect('db_a')
a = conna.cursor()
connb= sqlite3.connect('db_b')
b = connb.cursor()
I don't know how to write the relational query between the two databases; can someone instruct me?
I don't want to use def (a function), just the SELECT code with a variable to hold the result.
query = """SELECT COL1 FROM TABLE1.DB_A WHERE NOT EXISTS (SELECT COL1 FROM TABLE2.DB_B WHERE COL1.TABLE2.DE_B = COL1.TABLE1.DE_A)"""
cursor.execute(query)
records = cursor.fetchall()
for row in records:
    print(row[0])
Can someone help me?
If the tables exist in different databases, you need the ATTACH DATABASE statement to use the 2nd database with the connection object with which you connected to the 1st database:
import sqlite3

conn = sqlite3.connect('db_a')
cursor = conn.cursor()

attach = "ATTACH DATABASE 'db_b' AS db_b;"
cursor.execute(attach)

query = """
    SELECT t1.COL1
    FROM TABLE1 AS t1
    WHERE NOT EXISTS (
        SELECT t2.COL1
        FROM db_b.TABLE2 AS t2
        WHERE t2.COL1 = t1.COL1
    )
"""
cursor.execute(query)
records = cursor.fetchall()
for row in records:
    print(row[0])

detach = "DETACH DATABASE db_b;"
cursor.execute(detach)
Also, instead of NOT EXISTS you could use EXCEPT, the difference being that EXCEPT returns only distinct results:
query = """
    SELECT COL1 FROM TABLE1
    EXCEPT
    SELECT COL1 FROM db_b.TABLE2
"""

pandas DataFrame upsert to SQLite

All I want is a simple upsert from the DataFrame to SQLite. However, since pd.to_sql() does not support upsert, I had to implement it with SQLAlchemy instead.
SQLite:
CREATE TABLE test (col1 INTEGER, col2 text, col3 REAL, PRIMARY KEY(col1, col2));
python:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import Table
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.automap import automap_base

def test_upsert():
    df = pd.DataFrame({'col1': 1, 'col2': 'a', 'col3': 1.5}, index=[0])
    sql_url = 'sqlite:///testDB.db'
    table = 'test'
    engine = create_engine(sql_url)
    with engine.connect() as conn:
        base = automap_base()
        base.prepare(engine, reflect=True)
        target_table = Table(table, base.metadata, autoload=True, autoload_with=engine)
        stmt = insert(target_table).values(df.to_dict(orient='records'))
        update_dict = {c.name: c for c in stmt.excluded if not c.primary_key}
        conn.execute(stmt.on_conflict_do_update(constraint=f'{table}_pkey', set_=update_dict))
The script above previously worked with Postgres, but it keeps giving me this error when used with SQLite:
sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near "ON": syntax error
[SQL: INSERT INTO test (col1, col2, col3) VALUES (?, ?, ?) ON CONFLICT (test_pkey) DO UPDATE SET col3 = excluded.col3]
[parameters: (1, 'a', 1.5)]
(Background on this error at: http://sqlalche.me/e/14/e3q8)
I'm not sure what I did wrong, or whether there's a better solution, since this seems like a very common operation.
Any help is appreciated.
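Two things stand out (a hedged guess, not from the original thread): the insert construct comes from sqlalchemy.dialects.postgresql, and SQLite has no named constraint called test_pkey. The SQLite dialect ships its own insert() whose on_conflict_do_update() takes the conflict target as a column list via index_elements, so a sketch of a SQLite-flavoured version of the same upsert could look like this:
import pandas as pd
from sqlalchemy import create_engine, MetaData, Table
from sqlalchemy.dialects.sqlite import insert  # SQLite dialect, not postgresql

def test_upsert_sqlite():
    df = pd.DataFrame({'col1': 1, 'col2': 'a', 'col3': 1.5}, index=[0])
    engine = create_engine('sqlite:///testDB.db')
    target_table = Table('test', MetaData(), autoload_with=engine)
    stmt = insert(target_table).values(df.to_dict(orient='records'))
    update_dict = {c.name: c for c in stmt.excluded if not c.primary_key}
    # SQLite identifies the conflict target by its column list, not a constraint name
    stmt = stmt.on_conflict_do_update(index_elements=['col1', 'col2'], set_=update_dict)
    with engine.begin() as conn:
        conn.execute(stmt)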

SQLAlchemy - copy schema and data of subquery to another database

I am trying to copy data from a subquery from Postgres (from_engine) to a SQLite database. I can achieve this for copying a whole table with the following:
smeta = MetaData(bind=from_engine)
table = Table(table_name, smeta, autoload=True)
table.metadata.create_all(to_engine)
However, I am not sure how to achieve the same for a subquery statement.
-Sandeep
Edit:
Follow-up on the answer. Once I have created the table, I want to create a subquery stmt as follows:
table = Table("newtable", dest_metadata, *columns)
stmt = dest_session.query(table).subquery()
However, the last statement ends up with this error:
cursor.execute(statement, parameters)
sqlalchemy.exc.ProgrammingError: (ProgrammingError) relation "newtable" does not exist
LINE 3: FROM newtable) AS anon_1
One way that works at least in some cases:
Use column_descriptions of a query object to get some information about the columns in the result set.
With that information you can build the schema to create the new table in the other database.
Run the query in the source database and insert the results into the new table.
First, some setup for the example:
from sqlalchemy import create_engine, MetaData
from sqlalchemy import Column, Integer, String, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# Engine to the database to query the data from
# (postgresql)
source_engine = create_engine('sqlite:///:memory:', echo=True)
SourceSession = sessionmaker(source_engine)
# Engine to the database to store the results in
# (sqlite)
dest_engine = create_engine('sqlite:///:memory:', echo=True)
DestSession = sessionmaker(dest_engine)
# Create a toy table and fill it with some data
Base = declarative_base()
class Pet(Base):
    __tablename__ = 'pets'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    race = Column(String)
Base.metadata.create_all(source_engine)
sourceSession = SourceSession()
sourceSession.add(Pet(name="Fido", race="cat"))
sourceSession.add(Pet(name="Ceasar", race="cat"))
sourceSession.add(Pet(name="Rex", race="dog"))
sourceSession.commit()
Now to the interesting bit:
# This is the query we want to persist in a new table:
query= sourceSession.query(Pet.name, Pet.race).filter_by(race='cat')
# Build the schema for the new table
# based on the columns that will be returned
# by the query:
metadata = MetaData(bind=dest_engine)
columns = [Column(desc['name'], desc['type']) for desc in query.column_descriptions]
column_names = [desc['name'] for desc in query.column_descriptions]
table = Table("newtable", metadata, *columns)
# Create the new table in the destination database
table.create(dest_engine)
# Finally execute the query
destSession = DestSession()
for row in query:
    destSession.execute(table.insert(row))
destSession.commit()
There should be more efficient ways to do the last loop, but bulk insert is another topic; see the sketch below.
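For completeness, a minimal sketch of that bulk variant (same setup as above): collect the rows first and hand them to execute() as one list, which SQLAlchemy dispatches to executemany() at the driver level.
# Bulk variant of the last loop: one executemany() instead of row-by-row inserts
rows = [dict(name=name, race=race) for name, race in query]
if rows:
    destSession.execute(table.insert(), rows)
destSession.commit()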
You can also go through a pandas DataFrame: for example, read with pandas.read_sql(query, source.connection) and write with df.to_sql(table_name, con=destination.connection), as sketched below.
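A sketch of that pandas route, assuming the source_engine/dest_engine pair from the setup above:
import pandas as pd

# read the query result into a DataFrame, then append it to the target table
df = pd.read_sql(query.statement, source_engine)
df.to_sql("newtable", con=dest_engine, if_exists="append", index=False)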
