How to create an index for a SQLite3 database using SQLAlchemy?

I have multiple SQLite3 databases for which the models are not available.
def index_db(name, tempdb):
    print(f'{name.ljust(padding)} Indexing file: {tempdb}')
    if tempdb.endswith('primary.sqlite'):
        conn = sqlite3.connect(tempdb)
        conn.execute('CREATE INDEX packageSource ON packages (rpm_sourcerpm)')
        conn.commit()
        conn.close()
How can I perform the same operation using SQLAlchemy?

I can come up with two ways to add that index through SQLAlchemy:
- if you do not reflect, execute the SQL statement directly
- if you reflect your table/model, add an index to it
Firstly, let's create the table to work on.
import sqlite3
con = sqlite3.connect("/tmp/73526761.db")
con.execute("CREATE TABLE t73526761 (id INT PRIMARY KEY, name VARCHAR)")
con.commit()
con.close()
Then, without reflecting, you can execute your raw SQL with the following.
import sqlalchemy as sa
engine = sa.create_engine("sqlite:////tmp/73526761.db", future=True)
with engine.begin() as con:
    con.execute(sa.text("CREATE INDEX t73526761_name_idx ON t73526761 (name)"))
    # no explicit con.commit() needed: the engine.begin() block commits on exit
Or if you reflect the table only (SQLAlchemy Core):
import sqlalchemy as sa
metadata_obj = sa.MetaData()
engine = sa.create_engine("sqlite:////tmp/73526761.db", future=True)
t73526761 = sa.Table("t73526761", metadata_obj, autoload_with=engine)
t73526761_name_idx = sa.Index("t73526761_name_idx", t73526761.c.name)
t73526761_name_idx.create(bind=engine) # emits CREATE INDEX t73526761_name_idx ON t73526761 (name)
Or if you reflect the model (SQLAlchemy ORM):
import sqlalchemy as sa
from sqlalchemy import orm
Base = orm.declarative_base()
engine = sa.create_engine("sqlite:////tmp/73526761.db", future=True)
class K73526761(Base):
    __table__ = sa.Table("t73526761", Base.metadata, autoload_with=engine)
t73526761_name_idx = sa.Index("t73526761_name_idx", K73526761.name)
t73526761_name_idx.create(bind=engine) # emits CREATE INDEX t73526761_name_idx ON t73526761 (name)

Increase speed of SQLAlchemy Insert.execute()

Consider the following working code that copies a source SQLite database to a target SQLite database:
# Create two database.
import sqlite3
import pandas as pd
import time
cn_src = sqlite3.connect('source.db')
df=pd.DataFrame({"x":[1,2],"y":[2.0,3.0]})
df.to_sql("A", cn_src, if_exists="replace", index=False)
cn_tgt = sqlite3.connect('target.db')
cn_src.close()
cn_tgt.close()
from sqlalchemy import create_engine, MetaData, event
from sqlalchemy.sql import sqltypes

# create sqlalchemy connections
src_engine = create_engine("sqlite:///source.db")
src_metadata = MetaData(bind=src_engine)
exclude_tables = ('sqlite_master', 'sqlite_sequence', 'sqlite_temp_master')
tgt_engine = create_engine("sqlite:///target.db")
tgt_metadata = MetaData(bind=tgt_engine)

@event.listens_for(src_metadata, "column_reflect")
def genericize_datatypes(inspector, tablename, column_dict):
    column_dict["type"] = column_dict["type"].as_generic(allow_nulltype=True)

tgt_conn = tgt_engine.connect()
tgt_metadata.reflect()

# delete tables in target database.
for table in reversed(tgt_metadata.sorted_tables):
    if table.name not in exclude_tables:
        print('dropping table =', table.name)
        table.drop()

tgt_metadata.clear()
tgt_metadata.reflect()
src_metadata.reflect()

# copy tables
for table in src_metadata.sorted_tables:
    if table.name not in exclude_tables:
        table.create(bind=tgt_engine)

# Update meta information
tgt_metadata.clear()
tgt_metadata.reflect()

# Copy data
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert()
    for index, row in enumerate(src_table.select().execute()):
        print("table =", table.name, "Inserting row", index)
        start = time.time()
        stmt.execute(row._asdict())
        end = time.time()
        print(end - start)
The code was mainly borrowed from another source. The problem is that the time end-start is about 0.017 seconds per row on my computer, which is too slow. Is there any way to speed this up? I have tried setting isolation_level=None in create_engine but had no luck.
It seems that the Insert object has no executemany method, so we can't use bulk inserting.
SQLAlchemy does not implement separate execute() and executemany() methods. Its execute() method looks at the parameters it receives and
- if they consist of a single dict object (i.e., a single row), it calls execute() at the driver level, or
- if they consist of a list of dict objects (i.e., multiple rows), it calls executemany() at the driver level.
Note also that you are using deprecated usage patterns, specifically MetaData(bind=…). You should be doing something more like this:
import sqlalchemy as sa

engine = sa.create_engine("sqlite://")

tbl = sa.Table(
    "tbl",
    sa.MetaData(),
    sa.Column("id", sa.Integer, primary_key=True, autoincrement=False),
    sa.Column("txt", sa.String),
)
tbl.create(engine)

with engine.begin() as conn:
    stmt = sa.insert(tbl)
    params = [
        dict(id=1, txt="foo"),
        dict(id=2, txt="bar"),
    ]
    conn.execute(stmt, params)

# check results
with engine.begin() as conn:
    print(conn.exec_driver_sql("SELECT * FROM tbl").all())
    # [(1, 'foo'), (2, 'bar')]
I came up with a solution using a transaction:
# Copy data
trans = tgt_conn.begin()
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert().execution_options(autocommit=False)
    for index, row in enumerate(src_table.select().execute()):
        tgt_conn.execute(stmt, row._asdict())  # must use tgt_conn.execute(), not stmt.execute()
trans.commit()
tgt_conn.close()

Copy one database to another using SQLAlchemy

I'm trying to copy a database using SQLAlchemy. The first attempt was:
from sqlalchemy import create_engine, MetaData
from sqlalchemy.orm import sessionmaker
from urls import engine_urls
engine1 = create_engine(engine_urls[0])
engine2 = create_engine(engine_urls[1])
metadata = MetaData()
metadata.reflect(engine1)
tables = metadata.tables
metadata.create_all(engine2)
Session1 = sessionmaker(bind=engine1)
from sqlalchemy import insert
with Session1.begin() as session:
    for key in tables:
        table_object = tables[key]
        for row in session.query(table_object):
            s = insert(table_object).\
                values(**dict(zip(row.keys(), row)))
            engine2.execute(s)
But this code does not work since the order in which inserts are done is arbitrary and this violates FK constraints. For example, inserting a child before a parent will cause such a violation. How could I achieve this task? Is there a part of the framework that would do this easily? I can't find it.
Here is what I use. Works well.
from sqlalchemy import create_engine, MetaData, event
from sqlalchemy.sql import sqltypes

# Requires SQLAlchemy 1.4+
src_engine = create_engine("sqlite:///mydb.sqlite")
src_metadata = MetaData(bind=src_engine)
exclude_tables = ('sqlite_master', 'sqlite_sequence', 'sqlite_temp_master')
tgt_engine = create_engine("postgresql+psycopg2://@localhost/ngas")
tgt_metadata = MetaData(bind=tgt_engine)

@event.listens_for(src_metadata, "column_reflect")
def genericize_datatypes(inspector, tablename, column_dict):
    column_dict["type"] = column_dict["type"].as_generic(allow_nulltype=True)

tgt_conn = tgt_engine.connect()
tgt_metadata.reflect()

# drop all tables in target database
for table in reversed(tgt_metadata.sorted_tables):
    if table.name not in exclude_tables:
        print('dropping table =', table.name)
        table.drop()

# # Delete all data in target database
# for table in reversed(tgt_metadata.sorted_tables):
#     table.delete()

tgt_metadata.clear()
tgt_metadata.reflect()
src_metadata.reflect()

# create all tables in target database
for table in src_metadata.sorted_tables:
    if table.name not in exclude_tables:
        table.create(bind=tgt_engine)

# refresh metadata before you can copy data
tgt_metadata.clear()
tgt_metadata.reflect()

# Copy all data from src to target
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert()
    for index, row in enumerate(src_table.select().execute()):
        print("table =", table.name, "Inserting row", index)
        stmt.execute(row._asdict())
If anyone has difficulties executing the proposed routine because stmt.execute(row._asdict()) raises an error in version 1.4, here is an alternative that worked for me:
# Copy all data from src to target
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    for index, row in enumerate(src_table.select().execute()):
        print("table =", table.name, "Inserting row", index, '>>', dict(row))
        stmt = table.insert().values(row._asdict())
        tgt_conn.execute(stmt)
tgt_conn.commit()

pandas DataFrame upsert to SQLite

All I want is a simple upsert from the DataFrame to SQLite. However, since pd.to_sql() does not support upsert, I had to implement it with SQLAlchemy instead.
SQLite:
CREATE TABLE test (col1 INTEGER, col2 text, col3 REAL, PRIMARY KEY(col1, col2));
python:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import Table
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.automap import automap_base
def test_upsert():
    df = pd.DataFrame({'col1': 1, 'col2': 'a', 'col3': 1.5}, index=[0])
    sql_url = 'sqlite:///testDB.db'
    table = 'test'
    engine = create_engine(sql_url)
    with engine.connect() as conn:
        base = automap_base()
        base.prepare(engine, reflect=True)
        target_table = Table(table, base.metadata, autoload=True, autoload_with=engine)
        stmt = insert(target_table).values(df.to_dict(orient='records'))
        update_dict = {c.name: c for c in stmt.excluded if not c.primary_key}
        conn.execute(stmt.on_conflict_do_update(constraint=f'{table}_pkey', set_=update_dict))
The script above worked with Postgres previously, but it keeps giving me the following error when used with SQLite.
sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near "ON": syntax error
[SQL: INSERT INTO test (col1, col2, col3) VALUES (?, ?, ?) ON CONFLICT (test_pkey) DO UPDATE SET col3 = excluded.col3]
[parameters: (1, 'a', 1.5)]
(Background on this error at: http://sqlalche.me/e/14/e3q8)
I'm not sure what I did wrong, or if there's any better solution since it seems like a very common operation.
Any help is appreciated.
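Not part of the original thread, but as a hedged sketch under the assumption of SQLAlchemy 1.4+ and a SQLite library new enough for ON CONFLICT (3.24+): SQLAlchemy also ships a SQLite-flavoured insert() in sqlalchemy.dialects.sqlite that supports on_conflict_do_update(), and since SQLite does not reference constraints by name, the conflict target is given as index_elements (the primary-key columns) instead of a constraint name. A possible variant of the function above:
# Sketch only (assumes the same testDB.db and test table as in the question).
import pandas as pd
from sqlalchemy import create_engine, MetaData, Table
from sqlalchemy.dialects.sqlite import insert  # SQLite flavour of INSERT ... ON CONFLICT

def test_upsert_sqlite():
    df = pd.DataFrame({'col1': 1, 'col2': 'a', 'col3': 1.5}, index=[0])
    engine = create_engine('sqlite:///testDB.db')
    target_table = Table('test', MetaData(), autoload_with=engine)
    stmt = insert(target_table).values(df.to_dict(orient='records'))
    update_dict = {c.name: c for c in stmt.excluded if not c.primary_key}
    stmt = stmt.on_conflict_do_update(
        index_elements=[c.name for c in target_table.primary_key],  # (col1, col2)
        set_=update_dict,
    )
    with engine.begin() as conn:  # commits on exit
        conn.execute(stmt)
Whether this resolves the exact error above also depends on the SQLite version bundled with the Python build.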

sqlalchemy python select and insert

I have the following:
from sqlalchemy import create_engine
engine1 = create_engine('mysql://user:password@host1/schema', echo=True)
engine2 = create_engine('mysql://user:password@host2/schema')
connection1 = engine1.connect()
connection2 = engine2.connect()
table1 = connection1.execute("select * from table1")
table2 = connection2.execute("select * from table2")
Now I want to insert all entries from table1 into an identical empty table table2 in connection2.
How can I achieve that?
I could also create a dict out of table1 and then insert it into table2. As I learned from the SQLAlchemy documentation there is a way to do that, but the examples there assume that you create a whole new table in order to insert into it with new_table.insert(). It doesn't work for my existing tables.
Thanks
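Not from the original thread, but a minimal sketch of one common way to do this, assuming SQLAlchemy 1.4+ and that table2 really has the same columns as table1: reflect the target table and pass the fetched rows as a list of dicts to a single execute(), which lets the driver use executemany().
# Sketch only: copy all rows from table1 on host1 into table2 on host2.
import sqlalchemy as sa

engine1 = sa.create_engine('mysql://user:password@host1/schema', echo=True)
engine2 = sa.create_engine('mysql://user:password@host2/schema')

# Reflect the existing target table so we can build an INSERT against it.
table2 = sa.Table('table2', sa.MetaData(), autoload_with=engine2)

with engine1.connect() as conn1, engine2.begin() as conn2:
    rows = [dict(r) for r in conn1.execute(sa.text('SELECT * FROM table1')).mappings()]
    if rows:
        conn2.execute(table2.insert(), rows)  # list of dicts -> executemany at driver level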

SQLAlchemy - copy schema and data of subquery to another database

I am trying to copy data from a subquery in Postgres (from_engine) to a SQLite database. I can achieve this for copying a table using the following command:
smeta = MetaData(bind=from_engine)
table = Table(table_name, smeta, autoload=True)
table.metadata.create_all(to_engine)
However, I am not sure how to achieve the same for a subquery statement.
-Sandeep
Edit:
Follow up on the answer. Once I have created the table I want to create a subquery stmt as follows:
table = Table("newtable", dest_metadata, *columns)
stmt = dest_session.query(table).subquery()
However, the last statement ends up with the following error:
cursor.execute(statement, parameters)
sqlalchemy.exc.ProgrammingError: (ProgrammingError) relation "newtable" does not exist
LINE 3: FROM newtable) AS anon_1
One way that works at least in some cases:
- Use column_descriptions of a query object to get some information about the columns in the result set.
- With that information you can build the schema to create the new table in the other database.
- Run the query in the source database and insert the results into the new table.
First, some setup for the example:
from sqlalchemy import create_engine, MetaData
from sqlalchemy import Column, Integer, String, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Engine to the database to query the data from
# (postgresql)
source_engine = create_engine('sqlite:///:memory:', echo=True)
SourceSession = sessionmaker(source_engine)

# Engine to the database to store the results in
# (sqlite)
dest_engine = create_engine('sqlite:///:memory:', echo=True)
DestSession = sessionmaker(dest_engine)

# Create some toy table and fill it with some data
Base = declarative_base()

class Pet(Base):
    __tablename__ = 'pets'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    race = Column(String)

Base.metadata.create_all(source_engine)
sourceSession = SourceSession()
sourceSession.add(Pet(name="Fido", race="cat"))
sourceSession.add(Pet(name="Ceasar", race="cat"))
sourceSession.add(Pet(name="Rex", race="dog"))
sourceSession.commit()
Now to the interesting bit:
# This is the query we want to persist in a new table:
query = sourceSession.query(Pet.name, Pet.race).filter_by(race='cat')
# Build the schema for the new table
# based on the columns that will be returned
# by the query:
metadata = MetaData(bind=dest_engine)
columns = [Column(desc['name'], desc['type']) for desc in query.column_descriptions]
column_names = [desc['name'] for desc in query.column_descriptions]
table = Table("newtable", metadata, *columns)
# Create the new table in the destination database
table.create(dest_engine)
# Finally execute the query
destSession = DestSession()
for row in query:
    destSession.execute(table.insert(row))
destSession.commit()
There should be more efficient ways to do the last loop. But bulk-insert is another topic.
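As a hedged sketch of that bulk-insert idea, reusing the query, column_names, and table objects from the example above: collect the rows into a list of dicts and hand them to a single execute(), so the driver can use executemany().
# Sketch only: replace the per-row loop with one bulk insert.
rows = [dict(zip(column_names, row)) for row in query]
with dest_engine.begin() as conn:
    if rows:
        conn.execute(table.insert(), rows)  # list of dicts -> executemany at driver level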
You can also go through a pandas DataFrame. For example, one approach would use pandas.read_sql(query, source.connection) and df.to_sql(table_name, con=destination.connection).
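A short sketch of that pandas route, reusing the engines and query from the example above (the table name "newtable_pd" is just a placeholder):
# Sketch only: read the query result into a DataFrame, then write it out;
# pandas creates the destination table if it does not exist yet.
import pandas as pd

df = pd.read_sql(query.statement, source_engine)
df.to_sql("newtable_pd", con=dest_engine, index=False, if_exists="replace")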
