Copy one database to another using SQLAlchemy - python

I'm trying to copy a database using SQLAlchemy. The first attempt was:
from sqlalchemy import create_engine, MetaData, insert
from sqlalchemy.orm import sessionmaker

from urls import engine_urls

# engine_urls[0] is used as the source database, engine_urls[1] as the target.
engine1 = create_engine(engine_urls[0])
engine2 = create_engine(engine_urls[1])

# Reflect the source schema, then create the same tables on the target.
metadata = MetaData()
metadata.reflect(engine1)
tables = metadata.tables
metadata.create_all(engine2)

Session1 = sessionmaker(bind=engine1)

with Session1.begin() as session:
    # NOTE: `tables` is walked in plain mapping order, not in foreign-key
    # dependency order, so a child row may be inserted before its parent.
    for key in tables:
        table_object = tables[key]
        for row in session.query(table_object):
            # Build one INSERT per source row, keyed by column name.
            s = insert(table_object).\
                values(**dict(zip(row.keys(), row)))
            engine2.execute(s)
But this code does not work since the order in which inserts are done is arbitrary and this violates FK constraints. For example, inserting a child before a parent will cause such a violation. How could I achieve this task? Is there a part of the framework that would do this easily? I can't find it.

Here is what I use. Works well.
from sqlalchemy import create_engine, MetaData, event
from sqlalchemy.sql import sqltypes

# Requires SQLALCHEMY 1.4+

src_engine = create_engine("sqlite:///mydb.sqlite")
src_metadata = MetaData(bind=src_engine)
exclude_tables = ('sqlite_master', 'sqlite_sequence', 'sqlite_temp_master')

tgt_engine = create_engine("postgresql+psycopg2://@localhost/ngas")
tgt_metadata = MetaData(bind=tgt_engine)


@event.listens_for(src_metadata, "column_reflect")
def genericize_datatypes(inspector, tablename, column_dict):
    """Replace each reflected source column type with its generic equivalent.

    Registered as a ``column_reflect`` listener on ``src_metadata``, so it
    runs for every column reflected from the source database. Without the
    decorator the function is never called and the dialect-specific source
    types would be carried over to the target unchanged.
    """
    column_dict["type"] = column_dict["type"].as_generic(allow_nulltype=True)


tgt_conn = tgt_engine.connect()
tgt_metadata.reflect()

# drop all tables in target database
# (reversed dependency order, so referencing tables are dropped first)
for table in reversed(tgt_metadata.sorted_tables):
    if table.name not in exclude_tables:
        print('dropping table =', table.name)
        table.drop()

# # Delete all data in target database
# for table in reversed(tgt_metadata.sorted_tables):
#     table.delete()

tgt_metadata.clear()
tgt_metadata.reflect()
src_metadata.reflect()

# create all tables in target database
for table in src_metadata.sorted_tables:
    if table.name not in exclude_tables:
        table.create(bind=tgt_engine)

# refresh metadata before you can copy data
tgt_metadata.clear()
tgt_metadata.reflect()

# Copy all data from src to target
# (sorted_tables yields parents before children, which keeps FK constraints
# satisfied while inserting)
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert()
    for index, row in enumerate(src_table.select().execute()):
        print("table =", table.name, "Inserting row", index)
        stmt.execute(row._asdict())

If anyone had difficulty running the routine proposed above because `stmt.execute(row._asdict())` raises an error in version 1.4, here is an alternative that worked for me:
# Copy all data from src to target
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    source_rows = src_table.select().execute()
    for index, row in enumerate(source_rows):
        print("table =", table.name, "Inserting row", index, '>>', dict(row))
        # Bind the row values into the statement and run it on the
        # explicit target connection.
        insert_stmt = table.insert().values(row._asdict())
        tgt_conn.execute(insert_stmt)
# NOTE(review): the snippet's original indentation was lost, so the intended
# nesting of commit() is unknown; it is issued once after all inserts here.
tgt_conn.commit()

Related

Increase speed of SQLAlchemy Insert.execute()

Consider the following working code, which copies a source SQLite database to a target SQLite database:
# Create two database.
import sqlite3
import pandas as pd
import time

cn_src = sqlite3.connect('source.db')
df = pd.DataFrame({"x": [1, 2], "y": [2.0, 3.0]})
df.to_sql("A", cn_src, if_exists="replace", index=False)
# Connecting creates target.db if it does not exist yet.
cn_tgt = sqlite3.connect('target.db')
cn_src.close()
cn_tgt.close()

from sqlalchemy import create_engine, MetaData, event
from sqlalchemy.sql import sqltypes

# create sqlalchemy connection
src_engine = create_engine("sqlite:///source.db")
src_metadata = MetaData(bind=src_engine)
exclude_tables = ('sqlite_master', 'sqlite_sequence', 'sqlite_temp_master')

tgt_engine = create_engine("sqlite:///target.db")
tgt_metadata = MetaData(bind=tgt_engine)


@event.listens_for(src_metadata, "column_reflect")
def genericize_datatypes(inspector, tablename, column_dict):
    """Replace each reflected source column type with its generic equivalent.

    Registered as a ``column_reflect`` listener on ``src_metadata``; without
    the decorator the function would never be invoked.
    """
    column_dict["type"] = column_dict["type"].as_generic(allow_nulltype=True)


tgt_conn = tgt_engine.connect()
tgt_metadata.reflect()

# delete tables in target database.
for table in reversed(tgt_metadata.sorted_tables):
    if table.name not in exclude_tables:
        print('dropping table =', table.name)
        table.drop()

tgt_metadata.clear()
tgt_metadata.reflect()
src_metadata.reflect()

# copy table
for table in src_metadata.sorted_tables:
    if table.name not in exclude_tables:
        table.create(bind=tgt_engine)

# Update meta information
tgt_metadata.clear()
tgt_metadata.reflect()

# Copy data
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    stmt = table.insert()
    for index, row in enumerate(src_table.select().execute()):
        print("table =", table.name, "Inserting row", index)
        # Time a single-row INSERT executed through the bound statement.
        start = time.time()
        stmt.execute(row._asdict())
        end = time.time()
        print(end - start)
The code was mainly borrowed from another source. The problem is that each insert takes about 0.017 s (`end-start`) on my computer, which is too slow. Is there any way to speed it up? I have tried setting isolation_level=None in create_engine, but with no luck.
It seems like that Insert object has no executemany method so we can't use bulk inserting.
It seems like that Insert object has no executemany method so we can't use bulk inserting.
SQLAlchemy does not implement separate execute() and executemany() methods. Its execute() method looks at the parameters it receives and
if they consist of a single dict object (i.e., a single row) then it calls execute() at the driver level, or
if they consist of a list of dict objects (i.e., multiple rows) then it calls executemany() at the driver level.
Note also that you are using deprecated usage patterns, specifically MetaData(bind=…). You should be doing something more like this:
import sqlalchemy as sa

engine = sa.create_engine("sqlite://")

# Table definition lives on its own MetaData; no engine is bound to it.
metadata = sa.MetaData()
tbl = sa.Table(
    "tbl",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True, autoincrement=False),
    sa.Column("txt", sa.String),
)
tbl.create(engine)

# A list of dicts makes execute() insert several rows in one call.
rows = [
    {"id": 1, "txt": "foo"},
    {"id": 2, "txt": "bar"},
]
with engine.begin() as conn:
    conn.execute(sa.insert(tbl), rows)

# check results
with engine.begin() as conn:
    print(conn.exec_driver_sql("SELECT * FROM tbl").all())
    # [(1, 'foo'), (2, 'bar')]
I came up with a solution using a transaction:
# Copy data
# All inserts run inside one explicit transaction on the target connection
# and are committed together at the end.
trans = tgt_conn.begin()
for table in tgt_metadata.sorted_tables:
    src_table = src_metadata.tables[table.name]
    insert_stmt = table.insert().execution_options(autocommit=False)
    source_rows = src_table.select().execute()
    for row in source_rows:
        # must use tgt_conn.execute(), not stmt.execute()
        tgt_conn.execute(insert_stmt, row._asdict())
trans.commit()
tgt_conn.close()

Errors while joining two tables using sqlalchemy

When I tried to join two tables I got the following error:
sqlalchemy.exc.ObjectNotExecutableError: Not an executable object: sqlalchemy.sql.selectable.Join at 0x7f31a35b02e8; Join object on
chanel(139851192912136) and Device(139851192912864)
My code is:
import sqlalchemy as db
from sqlalchemy import and_,or_,not_,inspect,text,inspection

engine = db.create_engine("mssql+pymssql://sa:elnetsrv@192.108.55.95/ELNetDB")

# The MetaData object has to exist before it is passed to Table().
metadata = db.MetaData()
Data1 = db.Table("chanel", metadata, autoload=True, autoload_with=engine)
Data2 = db.Table("Device", metadata, autoload=True, autoload_with=engine)

# Join "chanel" to "Device" on chanel.No == Device.ID.
j = Data1.join(Data2, Data1.columns.No == Data2.columns.ID)
# Executing the Join object directly is what raises ObjectNotExecutableError.
print(engine.execute(j))
Data1.join(Data2,Data1.columns.No == Data2.columns.ID) is not executable because it is not a query object.
You can try this instead (assuming you want to select every column from Data1):
# Wrap the join in a SELECT so that it becomes an executable statement.
print(engine.execute(db.select([Data1]).select_from(j)))
see https://docs.sqlalchemy.org/en/13/core/metadata.html#sqlalchemy.schema.Table.join for reference.

Create sqlite database from Access

I would like to create a sqlite Database coming from an access database backend.
Because of the 64-bit/32-bit mismatch, pyodbc doesn't work, so I exported the tables to some Excel files.
A fast solution to start would be:
import os
import pandas as pd
from sqlalchemy import create_engine

#load all files in folder
folder = "...some start folder"
# Map "<file name up to the first dot>" -> full path, for .xlsx files only.
files = {
    filename.split('.')[0]: os.path.join(folder, filename)
    for filename in os.listdir(folder)
    if filename.endswith('.xlsx')
}
# One DataFrame per workbook, keyed by the same name.
list_dfs = {
    table_name: pd.read_excel(path)
    for table_name, path in files.items()
}

#initialize a sqlite database
engine = create_engine('sqlite:///sql.db', echo=False)

#drop tables to sql
for key, frame in list_dfs.items():
    frame.to_sql(
        key,
        con=engine,
        if_exists='append',
        index=False,
        index_label='ID',
    )
I could add some dTypes in frame.to_sql within a dict.
I struggle building the relations between the tables.
sqlalchemy seems to be a good solution, but is it possible to format an existing database?
regards
inco
@PowerStat, thanks for the correction.
My current solution seems to work so far:
import os
import pandas as pd
from sqlalchemy import (create_engine, MetaData, Table, Column, Integer,
                        String, ForeignKey, DateTime, Float)

#%%load all excel filenames into two directories main/id
folder = "some_path"
# Lookup workbooks contain "ID_" in their file name; everything else is "main".
ID_files = {
    filename.split('.')[0]: os.path.join(folder, filename)
    for filename in os.listdir(folder)
    if "ID_" in filename and filename.endswith('.xlsx')
}
main_files = {
    filename.split('.')[0]: os.path.join(folder, filename)
    for filename in os.listdir(folder)
    if "ID_" not in filename and filename.endswith('.xlsx')
}

#create a list of dataframes
ID_dfs = {name: pd.read_excel(path) for name, path in ID_files.items()}
main_dfs = {name: pd.read_excel(path) for name, path in main_files.items()}

#%%initialize meta data for all tables
engine = create_engine('sqlite:///sql.db', echo=False)
meta = MetaData()

# Every lookup table has the same shape: integer ID plus a unique Title.
for key, frame in ID_dfs.items():
    table = Table(
        key, meta,
        Column('ID', Integer, primary_key=True),
        Column('Title', String, unique=True),
    )

# Main table referencing the ID_Book and ID_Article lookup tables.
table = Table(
    'table_of_things', meta,
    Column('ID', Integer, primary_key=True),
    Column('Book_ID', Integer, ForeignKey('ID_Book.ID')),
    Column('Article_ID', Integer, ForeignKey('ID_Article.ID')),
)
meta.create_all(engine)

#add the Dataframes to sql-database
for key, frame in ID_dfs.items():
    frame.to_sql(key, engine, if_exists='append', index=False,
                 index_label='ID')

things_frame = main_dfs['table_of_things']
things_frame.to_sql('table_of_things', engine,
                    if_exists='append', index=False, index_label='ID')
With a second script, I load the database and make a query:
from sqlalchemy.ext.automap import automap_base, generate_relationship
from sqlalchemy.orm import Session
from sqlalchemy import create_engine


def _gen_relationship(base, direction, return_fn,
                      attrname, local_cls, refferred_cls, **kw):
    """Forward every argument unchanged to automap's generate_relationship."""
    relationship = generate_relationship(
        base, direction, return_fn, attrname, local_cls, refferred_cls, **kw
    )
    return relationship


Base = automap_base()

# engine, suppose it has two tables 'user' and 'address' set up
engine = create_engine("sqlite:///sql.db")

# reflect the tables
Base.prepare(
    engine,
    reflect=True,
    generate_relationship=_gen_relationship,
)

#table to classvariable
tob = Base.classes['table_of_things']

session = Session(engine)
ordered_things = session.query(tob).order_by(tob.ID)
for inst in ordered_things:
    print(inst.Book_ID, inst.Article_ID)
The output gives me the ID values but not the title values. How can I use the one-to-many relationship correctly?
Answer:
The relationships are defined adequately!
The instance object (inst) already contains id_book and id_article:
# Follow the automapped relationship attributes to reach each Title.
ordered_things = session.query(tob).order_by(tob.ID)
for inst in ordered_things:
    book, article = inst.id_book, inst.id_article
    print(book.Title, article.Title)

SQLAlchemy - copy schema and data of subquery to another database

I am trying to copy data from a subquery from postgres (from_engine) to sqlite database. I can achieve this for copying a table using following command:
# Reflect the table named `table_name` through metadata bound to from_engine.
smeta = MetaData(bind=from_engine)
table = Table(table_name, smeta, autoload=True)
# Create the reflected table definition(s) on the destination engine.
table.metadata.create_all(to_engine)
However, I am not sure how to achieve the same for a subquery statement.
-Sandeep
Edit:
Follow up on the answer. Once I have created the table I want to create a subquery stmt as follows:
# Declare "newtable" on the destination metadata from the given columns.
# NOTE(review): nothing here issues CREATE TABLE, which would explain the
# 'relation "newtable" does not exist' error shown below — confirm.
table = Table("newtable", dest_metadata, *columns)
# Wrap a SELECT over that table as a subquery.
stmt = dest_session.query(table).subquery();
However, the last stmt ends up with error
cursor.execute(statement, parameters)
sqlalchemy.exc.ProgrammingError: (ProgrammingError) relation "newtable" does not exist
LINE 3: FROM newtable) AS anon_1
One way that works at least in some cases:
Use column_descriptions of a query object to get some information about the columns in the result set.
With that information you can build the schema to create the new table in the other database.
Run the query in the source database and insert the results into the new table.
First off, some setup for the example:
from sqlalchemy import create_engine, MetaData
from sqlalchemy import Column, Integer, String, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Engine to the database to query the data from
# (postgresql)
# An in-memory SQLite database is used here in its place.
source_engine = create_engine('sqlite:///:memory:', echo=True)
SourceSession = sessionmaker(source_engine)

# Engine to the database to store the results in
# (sqlite)
dest_engine = create_engine('sqlite:///:memory:', echo=True)
DestSession = sessionmaker(dest_engine)

# Create some toy table and fills it with some data
Base = declarative_base()


class Pet(Base):
    """Toy ORM model mapped to the ``pets`` table."""

    __tablename__ = 'pets'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    race = Column(String)


Base.metadata.create_all(source_engine)

sourceSession = SourceSession()
sourceSession.add(Pet(name="Fido", race="cat"))
sourceSession.add(Pet(name="Ceasar", race="cat"))
sourceSession.add(Pet(name="Rex", race="dog"))
sourceSession.commit()
Now to the interesting bit:
# This is the query we want to persist in a new table:
query = sourceSession.query(Pet.name, Pet.race).filter_by(race='cat')

# Build the schema for the new table
# based on the columns that will be returned
# by the query:
metadata = MetaData(bind=dest_engine)
columns = [Column(desc['name'], desc['type']) for desc in query.column_descriptions]
column_names = [desc['name'] for desc in query.column_descriptions]
table = Table("newtable", metadata, *columns)

# Create the new table in the destination database
table.create(dest_engine)

# Finally execute the query
destSession = DestSession()
for row in query:
    # Pair each value with its column name and pass it through .values(),
    # rather than handing the raw row tuple to insert() positionally.
    destSession.execute(table.insert().values(dict(zip(column_names, row))))
destSession.commit()
There should be more efficient ways to do the last loop. But bulk-insert is another topic.
You can also go through a pandas data frame. For example a method would use pandas.read_sql(query, source.connection) and df.to_sql(table_name, con=destination.connection).

creating a temporary table from a query using sqlalchemy orm

I can create a temporary table this way:
# Raw SQL: create temptable from a filtered SELECT over existingtable
# (the two adjacent string literals are concatenated into one statement).
session.execute("CREATE TABLE temptable SELECT existingtable.id, "
"existingtable.column2 FROM existingtable WHERE existingtable.id<100000")
but the new table is unreadable because it says it has no primary key. existingtable.id is the primary key of existingtable, so I expected it to get the same treatment in the temp table.
However, I would rather find some ORM way of doing this anyway. Given:
# Table definition for the temporary copy: same two columns, id as the key.
temp_table = Table(
    'temptable', metadata,
    Column('id', Integer, primary_key=True),
    Column('column2', Integer),
    useexisting=True,
)


class TempTable(object):
    """Plain class mapped onto ``temp_table`` below."""
    pass


mapper(TempTable, temp_table)
temp_table.create(bind=session.bind, checkfirst=True)

# make sure it's empty; commit only when delete() reported removed rows
deleted = session.query(TempTable).delete()
if deleted:
    session.commit()
How can I populate temp_table with some selected contents of existingtable without doing 100000 session.query.add(TempTable(...)) commands? Or is there a way of creating the table from a query similar to the plain SQL version above?
It's not exactly ORM, but to create the table initially, I'd clone the table structure (see cloneTable in the example below). For copying the data, I then would use the InsertFromSelect example.
Edit: Since version 0.8.3, SqlAlchemy supports Insert.from_select() out of the box. Hence the InsertFromSelect class and the respective visitor in the example below can be directly replaced and are no longer needed. I leave the original example unchanged for historic reasons.
Here is a working example
from sqlalchemy import Table
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.expression import UpdateBase


class InsertFromSelect(UpdateBase):
    """Custom SQL construct representing ``INSERT INTO <table> <select>``."""

    def __init__(self, table, select):
        self.table = table
        self.select = select


@compiles(InsertFromSelect)
def visit_insert_from_select(element, compiler, **kw):
    """Render an InsertFromSelect as ``INSERT INTO <table> <select>``.

    The ``@compiles`` decorator registers this function as the compiler
    for InsertFromSelect; without it the construct cannot be rendered.
    """
    return "INSERT INTO %s %s" % (
        compiler.process(element.table, asfrom=True),
        compiler.process(element.select)
    )


def cloneTable(name, table, metadata):
    """Return a new Table called ``name`` with copies of ``table``'s
    columns and constraints, attached to ``metadata``."""
    cols = [c.copy() for c in table.columns]
    constraints = [c.copy() for c in table.constraints]
    return Table(name, metadata, *(cols + constraints))


# test data
from sqlalchemy import MetaData, Column, Integer
from sqlalchemy.engine import create_engine

e = create_engine('sqlite://')
m = MetaData(e)
t = Table('t', m, Column('id', Integer, primary_key=True),
          Column('number', Integer))
t.create()
e.execute(t.insert().values(id=1, number=3))
e.execute(t.insert().values(id=9, number=-3))

# create temp table
temp = cloneTable('temp', t, m)
temp.create()

# copy data
ins = InsertFromSelect(temp, t.select().where(t.c.id>5))
e.execute(ins)

# print result
for r in e.execute(temp.select()):
    print(r)

Categories

Resources