engine = sqlalchemy.engine.create_engine('mysql://py:123#localhost/py', echo=True)
con = engine.connect()
res = con.execute("insert into user set name = %s", 'A')
How do I write this query to a (.sql) file (or how do I assign it to a variable)?
I'd rather not do all the escaping myself.
You can use literal_binds to produce an SQL statement with the values embedded:
import sqlalchemy as sa
# …
user = sa.Table("user", sa.MetaData(), autoload_with=engine)
user_insert = sa.insert(user).values(name="A")
compiled_text = str(
user_insert.compile(compile_kwargs={"literal_binds": True})
)
print(compiled_text)
# INSERT INTO "user" (name) VALUES ('A')
However, be careful about using that with untrusted inputs as there is still the possibility of SQL injection issues.
Related
Consider following working code of copy a souce sqlite database to target sqlite database:
# Create two database.
import sqlite3
import pandas as pd
import time
cn_src = sqlite3.connect('source.db')
df=pd.DataFrame({"x":[1,2],"y":[2.0,3.0]})
df.to_sql("A", cn_src, if_exists="replace", index=False)
cn_tgt = sqlite3.connect('target.db')
cn_src.close()
cn_tgt.close()
from sqlalchemy import create_engine, MetaData, event
from sqlalchemy.sql import sqltypes
# create sqlalchemy conneciton
src_engine = create_engine("sqlite:///source.db")
src_metadata = MetaData(bind=src_engine)
exclude_tables = ('sqlite_master', 'sqlite_sequence', 'sqlite_temp_master')
tgt_engine = create_engine("sqlite:///target.db")
tgt_metadata = MetaData(bind=tgt_engine)
#event.listens_for(src_metadata, "column_reflect")
def genericize_datatypes(inspector, tablename, column_dict):
column_dict["type"] = column_dict["type"].as_generic(allow_nulltype=True)
tgt_conn = tgt_engine.connect()
tgt_metadata.reflect()
# delete tables in target database.
for table in reversed(tgt_metadata.sorted_tables):
if table.name not in exclude_tables:
print('dropping table =', table.name)
table.drop()
tgt_metadata.clear()
tgt_metadata.reflect()
src_metadata.reflect()
# copy table
for table in src_metadata.sorted_tables:
if table.name not in exclude_tables:
table.create(bind=tgt_engine)
# Update meta information
tgt_metadata.clear()
tgt_metadata.reflect()
# Copy data
for table in tgt_metadata.sorted_tables:
src_table = src_metadata.tables[table.name]
stmt = table.insert()
for index, row in enumerate(src_table.select().execute()):
print("table =", table.name, "Inserting row", index)
start=time.time()
stmt.execute(row._asdict())
end=time.time()
print(end-start)
The code was mainly borrowed from other source. The problem is the time end-start is about 0.017 in my computer which is too large. Is there any way to speed up? I have tried set isolation_level=None in create_engine but no luck.
It seems like that Insert object has no executemany method so we can't use bulk inserting.
It seems like that Insert object has no executemany method so we can't use bulk inserting.
SQLAlchemy does not implement separate execute() and executemany() methods. Its execute() method looks at the parameters it receives and
if they consist of a single dict object (i.e., a single row) then it calls execute() at the driver level, or
if they consist of a list of dict objects (i.e., multiple rows) then it calls executemany() at the driver level.
Note also that you are using deprecated usage patterns, specifically MetaData(bind=…). You should be doing something more like this:
import sqlalchemy as sa
engine = sa.create_engine("sqlite://")
tbl = sa.Table(
"tbl",
sa.MetaData(),
sa.Column("id", sa.Integer, primary_key=True, autoincrement=False),
sa.Column("txt", sa.String),
)
tbl.create(engine)
with engine.begin() as conn:
stmt = sa.insert(tbl)
params = [
dict(id=1, txt="foo"),
dict(id=2, txt="bar"),
]
conn.execute(stmt, params)
# check results
with engine.begin() as conn:
print(conn.exec_driver_sql("SELECT * FROM tbl").all())
# [(1, 'foo'), (2, 'bar')]
I come up with a solution using transaction:
# Copy data
trans=tgt_conn.begin()
for table in tgt_metadata.sorted_tables:
src_table = src_metadata.tables[table.name]
stmt = table.insert().execution_options(autocommit=False)
for index, row in enumerate(src_table.select().execute()):
tgt_conn.execute(stmt, row._asdict()) # must use tgt_conn.execute(), not stmt.execute()
trans.commit()
tgt_conn.close()
I have the following code:
from sqlalchemy import create_engine
SCHEMA = 'dev_gba'
TABLE = 'dev_l1c_v2'
USER = 'db-user'
PASSWORD = '-'
ENDPOINT = '-.us-east-1.rds.amazonaws.com'
process_start = 'SOME_VAL'
process_end = 'SOME_VAL'
granule_id = 'A006202_20160829T191558'
engine = create_engine(f"mysql+pymysql://{USER}:{PASSWORD}#{ENDPOINT}/{SCHEMA}")
connection = engine.raw_connection()
try:
cursor_obj = connection.cursor()
cursor_obj.execute(f'UPDATE {SCHEMA}.{TABLE} SET PROCESS_START_TIME = "{process_start}", PROCESS_END_TIME = "{process_end}" WHERE {SCHEMA}.{TABLE}.GRANULE_ID = "{granule_id}"')
cursor_obj.close()
finally:
connection.close()
If I select all from the database, then I can see the rows that are not updated. However, if I print the statement like so:
print(f'UPDATE {SCHEMA}.{TABLE} SET PROCESS_START_TIME = "{process_start}", PROCESS_END_TIME = "{process_end}" WHERE {SCHEMA}.{TABLE}.GRANULE_ID = "{granule_id}"')
The output is:
UPDATE dev_gba.dev_l1c_v2 SET PROCESS_START_TIME = "SOME_VAL", PROCESS_END_TIME = "SOME_VAL" WHERE dev_gba.dev_l1c_v2.GRANULE_ID = "A006202_20160829T191558"
If I take this and copy and paste it into MySQL workbench, it will execute, and I can see that the rows were updated.
I have disabled safe update in the workbench and have tried adding it before my execute statement as
cursor_obj.execute('SET SQL_SAFE_UPDATES = 0')
but that also doesn't work. Here is the other thing that is confusing, earlier in my code I run the following:
connection = engine.raw_connection()
try:
cursor_obj = connection.cursor()
cursor_obj.execute(f'CREATE TEMPORARY TABLE {TEMP_TABLE} SELECT {TABLE}.index FROM {SCHEMA}.{TABLE} WHERE IN_PROGRESS = 0 AND PROCESSED = 0 ORDER BY RAND() LIMIT {CPU_COUNT}')
cursor_obj.execute(f'UPDATE {SCHEMA}.{TABLE} SET IN_PROGRESS = 1, INSTANCE_ID = "{INSTANCE_ID}" WHERE {SCHEMA}.{TABLE}.index IN (SELECT {TEMP_TABLE}.index FROM {TEMP_TABLE})')
cursor_obj.execute(f'SELECT BASE_URL FROM {SCHEMA}.{TABLE} WHERE {SCHEMA}.{TABLE}.index IN (SELECT {TEMP_TABLE}.index FROM {TEMP_TABLE})')
result = cursor_obj.fetchall()
cursor_obj.execute(f'DROP TABLE {TEMP_TABLE}')
cursor_obj.close()
finally:
connection.close()
The update statement in this code works just fine and there are no issues. I have also tried adding echo=True to my create engine line:
engine = create_engine(f"mysql+pymysql://{USER}:{PASSWORD}#{ENDPOINT}/{SCHEMA}", echo = True)
The output is:
2021-12-31 10:17:09,613 INFO sqlalchemy.engine.Engine SHOW VARIABLES LIKE 'sql_mode'
2021-12-31 10:17:09,616 INFO sqlalchemy.engine.Engine [raw sql] {}
2021-12-31 10:17:09,700 INFO sqlalchemy.engine.Engine SHOW VARIABLES LIKE 'lower_case_table_names'
2021-12-31 10:17:09,701 INFO sqlalchemy.engine.Engine [generated in 0.00143s] {}
2021-12-31 10:17:09,858 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2021-12-31 10:17:09,859 INFO sqlalchemy.engine.Engine [raw sql] {}
Which isn't very useful.
I have also tried:
from sqlalchemy.sql import text
cursor_obj.execute(text(f'UPDATE {SCHEMA}.{TABLE} SET PROCESS_START_TIME = "{process_start}", PROCESS_END_TIME = "{process_end}" WHERE {SCHEMA}.{TABLE}.GRANULE_ID = "{granule_id}"'))
This gives the following error:
TypeError: object of type 'TextClause' has no len()
Not really sure where to go from here.
Using string formatting to create SQL statements in Python is error-prone and should be avoided if there are better tools available.
You could run the raw query like this using SQLAlchemy core like this, without having to drop down to the raw connection:
import sqlalchemy as sa
engine = create_engine(f"mysql+pymysql://{USER}:{PASSWORD}#{ENDPOINT}/{SCHEMA}")
# Reflect the database table into an object
tbl = sa.Table(TABLE, sa.MetaData(), autoload_with=engine)
# Create an update object
upd = sa.update(tbl).where(tbl.c.GRANULE_ID == granule_id).values(PROCESS_START_TIME=process_start_time, PROCESS_END_TIME=process_end_time)
# The "begin" context manager will automatically commit on exit
with engine.begin() as conn:
conn.execute(upd)
If you need to use raw SQL, you can do it like this (see Using Textual SQL):
# We need to use string formatting to set the table; SQLAlchemy will automatically qualify it with the schema name.
stmt = f'UPDATE {TABLE} SET PROCESS_START_TIME = :process_start_time, PROCESS_END_TIME = :process_end_time WHERE {TABLE}.GRANULE_ID = :granule_id'
values = {
'process_start_time': process_start_time,
'process_end_time': process_end_time,
'granule_id': granule_id,
}
with engine.begin() as conn:
conn.execute(sa.text(stmt), values)
I'm aware that the best way to prevent sql injection is to write Python queries of this form (or similar):
query = 'SELECT %s %s from TABLE'
fields = ['ID', 'NAME']
cur.execute(query, fields)
The above will work for a single query, but what if we want to do a UNION of 2 SQL commands? I've set this up via sqlite3 for sake of repeatability, though technically I'm using pymysql. Looks as follows:
import sqlite3
conn = sqlite3.connect('dummy.db')
cur = conn.cursor()
query = 'CREATE TABLE DUMMY(ID int AUTO INCREMENT, VALUE varchar(255))'
query2 = 'CREATE TABLE DUMMy2(ID int AUTO INCREMENT, VALUE varchar(255)'
try:
cur.execute(query)
cur.execute(query2)
except:
print('Already made table!')
tnames = ['DUMMY1', 'DUMMY2']
sqlcmds = []
for i in range(0,2):
query = 'SELECT %s FROM {}'.format(tnames[i])
sqlcmds.append(query)
fields = ['VALUE', 'VALUE']
sqlcmd = ' UNION '.join(sqlcmds)
cur.execute(sqlcmd, valid_fields)
When I run this, I get a sqlite Operational Error:
sqlite3.OperationalError: near "%": syntax error
I've validated the query prints as expected with this output:
INSERT INTO DUMMY VALUES(%s) UNION INSERT INTO DUMMY VALUES(%s)
All looks good there. What is the issue with the string substitutions here? I can confirm that running a query with direct string substitution works fine. I've tried it with both selects and inserts.
EDIT: I'm aware there are multiple ways to do this with executemany and a few other. I need to do this with UNION for the purposes I'm using this for because this is a very, very simplified example fo the operational code I'm using
The code below executes few INSERTS at once
import sqlite3
conn = sqlite3.connect('dummy.db')
cur = conn.cursor()
query = 'CREATE TABLE DUMMY(ID int AUTO INCREMENT NOT NULL, VALUE varchar(255))'
try:
cur.execute(query)
except:
print('Already made table!')
valid_fields = [('ya dummy',), ('stupid test example',)]
cur.executemany('INSERT INTO DUMMY (VALUE) VALUES (?)',valid_fields)
I am trying to update some values into a database. The user can give the row that should be changed. The input from the user, however is a string. When I try to parse this into the MySQL connector with python it gives an error because of the apostrophes. The code I have so far is:
import mysql.connector
conn = mysql.connector
conn = connector.connect(user=dbUser, password=dbPasswd, host=dbHost, database=dbName)
cursor = conn.cursor()
cursor.execute("""UPDATE Search SET %s = %s WHERE searchID = %s""", ('maxPrice', 300, 10,))
I get this error
mysql.connector.errors.ProgrammingError: 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''maxPrice' = 300 WHERE searchID = 10' at line 1
How do I get rid of the apostrophes? Because I think they are causing problems.
As noted, you can't prepare it using a field.
Perhaps the safest way is to allow only those fields that are expected, e.g.
#!/usr/bin/python
import os
import mysql.connector
conn = mysql.connector.connect(user=os.environ.get('USER'),
host='localhost',
database='sandbox',
unix_socket='/var/run/mysqld/mysqld.sock')
cur = conn.cursor(dictionary=True)
query = """SELECT column_name
FROM information_schema.columns
WHERE table_schema = DATABASE()
AND table_name = 'Search'
"""
cur.execute(query)
fields = [x['column_name'] for x in cur.fetchall()]
user_input = ['maxPrice', 300, 10]
if user_input[0] in fields:
cur.execute("""UPDATE Search SET {0} = {1} WHERE id = {1}""".format(user_input[0], '%s'),
tuple(user_input[1:]))
print cur.statement
Prints:
UPDATE Search SET maxPrice = 300 WHERE id = 10
Where:
mysql> show create table Search\G
*************************** 1. row ***************************
Search
CREATE TABLE `Search` (
`id` int(11) DEFAULT NULL,
`maxPrice` float DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1
A column name is not a parameter. Put the column name maxPrice directly into your SQL.
cursor.execute("""UPDATE Search SET maxPrice = %s WHERE searchID = %s""", (300, 10))
If you want to use the same code with different column names, you would have to modify the string itself.
sql = "UPDATE Search SET {} = %s WHERE searchID = %s".format('maxPrice')
cursor.execute(sql, (300,10))
But bear in mind that this is not safe from injection the way parameters are, so make sure your column name is not a user-input string or anything like that.
You cannot do it like that. You need to place the column name in the string before you call cursor.execute. Column names cannot be used when transforming variables in cursor.execute.
Something like this would work:
sql = "UPDATE Search SET {} = %s WHERE searchID = %s".format('maxPrice')
cursor.execute(sql, (300, 10,))
You cannot dynamically bind object (e.g., column) names, only values. If that's the logic you're trying to achieve, you'd have to resort to string manipulation/formatting (with all the risks of SQL-injection attacks that come with it). E.g.:
sql = """UPDATE Search SET {} = %s WHERE searchID = %s""".format('maxPrice')
cursor.execute(sql, (300, 10,))
When I try to pass a tuple to the IN argument of a WHERE clause, it gets double-quoted so my query fails. For example, if I do this,
# Connect to DB
import MySQLdb
cnxn = MySQLdb.connect(connectString)
curs = cnxn.cursor()
# Setup query
accounts = ('Hyvaco','TLC')
truck_type = 'fullsize'
query_args = (truck_type, accounts)
sql ='SELECT * FROM archive.incoming WHERE LastCapacity=%s AND Account IN %s'
# Run query and print
curs.execute(sql, query_args)
print(curs._executed)
then I get zero rows back, and the query prints out as
SELECT * FROM archive.incoming WHERE LastCapacity='fullsize'
AND Account IN ("'Hyvaco'", "'TLC'")
Switching accounts from a tuple to a list does not affect the result. How should I be passing these arguments?
How about you create the accounts as a string and then do this:
accounts = "('Hyvaco','TLC')"
sql ='SELECT * FROM archive.incoming WHERE LastCapacity=%s AND Account IN '+ accounts