I need to create a db in MySQL using SQLAlchemy, I am able to connect to a db if it already exists, but I want to be able to create it if it does not exist. These are my tables:
#def __init__(self):
Base = declarative_base()
class utente(Base):
__tablename__="utente"
utente_id=Column(Integer,primary_key=True)
nome_utente=Column(Unicode(20))
ruolo=Column(String(10))
MetaData.create_all()
def __repr(self):
return "utente: {0}, {1}, id: {2}".format(self.ruolo,self.nome_utente,self.utente_id)
class dbmmas(Base):
__tablename__="dbmmas"
db_id=Column(Integer,primary_key=True,autoincrement=True)
nome_db=Column(String(10))
censimento=Column(Integer)
versione=Column(Integer)
ins_data=Column(DateTime)
mod_data=Column(DateTime)
ins_utente=Column(Integer)
mod_utente=Column(Integer)
MetaData.create_all()
def __repr(self):
return "dbmmas: {0}, censimento {1}, versione {2}".format(self.nome_db,self.censimento,self.versione)
class funzione(Base):
__tablename__="funzione"
funzione_id=Column(Integer,primary_key=True,autoincrement=True)
categoria=Column(String(10))
nome=Column(String(20))
def __repr__(self):
return "funzione:{0},categoria:{1},id:{2} ".format(self.nome,self.categoria,self.funzione_id)
class profilo(Base):
__tablename__="rel_utente_funzione"
utente_id=Column(Integer,primary_key=True)
funzione_id=Column(Integer,primary_key=True)
amministratore=Column(Integer)
MetaData.create_all()
def __repr(self):
l=lambda x: "amministratore" if x==1 else "generico"
return "profilo per utente_id:{0}, tipo: {1}, funzione_id: {2}".format(self.utente_id,l(self.amministratore),self.funzione_id)
class aree(Base):
__tablename__="rel_utente_zona"
UTB_id=Column(String(10), primary_key=True) # "in realta' si tratta della seatureSignature della feature sullo shapefile"
utente_id=Column(Integer, primary_key=True)
amministratore=Column(Integer)
MetaData.create_all()
def __repr(self):
l=lambda x: "amministratore" if x==1 else "generico"
return "zona: {0}, pe utente_id:{1}, {2}".format(self.UTB_id,self.utente_id,l(self.amministratore))
class rel_utente_dbmmas(Base):
__tablename__="rel_utente_dbmmas"
utente_id=Column(Integer,primary_key=True)
db_id=Column(Integer,primary_key=True)
amministratore=(Integer)
MetaData.create_all()
def __repr(self):
l=lambda x: "amministratore" if x==1 else "generico"
return "dbregistrato: {0} per l'utente{1} {2}".format(self.db_id,self.utente_id,l(self.amministratore))
To create a mysql database you just connect to the server an create the database:
import sqlalchemy
engine = sqlalchemy.create_engine('mysql://user:password#server') # connect to server
engine.execute("CREATE DATABASE dbname") #create db
engine.execute("USE dbname") # select new db
# use the new db
# continue with your work...
of course your user has to have the permission to create databases.
You can use SQLAlchemy-Utils for that.
pip install sqlalchemy-utils
Then you can do things like
from sqlalchemy_utils import create_database, database_exists
url = 'mysql://{0}:{1}#{2}:{3}'.format(user, pass, host, port)
if not database_exists(url):
create_database(url)
I found the answer here, it helped me a lot.
I don't know what the canonical way is, but here's a way to check to see if a database exists by checking against the list of databases, and to create it if it doesn't exist.
from sqlalchemy import create_engine
# This engine just used to query for list of databases
mysql_engine = create_engine('mysql://{0}:{1}#{2}:{3}'.format(user, pass, host, port))
# Query for existing databases
existing_databases = mysql_engine.execute("SHOW DATABASES;")
# Results are a list of single item tuples, so unpack each tuple
existing_databases = [d[0] for d in existing_databases]
# Create database if not exists
if database not in existing_databases:
mysql_engine.execute("CREATE DATABASE {0}".format(database))
print("Created database {0}".format(database))
# Go ahead and use this engine
db_engine = create_engine('mysql://{0}:{1}#{2}:{3}/{4}'.format(user, pass, host, port, db))
Here's an alternative method if you don't need to know if the database was created or not.
from sqlalchemy import create_engine
# This engine just used to query for list of databases
mysql_engine = create_engine('mysql://{0}:{1}#{2}:{3}'.format(user, pass, host, port))
# Query for existing databases
mysql_engine.execute("CREATE DATABASE IF NOT EXISTS {0} ".format(database))
# Go ahead and use this engine
db_engine = create_engine('mysql://{0}:{1}#{2}:{3}/{4}'.format(user, pass, host, port, db))
CREATE DATABASE IF NOT EXISTS dbName;
Would recommend using with:
from sqlalchemy import create_engine
username = ''
password = ''
host = 'localhost'
port = 3306
DB_NAME = 'db_name'
engine = create_engine(f"mysql://{username}:{password}#{host}:{port}")
with engine.connect() as conn:
# Do not substitute user-supplied database names here.
conn.execute(f"CREATE DATABASE IF NOT EXISTS {DB_NAME}")
The mysqlclient seems to be up to 10 times faster in benchmark tests than PyMySQL, see: What's the difference between MySQLdb, mysqlclient and MySQL connector/Python?.
Yet, why not use a Python-ready package for Python, at least, if it is not about every second of query time? PyMySQL is suggested by the following links, for example:
Using SQLAlchemy to access MySQL without frustrating library installation issues
How to connect MySQL database using Python+SQLAlchemy remotely?.
Python packages:
Install with pip, at best put in "requirements.txt":
PyMySQL
SQLAlchemy
Again, if it is about the best speed of the query, use mysqlclient package. Then you need to install an additional Linux package with sudo apt-get install libmysqlclient-dev.
import statements
Only one needed:
import sqlalchemy
Connection string (= db_url)
Connection string starting with {dialect/DBAPI}+{driver}:
db_url = mysql+pymysql://
where pymysql stands for the used Python package "PyMySQL" as the driver.
Again, if it is about the best speed of the query, use mysqlclient package. Then you need mysql+msqldb:// at this point.
For a remote connection, you need to add to the connection string:
host
user
password
database
port (the port only if it is not the standard 3306)
You can create your db_url with several methods. Do not write user and password and at best any other variable value directly in the string to avoid possible attacks:
sqlalchemy.engine.URL.create(), or with .url.URL, see an example at Connecting from Cloud Functions to Cloud SQL or an example which automatically adds ? suffixes, for example ?driver=SQL+Server, at the end of the string at Building a connection URL for mssql+pyodbc with sqlalchemy.engine.url.URL
f"""...{my_var}..."""
"""...{my_var}...""".format(my_var=xyz_var)
...
Example without the url helper of SQLAlchemy:
db_url = "{dialect}+{driver}://{user}:{password}#{host}:{port}/{database}".format(
or:
db_url = "{dialect}+{driver}://{user}:{password}#{host}/{database}?host={host}?port={port}".format(
dialect = 'mysql',
driver = 'pymysql',
username=db_user,
password=db_pass,
database=db_name,
host=db_host,
port=db_port
)
Other engine configurations
For other connection drivers, dialects and methods, see the SQLAlchemy 1.4 Documentation - Engine Configuration
Create the db if not exists
See How to create a new database using SQLAlchemy?.
engine = sqlalchemy.create_engine(db_url)
if not sqlalchemy.database_exists(engine.url):
create_database(engine.url)
with engine.connect() as conn:
conn.execute("commit")
conn.execute("create database test")
Related
I'm trying to connect to a postgres db using SQL Alchemy and the pg8000 driver. I'd like to specify a search path for this connection. With the Psycopg driver, I could do this by doing something like
engine = create_engine(
'postgresql+psycopg2://dbuser#dbhost:5432/dbname',
connect_args={'options': '-csearch_path={}'.format(dbschema)})
However, this does not work for the pg8000 driver. Is there a good way to do this?
You can use pg8000 pretty much in the same way as psycopg2, just need to swap scheme from postgresql+psycopg2 to postgresql+pg8000.
The full connection string definition is in the SQLAlchemy pg8000 docs:
postgresql+pg8000://user:password#host:port/dbname[?key=value&key=value...]
But while psycopg2.connect will pass kwargs to the server (like options and its content), pg8000.connect will not, so there is no setting search_path with pg8000.
The SQLAlchemy docs describe how to do this. For example:
from sqlalchemy import create_engine, event, text
engine = create_engine("postgresql+pg8000://postgres:postgres#localhost/postgres")
#event.listens_for(engine, "connect", insert=True)
def set_search_path(dbapi_connection, connection_record):
existing_autocommit = dbapi_connection.autocommit
dbapi_connection.autocommit = True
cursor = dbapi_connection.cursor()
cursor.execute("SET SESSION search_path='myschema'")
cursor.close()
dbapi_connection.autocommit = existing_autocommit
with engine.connect() as connection:
result = connection.execute(text("SHOW search_path"))
for row in result:
print(row)
However, as it says in the docs:
SQLAlchemy is generally organized around the concept of keeping this
variable at its default value of public
In Amazon Redshift's Getting Started Guide, it's mentioned that you can utilize SQL client tools that are compatible with PostgreSQL to connect to your Amazon Redshift Cluster.
In the tutorial, they utilize SQL Workbench/J client, but I'd like to utilize python (in particular SQLAlchemy). I've found a related question, but the issue is that it does not go into the detail or the python script that connects to the Redshift Cluster.
I've been able to connect to the cluster via SQL Workbench/J, since I have the JDBC URL, as well as my username and password, but I'm not sure how to connect with SQLAlchemy.
Based on this documentation, I've tried the following:
from sqlalchemy import create_engine
engine = create_engine('jdbc:redshift://shippy.cx6x1vnxlk55.us-west-2.redshift.amazonaws.com:5439/shippy')
ERROR:
Could not parse rfc1738 URL from string 'jdbc:redshift://shippy.cx6x1vnxlk55.us-west-2.redshift.amazonaws.com:5439/shippy'
I don't think SQL Alchemy "natively" knows about Redshift. You need to change the JDBC "URL" string to use postgres.
jdbc:postgres://shippy.cx6x1vnxlk55.us-west-2.redshift.amazonaws.com:5439/shippy
Alternatively, you may want to try using sqlalchemy-redshift using the instructions they provide.
I was running into the exact same issue, and then I remembered to include my Redshift credentials:
eng = create_engine('postgresql://[LOGIN]:[PASSWORD]#shippy.cx6x1vnxlk55.us-west-2.redshift.amazonaws.com:5439/shippy')
sqlalchemy-redshift is works for me, but after few days of reserch
packages (python3.4):
SQLAlchemy==1.0.14 sqlalchemy-redshift==0.5.0 psycopg2==2.6.2
First of all, I checked, that my query is working workbench (http://www.sql-workbench.net), then I force it work in sqlalchemy (this https://stackoverflow.com/a/33438115/2837890 helps to know that auto_commit or session.commit() must be):
db_credentials = (
'redshift+psycopg2://{p[redshift_user]}:{p[redshift_password]}#{p[redshift_host]}:{p[redshift_port]}/{p[redshift_database]}'
.format(p=config['Amazon_Redshift_parameters']))
engine = create_engine(db_credentials, connect_args={'sslmode': 'prefer'})
connection = engine.connect()
result = connection.execute(text(
"COPY assets FROM 's3://xx/xx/hello.csv' WITH CREDENTIALS "
"'aws_access_key_id=xxx_id;aws_secret_access_key=xxx'"
" FORMAT csv DELIMITER ',' IGNOREHEADER 1 ENCODING UTF8;").execution_options(autocommit=True))
result = connection.execute("select * from assets;")
print(result, type(result))
print(result.rowcount)
connection.close()
And after that, I forced to work sqlalchemy_redshift CopyCommand perhaps bad way, looks little tricky:
import sqlalchemy as sa
tbl2 = sa.Table(TableAssets, sa.MetaData())
copy = dialect_rs.CopyCommand(
assets,
data_location='s3://xx/xx/hello.csv',
access_key_id=access_key_id,
secret_access_key=secret_access_key,
truncate_columns=True,
delimiter=',',
format='CSV',
ignore_header=1,
# empty_as_null=True,
# blanks_as_null=True,
)
print(str(copy.compile(dialect=RedshiftDialect(), compile_kwargs={'literal_binds': True})))
print(dir(copy))
connection = engine.connect()
connection.execute(copy.execution_options(autocommit=True))
connection.close()
We make just that I made with sqlalchemy, excute query, except comine query by CopyCommand. I have not see some profit :(.
The following works for me with Databricks on all kinds of SQLs
import sqlalchemy as SA
import psycopg2
host = 'your_host_url'
username = 'your_user'
password = 'your_passw'
port = 5439
url = "{d}+{driver}://{u}:{p}#{h}:{port}/{db}".\
format(d="redshift",
driver='psycopg2',
u=username,
p=password,
h=host,
port=port,
db=db)
engine = SA.create_engine(url)
cnn = engine.connect()
strSQL = "your_SQL ..."
try:
cnn.execute(strSQL)
except:
raise
import sqlalchemy as db
engine = db.create_engine('postgres://username:password#url:5439/db_name')
This worked for me
Ok so here is my use case. I've to make connection to different types of DB(MSSQL, oracl, MYSQL, etc.). I've .sql files for each of these database. As it seems sqlalchemy can't run .sql file so we need to open and execute the statements one by one from the .sql files over the connection.
So guys, I'm having this information and I wanted to connect using SQL Alchemy.
<db type="MSSQL" version="2005" patch_level="SP2" port="1433" id="MSSQLSERVER"/>
here MSSQLServer is the instance. No DB information is provide. so do I need DB name to connect to DB?
this is my command
engine = create_engine('mssql+pyodbc://sa:pass#172.21.153.227/MSSQLSERVER', echo=True)
this is my complete code
from sqlalchemy.engine import create_engine
engine = create_engine('mssql+pyodbc://sa:pass#172.21.153.227', echo=False)
connection = engine.connect()
connection.execute(
"""
select ##version
"""
)
connection.close()
you don't need a db name, you can use this function i wrote:
(it works for me on mySQL)
def ConnectToDB(self, server, uid, password):
"""
this method if for connecting to a db
#param server: server name
#param uid: username
#param password: password
"""
connection_string = 'mysql://{}:{}#{}'.format(uid, password, server)
try:
self.engine = create_engine(connection_string)
self.connection = self.engine.connect()
except exc.SQLAlchemyError, e:
self.engine = None
return False, e
return True, None
in your SQL statements you will say the DB and table some thing like this:
INSERT INTO `dbName`.`dbTable`.........
I'm guessing this is a pretty basic question, but I can't figure out why:
import psycopg2
psycopg2.connect("postgresql://postgres:postgres#localhost/postgres")
Is giving the following error:
psycopg2.OperationalError: missing "=" after
"postgresql://postgres:postgres#localhost/postgres" in connection info string
Any idea? According to the docs about connection strings I believe it should work, however it only does like this:
psycopg2.connect("host=localhost user=postgres password=postgres dbname=postgres")
I'm using the latest psycopg2 version on Python2.7.3 on Ubuntu12.04
I would use the urlparse module to parse the url and then use the result in the connection method. This way it's possible to overcome the psycop2 problem.
from urlparse import urlparse # for python 3+ use: from urllib.parse import urlparse
result = urlparse("postgresql://postgres:postgres#localhost/postgres")
username = result.username
password = result.password
database = result.path[1:]
hostname = result.hostname
port = result.port
connection = psycopg2.connect(
database = database,
user = username,
password = password,
host = hostname,
port = port
)
The connection string passed to psycopg2.connect is not parsed by psycopg2: it is passed verbatim to libpq. Support for connection URIs was added in PostgreSQL 9.2.
To update on this, Psycopg3 does actually include a way to parse a database connection URI.
Example:
import psycopg # must be psycopg 3
pg_uri = "postgres://jeff:hunter2#example.com/db"
conn_dict = psycopg.conninfo.conninfo_to_dict(pg_uri)
with psycopg.connect(**conn_dict) as conn:
...
Another option is using SQLAlchemy for this. It's not just ORM, it consists of two distinct components Core and ORM, and it can be used completely without using ORM layer.
SQLAlchemy provides such functionality out of the box by create_engine function. Moreover, via URI you can specify DBAPI driver or many various postgresql settings.
Some examples:
# default
engine = create_engine("postgresql://user:pass#localhost/mydatabase")
# psycopg2
engine = create_engine("postgresql+psycopg2://user:pass#localhost/mydatabase")
# pg8000
engine = create_engine("postgresql+pg8000://user:pass#localhost/mydatabase")
# psycopg3 (available only in SQLAlchemy 2.0, which is currently in beta)
engine = create_engine("postgresql+psycopg://user:pass#localhost/test")
And here is a fully working example:
import sqlalchemy as sa
# set connection URI here ↓
engine = sa.create_engine("postgresql://user:password#db_host/db_name")
ddl_script = sa.DDL("""
CREATE TABLE IF NOT EXISTS demo_table (
id serial PRIMARY KEY,
data TEXT NOT NULL
);
""")
with engine.begin() as conn:
# do DDL and insert data in a transaction
conn.execute(ddl_script)
conn.exec_driver_sql("INSERT INTO demo_table (data) VALUES (%s)",
[("test1",), ("test2",)])
conn.execute(sa.text("INSERT INTO demo_table (data) VALUES (:data)"),
[{"data": "test3"}, {"data": "test4"}])
with engine.connect() as conn:
cur = conn.exec_driver_sql("SELECT * FROM demo_table LIMIT 2")
for name in cur.fetchall():
print(name)
# you also can obtain raw DBAPI connection
rconn = engine.raw_connection()
SQLAlchemy provides many other benefits:
You can easily switch DBAPI implementations just by changing URI (psycopg2, psycopg2cffi, etc), or maybe even databases.
It implements connection pooling out of the box (both psycopg2 and psycopg3 has connection pooling, but API is different)
asyncio support via create_async_engine (psycopg3 also supports asyncio).
Using SQLAlchemy, an Engine object is created like this:
from sqlalchemy import create_engine
engine = create_engine("postgresql://localhost/mydb")
Accessing engine fails if the database specified in the argument to create_engine (in this case, mydb) does not exist. Is it possible to tell SQLAlchemy to create a new database if the specified database doesn't exist?
SQLAlchemy-Utils provides custom data types and various utility functions for SQLAlchemy. You can install the most recent official version using pip:
pip install sqlalchemy-utils
The database helpers include a create_database function:
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
engine = create_engine("postgres://localhost/mydb")
if not database_exists(engine.url):
create_database(engine.url)
print(database_exists(engine.url))
On postgres, three databases are normally present by default. If you are able to connect as a superuser (eg, the postgres role), then you can connect to the postgres or template1 databases. The default pg_hba.conf permits only the unix user named postgres to use the postgres role, so the simplest thing is to just become that user. At any rate, create an engine as usual with a user that has the permissions to create a database:
>>> engine = sqlalchemy.create_engine("postgres://postgres#/postgres")
You cannot use engine.execute() however, because postgres does not allow you to create databases inside transactions, and sqlalchemy always tries to run queries in a transaction. To get around this, get the underlying connection from the engine:
>>> conn = engine.connect()
But the connection will still be inside a transaction, so you have to end the open transaction with a commit:
>>> conn.execute("commit")
And you can then proceed to create the database using the proper PostgreSQL command for it.
>>> conn.execute("create database test")
>>> conn.close()
It's possible to avoid manual transaction management while creating database by providing isolation_level='AUTOCOMMIT' to create_engine function:
import sqlalchemy
with sqlalchemy.create_engine(
'postgresql:///postgres',
isolation_level='AUTOCOMMIT'
).connect() as connection:
connection.execute('CREATE DATABASE my_database')
Also if you are not sure that database doesn't exist there is a way to ignore database creation error due to existence by suppressing sqlalchemy.exc.ProgrammingError exception:
import contextlib
import sqlalchemy.exc
with contextlib.suppress(sqlalchemy.exc.ProgrammingError):
# creating database as above
Extending the accepted answer using with yields:
from sqlalchemy import create_engine
engine = create_engine("postgresql://localhost")
NEW_DB_NAME = 'database_name'
with engine.connect() as conn:
conn.execute("commit")
# Do not substitute user-supplied database names here.
conn.execute(f"CREATE DATABASE {NEW_DB_NAME}")
Please note that I couldn't get the above suggestions with database_exists because whenever I check if the database exists using if not database_exists(engine.url): I get this error:
InterfaceError('(pyodbc.InterfaceError) (\'28000\', u\'[28000]
[Microsoft][SQL Server Native Client 11.0][SQL Server]Login failed for
user \\'myUser\\'. (18456) (SQLDriverConnect); [28000]
[Microsoft][SQL Server Native Client 11.0][SQL Server]Cannot open
database "MY_DATABASE" requested by the login. The login failed.
(4060); [28000] [Microsoft][SQL Server Native Client 11.0][SQL
Server]Login failed for user \\'myUser\\'. (18456); [28000]
[Microsoft][SQL Server Native Client 11.0][SQL Server]Cannot open
database "MY_DATABASE" requested by the login. The login failed.
(4060)\')',)
Also contextlib/suppress was not working and I'm not using postgres so I ended up doing this to ignore the exception if the database happens to already exist with SQL Server:
import logging
import sqlalchemy
logging.basicConfig(filename='app.log', format='%(asctime)s-%(levelname)s-%(message)s', level=logging.DEBUG)
engine = create_engine('mssql+pyodbc://myUser:mypwd#localhost:1234/MY_DATABASE?driver=SQL+Server+Native+Client+11.0?trusted_connection=yes', isolation_level = "AUTOCOMMIT")
try:
engine.execute('CREATE DATABASE ' + a_database_name)
except Exception as db_exc:
logging.exception("Exception creating database: " + str(db_exc))
If someone like me don't want to take whole sqlalchemy_utils to your project just for database creation, you can use script like this. I've come with it, based on SingleNegationElimination's answer. I'm using pydantic here (it's FastAPI project) and my imported settings for reference, but you can easily change this:
from sqlalchemy import create_engine
from sqlalchemy.exc import OperationalError
from pydantic import PostgresDsn
from src.conf import settings
def build_db_connection_url(custom_db: Optional[str] = None):
db_name = f"/{settings.POSTGRES_DB or ''}" if custom_db is None else "/" + custom_db
return PostgresDsn.build(
scheme='postgresql+psycopg2',
user=settings.POSTGRES_USER,
password=settings.POSTGRES_PASSWORD,
host=settings.POSTGRES_HOST,
path=db_name,
)
def create_database(db_name: str):
try:
eng = create_engine(build_db_connection_url(custom_db=db_name))
conn = eng.connect()
conn.close()
except OperationalError as exc:
if "does not exist" in exc.__str__():
eng = create_engine(build_db_connection_url(custom_db="postgres"))
conn = eng.connect()
conn.execute("commit")
conn.execute(f"create database {db_name}")
conn.close()
print(f"Database {db_name} created")
else:
raise exc
eng.dispose()
create_database("test_database")