I have a class that helps me with SQL queries and inserts in Postgres. I'm using psycopg2==2.7.5 right now. One of the methods I'm using looks like this:
import traceback

import pandas as pd
import psycopg2
import psycopg2.extensions as ps_ext
from typing import List


def insert_with_open_connection(self, df: pd.DataFrame, table_name: str, cursor: ps_ext.cursor,
                                conn: ps_ext.connection,
                                success_msg: str = 'Success',
                                conflict_cols: List[str] = None):
    try:
        # Format the INSERT SQL query
        cols = str(tuple(df.columns)).replace("'", '')
        nc = df.shape[1]
        ss = "(" + ''.join('%s,' for _ in range(nc))[:-1] + ")"
        try:
            args_str = str(b','.join(cursor.mogrify(ss, x) for x in df.values), 'utf-8')
        except psycopg2.ProgrammingError:
            # numpy integer types can trip up mogrify, so convert them first
            args_str = str(b','.join(cursor.mogrify(ss, x) for x in self.clean_numpy_int_for_mogrify(df.values)),
                           'utf-8')
        args_str = args_str.replace("'NaN'::float", 'NULL')
        insert_sql = f'INSERT INTO {table_name} {cols} VALUES {args_str}'
        if conflict_cols is not None:
            conf_cols = str(tuple(conflict_cols)).replace("'", '').replace(',)', ')')
            insert_sql += f"\nON CONFLICT {conf_cols} DO NOTHING"
        insert_sql += ';'
        cursor.execute(insert_sql)
        conn.commit()
        return success_msg, 200
    except Exception:
        return traceback.format_exc(), 400
The conn and cursor parameters are generated from a SqlAlchemy Engine with code like this:
import contextlib
import os

import psycopg2
import sqlalchemy
from sqlalchemy.engine import Connection, Engine


def create_pool(self, **db_config):
    db_user = self.user
    db_pass = self.password
    db_name = self.database
    # e.g. "/cloudsql"
    db_socket_dir = os.environ.get("DB_SOCKET_DIR", "/cloudsql")
    # i.e. "<PROJECT-NAME>:<INSTANCE-REGION>:<INSTANCE-NAME>"
    cloud_sql_connection_name = os.environ.get("CLOUD_SQL_CONNECTION_NAME",
                                               '<PROJECT-NAME>:<INSTANCE-REGION>:<INSTANCE-NAME>')
    self.pool = sqlalchemy.create_engine(
        # Equivalent URL:
        # postgresql+pg8000://<db_user>:<db_pass>@/<db_name>
        #     ?unix_sock=<socket_path>/<cloud_sql_instance_name>/.s.PGSQL.5432
        sqlalchemy.engine.url.URL.create(drivername="postgresql+pg8000",
                                         username=db_user,  # e.g. "my-database-user"
                                         password=db_pass,  # e.g. "my-database-password"
                                         database=db_name,  # e.g. "my-database-name"
                                         query={"unix_sock":
                                                f"{db_socket_dir}/{cloud_sql_connection_name}/.s.PGSQL.5432"}),
        **db_config
    )


def get_db_connection(self) -> Connection:
    if self.pool is None:
        self.create_pool()
    assert isinstance(self.pool, Engine)
    try:
        return self.pool.raw_connection()
    except psycopg2.OperationalError:
        # the pool may have gone stale; rebuild it and retry once
        self.create_pool()
        return self.pool.raw_connection()


@contextlib.contextmanager
def db_connect(self):
    db = self.get_db_connection()
    cur = db.cursor()
    try:
        yield db, cur
    finally:
        db.close()
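For completeness, the insert method is meant to be called with the pair yielded by db_connect(), roughly like this (hypothetical usage; df and my_table are placeholders):

with self.db_connect() as (conn, cur):
    msg, status = self.insert_with_open_connection(df, 'my_table', cur, conn)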
I'm trying to use this code inside a Google Cloud Function (Linux) and I get the following error/traceback when I run the insert_with_open_connection method there:
Traceback (most recent call last):
  File "/workspace/db/sql_helper.py", line 221, in insert_with_open_connection
    args_str = str(b','.join(cursor.mogrify(ss, x) for x in df.values), 'utf-8')
  File "/workspace/db/sql_helper.py", line 221, in <genexpr>
    args_str = str(b','.join(cursor.mogrify(ss, x) for x in df.values), 'utf-8')
AttributeError: 'Cursor' object has no attribute 'mogrify'
The cursor in the code clearly doesn't have a mogrify attribute, yet based on the docs here, the mogrify method should exist.
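For reference, this is the behaviour I expect from mogrify on a real psycopg2 cursor (a minimal sketch; the connection parameters are placeholders):

import psycopg2

conn = psycopg2.connect(dbname='mydb', user='me', password='secret')
cur = conn.cursor()

# mogrify renders the parameterized query to the exact bytes that would be
# sent to the server, with values safely escaped
print(cur.mogrify("(%s, %s)", (42, "O'Brien")))  # b"(42, 'O''Brien')"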
I took a look at the code and noticed that you were using import psycopg2.extensions as ps_ext, and that cursor class clearly has mogrify in the docs.
Then I came across this line:
self.pool = sqlalchemy.create_engine(
    # Equivalent URL:
    # postgresql+pg8000://<db_user>:<db_pass>@/<db_name>
    #     ?unix_sock=<socket_path>/<cloud_sql_instance_name>/.s.PGSQL.5432
    sqlalchemy.engine.url.URL.create(drivername="postgresql+pg8000",
                                     username=db_user,  # e.g. "my-database-user"
                                     password=db_pass,  # e.g. "my-database-password"
                                     database=db_name,  # e.g. "my-database-name"
                                     query={"unix_sock":
                                            f"{db_socket_dir}/{cloud_sql_connection_name}/.s.PGSQL.5432"}),
    **db_config
)
You aren't using the psycopg2 driver but the pg8000 one. Tracing the way things are generated, the cursor returned by db.cursor(), which in turn was created by self.pool.raw_connection(), I came to the conclusion that the cursor isn't a ps_ext cursor but a pg8000 cursor, which doesn't have the mogrify method, as shown in https://github.com/tlocke/pg8000/blob/main/pg8000/dbapi.py
This is most likely why you're getting this error. I think the solution is to switch to the psycopg2 driver instead.
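A minimal sketch of that change, assuming the same Cloud SQL unix-socket setup (note that with psycopg2 the socket directory goes in the host query parameter rather than pg8000's unix_sock):

self.pool = sqlalchemy.create_engine(
    sqlalchemy.engine.url.URL.create(
        drivername="postgresql+psycopg2",  # was postgresql+pg8000
        username=db_user,
        password=db_pass,
        database=db_name,
        # psycopg2 expects the directory containing the socket,
        # not the full .s.PGSQL.5432 path
        query={"host": f"{db_socket_dir}/{cloud_sql_connection_name}"},
    ),
    **db_config
)

With that, raw_connection() should hand back a psycopg2 connection whose cursors do have mogrify.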
That said, this answer could be wrong and I'm barking up the wrong tree.
Related
My company gets a few flat files each week that need to be uploaded into our database. These are usually split off into two separate tables depending on the naming conventions of the file. The source of the files is consistent and the columns are validated before running the Python script. Below is what the code currently looks like:
import glob
import pandas as pd
import numpy
import pyodbc as dbc

def uploadPerson(filename):
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    df = pd.read_excel(filename)
    cursor = conn.cursor()
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))
    cursor.close()
    conn.close()

def uploadCustomer(filename):
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    df = pd.read_excel(filename)
    cursor = conn.cursor()
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))
    cursor.close()
    conn.close()

def main():
    print('Starting Program')
    for filename in glob.glob('*.xlsx'):
        if 'Person' in filename:
            uploadPerson(filename)
        elif 'Customer' in filename:
            uploadCustomer(filename)
        else:
            print('{0} cannot be imported, incorrect name'.format(filename))
    print('Program Finished')
My questions are:
Is it better to open/close the connection to the database within each function, or state it once in the main function and only perform commits in each function? Not sure how much of a performance hit this takes, and I was curious what the best practice is with pyodbc.
If multiple queries are going to be run against different tables, is it best to initialize/close the cursor each time?
Since the functions basically work the same way aside from the SQL, would it be better to statically define the SQL with if/else and just have one upload function?
With the refactored code below, would this be more Pythonic and efficient at runtime?
import glob
import pandas as pd
import numpy
import pyodbc as dbc

def uploadPerson(filename, conn, cursor):
    df = pd.read_excel(filename)
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))

def uploadCustomer(filename, conn, cursor):
    df = pd.read_excel(filename)
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))

def main():
    print('Starting Program')
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    cursor = conn.cursor()
    for filename in glob.glob('*.xlsx'):
        if 'Person' in filename:
            uploadPerson(filename, conn, cursor)
        elif 'Customer' in filename:
            uploadCustomer(filename, conn, cursor)
        else:
            print('{0} cannot be imported, incorrect name'.format(filename))
    cursor.close()
    conn.close()
    print('Program Finished')
A bit newer to programming with pyodbc so best practices would be appreciated!
Consider encapsulating your methods inside a class that opens the connection once, re-uses the cursor multiple times, and closes the cursor and connection when the object is deleted.
import glob
import pandas as pd
import numpy as np
import pyodbc as dbc

class DataBaseAPI:
    def __init__(self, xl_files):
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

        # INITIALIZE DB OBJECTS
        conn_str = ('Driver={SQL Server Native Client 11.0};'
                    'Server=SERVERNAME;Database=DATABASENAME;'
                    'Trusted_Connection=yes;')
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()

    def processFiles(self):
        for filename in self.glob_files:
            if 'Person' in filename:
                self.filename = filename
                self.uploadPerson()
            elif 'Customer' in filename:
                self.filename = filename
                self.uploadCustomer()
            else:
                print(self.failed_import_msg.format(filename))

    def uploadPerson(self):
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
        self.conn.commit()
        print(self.success_results_msg.format(self.filename, 'Person', len(df), len(df.columns)))

    def uploadCustomer(self):
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
        self.conn.commit()
        print(self.success_results_msg.format(self.filename, 'Customer', len(df), len(df.columns)))

    def __del__(self):
        # CLOSE DB OBJECTS
        self.cursor.close()
        self.conn.close()

obj = DataBaseAPI('*.xlsx')
obj.processFiles()
del obj
Alternatively, since the timing of __del__ is not guaranteed in Python, a more robust option is to use the __enter__ and __exit__ methods to run your class object in a context manager:
class DataBaseAPI:
    def __init__(self, xl_files):
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

    def __enter__(self):
        # INITIALIZE DB OBJECTS
        conn_str = ('Driver={SQL Server Native Client 11.0};'
                    'Server=SERVERNAME;Database=DATABASENAME;'
                    'Trusted_Connection=yes;')
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()
        return self  # IMPORTANT TO ADD

    ...

    def __exit__(self, exception_type, exception_val, trace):
        # CLOSE DB OBJECTS
        self.cursor.close()
        self.conn.close()

with DataBaseAPI('*.xlsx') as obj:
    obj.processFiles()
So I am trying to create an automatic update to SQL from another Excel file, keyed on a unique value, so as to know what new data to add to the database.
The column names differ between the database and the Excel file (in the database the names have no spaces).
I tried to do it with pandas and it gave me the same error.
So here's my simple code, tried with xlrd:
import xlrd
from sqlalchemy import create_engine

def insert():
    book = xlrd.open_workbook(r"MNM_Rotterdam_5_Daily_Details-20191216081027 - Copy (2).xlsx")
    sheet = book.sheet_by_name("GSM Details")
    database = create_engine(
        'mssql+pyodbc://WWX542337CDCD\SMARTRNO_EXPRESS/myDB?driver=SQL+Server+Native+Client+11.0')  # name of database
    cnxn = database.raw_connection
    cursor = cnxn.cursor()
    query = """Insert INTO [myDB].[dbo].[mnm_rotterdam_5_daily_details-20191216081027] (Date, SiteName, CellCI, CellLAC, CellName, CellIndex) values (?,?,?,?,?,?)"""

    for r in range(1, sheet.nrows):
        date = sheet.cell(r,0).value
        site_name = sheet.cell(r,3).value
        cell_ci = sheet.cell(r,4).value
        cell_lac = sheet.cell(r,5).value
        cell_name = sheet.cell(r,6).value
        cell_index = sheet.cell(r,7).value

        values = (date, site_name, cell_ci, cell_lac, cell_name, cell_index)
        cursor.execute(query, values)
    cnxn.commit()

    # Close the cursor
    cursor.close()

    # Commit the transaction
    database.commit()

    # Close the database connection
    database.close()

    # Print results
    print("")
    print("")
    columns = str(sheet.ncols)
    rows = str(sheet.nrows)
    print("Imported", columns, "columns and", rows, "rows. All Done!")

insert()
and this is the error:
When I tried to change the range, I found another error:
Traceback (most recent call last):
  File "D:/Tooling/20200207/uniquebcon.py", line 48, in <module>
    insert()
  File "D:/Tooling/20200207/uniquebcon.py", line 37, in insert
    database.commit()
AttributeError: 'Engine' object has no attribute 'commit'
I think this is related to SQLAlchemy in the connection.
Instead of creating the cursor directly with
cursor = database.raw_connection().cursor()
you can create a connection object, then create the cursor from that, and then call .commit() on the connection:
cnxn = database.raw_connection()
crsr = cnxn.cursor()
# do stuff with crsr ...
cnxn.commit()
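Applied to the insert() function above, a minimal sketch of the relevant part (keeping the rest of the loop as-is) would look something like:

def insert():
    book = xlrd.open_workbook(r"MNM_Rotterdam_5_Daily_Details-20191216081027 - Copy (2).xlsx")
    sheet = book.sheet_by_name("GSM Details")
    database = create_engine(
        'mssql+pyodbc://WWX542337CDCD\SMARTRNO_EXPRESS/myDB?driver=SQL+Server+Native+Client+11.0')

    cnxn = database.raw_connection()  # note the parentheses: it's a method call
    cursor = cnxn.cursor()

    query = """Insert INTO [myDB].[dbo].[mnm_rotterdam_5_daily_details-20191216081027] (Date, SiteName, CellCI, CellLAC, CellName, CellIndex) values (?,?,?,?,?,?)"""
    for r in range(1, sheet.nrows):
        values = (sheet.cell(r, 0).value, sheet.cell(r, 3).value,
                  sheet.cell(r, 4).value, sheet.cell(r, 5).value,
                  sheet.cell(r, 6).value, sheet.cell(r, 7).value)
        cursor.execute(query, values)

    # commit and close on the connection, not on the Engine
    cnxn.commit()
    cursor.close()
    cnxn.close()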
I am trying to select from a specific row and then column in SQL.
I want to find a specific user_name row and then select the access_id from the row.
Here is all of my code.
import sys, ConfigParser, numpy
import MySQLdb as mdb
from plaid.utils import json

class SQLConnection:
    """Used to connect to a SQL database and send queries to it"""
    config_file = 'db.cfg'
    section_name = 'Database Details'
    _db_name = ''
    _hostname = ''
    _ip_address = ''
    _username = ''
    _password = ''

    def __init__(self):
        config = ConfigParser.RawConfigParser()
        config.read(self.config_file)
        print "making"
        try:
            _db_name = config.get(self.section_name, 'db_name')
            _hostname = config.get(self.section_name, 'hostname')
            _ip_address = config.get(self.section_name, 'ip_address')
            _user = config.get(self.section_name, 'user')
            _password = config.get(self.section_name, 'password')
        except ConfigParser.NoOptionError as e:
            print ('one of the options in the config file has no value\n{0}: ' +
                   '{1}').format(e.errno, e.strerror)
            sys.exit()
        self.con = mdb.connect(_hostname, _user, _password, _db_name)
        self.con.autocommit(False)
        self.con.ping(True)
        self.cur = self.con.cursor(mdb.cursors.DictCursor)

    def query(self, sql_query, values=None):
        """
        take in 1 or more query strings and perform a transaction
        @param sql_query: either a single string or an array of strings
            representing individual queries
        @param values: either a single json object or an array of json objects
            representing quoted values to insert into the relative query
            (values and sql_query indexes must line up)
        """
        # TODO check sql_query and values to see if they are lists
        # if sql_query is a string
        if isinstance(sql_query, basestring):
            self.cur.execute(sql_query, values)
            self.con.commit()
        # otherwise sql_query should be a list of strings
        else:
            # execute each query with relative values
            for query, sub_values in zip(sql_query, values):
                self.cur.execute(query, sub_values)
            # commit all these queries
            self.con.commit
        return self.cur.fetchall

    def get_plaid_token(self, username):
        result = self.query("SELECT access_id FROM `users` WHERE `user_name` LIKE %s", [username])
        print type(result)
        return result

print SQLConnection().get_plaid_token("test")
I would like to get the transaction ID, but for some reason "result" returns
> <bound method DictCursor.fetchall of <MySQLdb.cursors.DictCursor
> object at 0x000000000396F278>>
result is also of type "instancemethod"
try changing this line:
return self.cur.fetchall
to
return self.cur.fetchall()
Without the parentheses after the method name, you are returning a reference to that method itself, not running the method.
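As a quick illustration of the difference (cur standing in for any DB-API cursor):

rows = cur.fetchall    # a reference to the bound method object itself
rows = cur.fetchall()  # actually executes the fetch and returns the rows

Note that self.con.commit in the else branch of query() has the same problem; without parentheses the commit never actually runs.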
This is my code:
import pymysql

def connect():
    print("connect to database")
    pw = input("Password: ")
    conn = pymysql.connect(host='localhost', port=3306,
                           user='root', passwd=pw, db='contacts')
    conn.autocommit(True)
    cur_ = conn.cursor()
    return cur_

def show_tables(self):
    print("show tables: ")
    self.execute("""SHOW TABLES""")
    print(self.fetchall())
    return self.fetchall()

db = connect()
table_names = show_tables(db)  # returns a tuple
print(len(table_names))  # output zero
table_name = table_names[0][0]  # ? - todo - get item from tuple
show_tables() returns the value (('person',),).
I want to get the name person with table_names[0][0], but this doesn't work. Also the length of (('person',),) comes out as 0. But why?
Edit:
I get the error:
Traceback (most recent call last):
  File "/home/kame/Dropbox/code/python/scripts/database.py", line 65, in <module>
    table_name = table_names[0][0]  # ? - todo - get item from tuple
IndexError: tuple index out of range
It looks like show_tables(self) is returning an empty result, because you can only call cursor.fetchall() once per executed query; the first call consumes all the rows, so the second returns nothing.
The solution: comment out the line
print(self.fetchall())
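Or, if you want to keep the debug print, fetch once into a variable and reuse it; a minimal sketch:

def show_tables(cur):
    print("show tables: ")
    cur.execute("""SHOW TABLES""")
    rows = cur.fetchall()  # fetch exactly once, then reuse the result
    print(rows)
    return rows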
import MySQLdb
import sys
from libdesklets.controls import Control
from IDBConnection import IDBConnection

class DBConnection(Control, IDBConnection):
    host = 'xxxx'
    user = 'xxxx'
    passwd = 'xxxx'
    db = 'xxxx'

    def __init__(self):
        Control.__init__(self)
        pass

    def __get_dbconnection(self):
        db = MySQLdb.connect(self.host, self.user, self.passwd, self.db)
        return db

    def __insert(self):
        db = self.__get_dbconnection()
        cursor = db.cursor()
        cursor.execute("INSERT INTO Usernotes (UID, NID, Inhalt) VALUES (3, 1, 'text');")
        cursor.close()
        db.close()

    def __select(self):
        db = self.__get_dbconnection()
        cursor = db.cursor()
        cursor.execute("SELECT Inhalt FROM Usernotes WHERE UID = 1 AND NID = 1;")
        cursor.close()
        db.close()

    def __update(self):
        db = self.__get_dbconnection()
        cursor = db.cursor()
        cursor.execute("UPDATE Usernotes SET Inhalt = 'inserttest' WHERE UID = 1 AND NID = 2;")
        cursor.close()
        db.close()

    insert = property(__insert, doc="insert into database")
    select = property(__select, doc="select from database")
    update = property(__update, doc="update database")

def get_class(): return DBConnection
The code above is a Control to work with a MySQL database for Linux gdesklets (that's where the Control and IDBConnection imports come from). When we call the properties from another file (dbc.insert() / dbc.select() / dbc.update()) we get the error "'NoneType' object is not callable". If we add return types, we get "'ReturnType' object is not callable". The functions work and the database operations are done, but the display file (where the functions are called) crashes after the exception.
Hopefully someone can help us here.
Your query returned an empty set, or the value of a column was null.
Without a specific error, it's more difficult to say, but I think your select is coming up with an empty set.
Your insert sets UID = 3 and NID = 1, your update is looking to change where UID = 1 and NID = 2, but your select is looking for UID = 1 and NID = 1.
I'm guessing that's where it's bombing.
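If that's the case, a guard in __select would confirm it; a minimal sketch (note the original never fetches the rows before closing the cursor):

def __select(self):
    db = self.__get_dbconnection()
    cursor = db.cursor()
    cursor.execute("SELECT Inhalt FROM Usernotes WHERE UID = 1 AND NID = 1;")
    rows = cursor.fetchall()  # fetch before closing the cursor
    cursor.close()
    db.close()
    if not rows:
        print("select returned an empty set")
        return None
    return rows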