I need to perform a Postgres bulk update using SQLAlchemy.
Right now I am using the code below, which is inefficient and very slow. I wanted to know if there is a better way to perform this action instead of looping over the image names one by one:
for image in list_of_image_names:
    result_update = connection.execute(
        f"UPDATE database.tablename SET image_downloaded = 'I' "
        f"WHERE image_name = '{image}' AND image_downloaded = 'N';"
    )
Code for establishing a connection:
def getconn() -> pg8000.dbapi.Connection:
    conn: pg8000.dbapi.Connection = connector.connect(
        db_secrets['connection_name'],
        "pg8000",
        user='some_user',
        password='some_password',
        db='some_dbname',
    )
    return conn

engine = sqlalchemy.create_engine(
    "postgresql+pg8000://",
    creator=getconn,
)
engine.dialect.description_encoding = None
try:
    conne = engine.connect()
    print("DB connection successful")
except Exception as e:
    print(e)
    raise
return conne  # this block presumably sits inside a helper that returns the connection
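One way to cut the per-image round trips is to issue a single UPDATE driven by a bound list of names. Here is a minimal sketch using SQLAlchemy's expanding IN parameter (assuming the engine above; the table and column names are copied from the question, and binding the names also removes the SQL-injection risk of the f-string):

from sqlalchemy import text, bindparam

stmt = text(
    "UPDATE database.tablename "
    "SET image_downloaded = 'I' "
    "WHERE image_name IN :names AND image_downloaded = 'N'"
).bindparams(bindparam("names", expanding=True))

with engine.begin() as conn:  # begin() commits on success, rolls back on error
    conn.execute(stmt, {"names": list_of_image_names})

This sends one statement for the whole list instead of one per image, so the database does a single pass rather than N separate lookups.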
Related
I'm writing Python code to read from a MySQL database:
def create_server_connection(host, user, password):
    connection = None
    try:
        connection = pymysql.connect(host='localhost',
                                     user='root',
                                     password='pwd',
                                     database='raw_data',
                                     cursorclass=pymysql.cursors.DictCursor)
        print("MySQL Database connection successful")
    except pymysql.MySQLError as error:
        print(f"Error: '{error}'")
    return connection
def read_query(connection, query):
    cur = connection.cursor()
    result = None
    try:
        cur.execute(query)
        result = cur.fetchall()
        return result
    except pymysql.MySQLError as error:
        print(f"Error: '{error}'")
    return cur
def get_Tables_byName(cursor, tableName):
    q1 = f'''
    SELECT table_name FROM raw_data.tables
    WHERE table_name LIKE '{tableName}'; '''
    res = []
    cursor.execute(q1)
    for row in cursor:
        res.append(row[0])
    return res

get_Tables_byName(cursor, 'data_31942010201')
If I want to call the get_Tables_byName function, what should I put in the first parameter? If I put cursor, the error message shows NameError: name 'cursor' is not defined.
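A minimal sketch of the missing wiring: build the connection first, then derive a cursor from it and pass that cursor in. Note that because create_server_connection configures a DictCursor, rows come back as dicts, so get_Tables_byName would need row['table_name'] instead of row[0] (or create the connection without the cursorclass argument):

connection = create_server_connection('localhost', 'root', 'pwd')
cursor = connection.cursor()  # this is the object the first parameter expects
tables = get_Tables_byName(cursor, 'data_31942010201')
print(tables)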
I am using the MariaDB Database Connector for Python, and I have a singleton database class that is responsible for creating a pool and performing database operations on that pool. I have made every effort to close the connections after every access. But still, after a while the pool becomes unusable and gets stuck, never to be freed. This might be a bug in the connector or a bug in my code. Once the pool is exhausted, I create and return a normal connection, which is not efficient for every database access.
Here's my database module code:
import mariadb
import configparser
import sys
from classes.logger import AppLogger
logger = AppLogger(__name__)
connections = 0
class Db:
    """
    Main database for the application
    """
    config = configparser.ConfigParser()
    config.read('/app/config/conf.ini')
    db_config = config['db']
    try:
        conn_pool = mariadb.ConnectionPool(
            user = db_config['user'],
            password = db_config['password'],
            host = db_config['host'],
            port = int(db_config['port']),
            pool_name = db_config['pool_name'],
            pool_size = int(db_config['pool_size']),
            database = db_config['database'],
        )
    except mariadb.PoolError as e:
        print(f'Error creating connection pool: {e}')
        logger.error(f'Error creating connection pool: {e}')
        sys.exit(1)

    def get_pool(self):
        return self.conn_pool if self.conn_pool is not None else self.create_pool()

    def __get_connection__(self):
        """
        Returns a db connection
        """
        global connections
        try:
            pconn = self.conn_pool.get_connection()
            pconn.autocommit = True
            print(f"Receiving connection. Auto commit: {pconn.autocommit}")
            connections += 1
            print(f"New Connection. Open Connections: {connections}")
            logger.debug(f"New Connection. Open Connections: {connections}")
        except mariadb.PoolError as e:
            print(f"Error getting pool connection: {e}")
            logger.error(f'Error getting pool connection: {e}')
            # exit(1)
            pconn = self.__create_connection__()
            pconn.autocommit = True
            connections += 1
            logger.debug(f'Created normal connection following failed pool access. Connections: {connections}')
        return pconn

    def __create_connection__(self):
        """
        Creates a new connection. Use this when getting a
        pool connection fails
        """
        db_config = self.db_config
        return mariadb.connect(
            user = db_config['user'],
            password = db_config['password'],
            host = db_config['host'],
            port = int(db_config['port']),
            database = db_config['database'],
        )

    def exec_sql(self, sql, values = None):
        global connections
        pconn = self.__get_connection__()
        try:
            cur = pconn.cursor()
            print(f'Sql: {sql}')
            print(f'values: {values}')
            cur.execute(sql, values)
            # pconn.commit()
            # Is this a select operation?
            if sql.lower().startswith('select'):
                result = cur.fetchall()  # Return a result set for select operations
            else:
                result = True
            pconn.close()
            connections -= 1
            print(f'connection closed: connections: {connections}')
            logger.debug(f'connection closed: connections: {connections}')
            # return True  # Return true for insert, update, and delete operations
            return result
        except mariadb.Error as e:
            print(f"Error performing database operations: {e}")
            # pconn.rollback()
            pconn.close()
            connections -= 1
            print(f'connection closed: connections: {connections}')
            return False
To use the class in a module, I import it there, instantiate an object from the class, and run SQL queries on it:
db = Db()
users = db.exec_sql("SELECT * FROM users")
Any ideas why the pool gets exhausted after a while (maybe days) and never gets healed?
Perhaps an error other than mariadb.Error is sometimes raised, so the connection is never closed. I believe the best practice would be to use a finally block to guarantee that the connection is always closed, like this:
pconn = None
try:
    pconn = self.__get_connection__()
    # ...
except mariadb.Error as e:
    # ...
finally:
    if pconn:
        try:
            pconn.close()
        except Exception:
            # Not really expected, but if this ever happens it should not alter
            # whatever happened in the try or except sections above.
            pass
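Applied to the exec_sql method from the question, a minimal sketch might look like this (names kept from the question; the connection counting and prints are trimmed for brevity):

def exec_sql(self, sql, values=None):
    pconn = None
    try:
        pconn = self.__get_connection__()
        cur = pconn.cursor()
        cur.execute(sql, values)
        if sql.lower().startswith('select'):
            return cur.fetchall()  # result set for select operations
        return True  # insert/update/delete succeeded
    except Exception as e:  # catch everything, not just mariadb.Error
        logger.error(f'Error performing database operations: {e}')
        return False
    finally:
        if pconn:
            try:
                pconn.close()  # a pooled connection goes back to the pool here
            except Exception:
                pass  # a failure while closing should not mask the result above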
Good day. I wrote a Python application that collects call logs from an Avaya PBX and writes them to a MySQL database. It works well, but sometimes the PBX sends an empty string for some reason and the program fails. I attach the code below. I understand that I need to wrap the function in a try/except block, but I don't understand how to do it. Please tell me how to do this.
def write_db(item, *args):
    connection = pymysql.connect(host='localhost',
                                 user='acdr',
                                 password='it8ejokd',
                                 db='avaya_cdr',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)
    DBTBL = "cdr102019"
    DBFLD = "Date_call, Time_call, `Sec_dur`, `clg_num_in_tag`, `dialed_num`, dep_called, dep_dialed"
    dep_num_call = find_dep(item[3].replace(' ', ''))
    name_dep_call = name_dep(dep_num_call)
    dep_num_dial = find_dep(item[4].replace(' ', ''))
    name_dep_dial = name_dep(dep_num_dial)
    item.append(name_dep_call)
    item.append(name_dep_dial)
    item[1] = item[1] + "00"
    try:
        with connection.cursor() as cursor:
            sql = "INSERT INTO "+DBTBL+" ("+DBFLD+") VALUES (%s,%s,%s,%s,%s,%s,%s)"
            cursor.execute(sql, (item))
            connection.commit()
    finally:
        connection.close()
# Set the server address
SERVER_ADDRESS = ('', 5100)
# Configure the socket
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind(SERVER_ADDRESS)
server_socket.listen(5)
print('server is running, please, press ctrl+c to stop')
# Listen for requests and write to the db
while True:
    connection, address = server_socket.accept()
    data = connection.recv(1024)
    if not (b'\x00\x00\x00' in data) and not (b'1370' in data):
        str = data.decode("utf-8")
        item = [str[0:6], str[7:11], str[12:17], str[18:33], str[34:57]]
        print(item)
        write_db(item)
    connection.close()
You'll have to catch the exception. We could cater for a few specific types, but just to be safe and to get you up and running, you could do the following :)
try:
    with connection.cursor() as cursor:
        sql = (
            "INSERT INTO "+DBTBL+" ("+DBFLD+") VALUES "
            "(%s,%s,%s,%s,%s,%s,%s)"
        )
        cursor.execute(
            sql,
            (item),
        )
        connection.commit()
except Exception as e:
    print(f"Error occurred: {e}")
finally:
    connection.close()
This should do the trick. I've used all four elements of try/except/else/finally here, with brief explanations of when they're executed.
try:
    with connection.cursor() as cursor:
        sql = "INSERT INTO "+DBTBL+" ("+DBFLD+") VALUES (%s,%s,%s,%s,%s,%s,%s)"
        cursor.execute(sql, (item))
except Exception:  # If this code fails, ignore it
    pass
else:  # If the code inside 'try' succeeds, execute this code
    connection.commit()
finally:  # Regardless of whether or not the code inside 'try' succeeds, execute this code
    connection.close()
In a classical Threading/Queue application, I need to do further calculations in my "consumer" function. After the queue is empty, no further code is executed after urls.task_done().
I am importing market data from a JSON API and writing it into my MariaDB database.
On the API, every item that I want to fetch has its own URL, so I create a queue of all available URLs in one function.
A "consumer" function processes the queue, importing a new set of data or updating an existing entry depending on the data already in my database. I already tried to wrap the actual while True loop into its own function, but it didn't work for me.
def create_url():
    try:
        mariadb_connection = mariadb.connect(host='host',
                                             database='db',
                                             user='user',
                                             password='pw')
        cursor = mariadb_connection.cursor()
        cursor.execute('SELECT type_id from tbl_items')
        item_list = cursor.fetchall()
        print("Create URL - Record retrieved successfully")
        for row in item_list:
            url = 'https://someinternet.com/type_id=' + str(row[0])
            urls.put(url)
        return urls
    except mariadb.Error as error:
        mariadb_connection.rollback()
        print("Failed retrieving itemtypes from tbl_items table {}".format(error))
    finally:
        if mariadb_connection.is_connected():
            cursor.close()
            mariadb_connection.close()
def import_mo(urls):
    list_mo_esi = []
    try:
        mariadb_connection = mariadb.connect(host='host',
                                             database='db',
                                             user='user',
                                             password='pw')
        cursor = mariadb_connection.cursor()
        while True:
            s = requests.Session()
            retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
            s.mount('https://', HTTPAdapter(max_retries=retries))
            jsonraw = s.get(urls.get())
            jsondata = ujson.loads(jsonraw.text)
            for row in jsondata:
                cursor.execute('SELECT order_id from tbl_mo WHERE order_id = %s',
                               (row['order_id'], ))
                exists_mo = cursor.fetchall()
                list_mo_esi.append(row['order_id'])
                if len(exists_mo) != 0:
                    print("updating order#", row['order_id'])
                    cursor.execute('UPDATE tbl_mo SET volume = %s, price = %s WHERE order_id = %s',
                                   (row['volume_remain'], row['price'], row['order_id'], ))
                    mariadb_connection.commit()
                else:
                    cursor.execute('INSERT INTO tbl_mo (type_id, order_id, ordertype, volume, price) VALUES (%s,%s,%s,%s,%s)',
                                   (row['type_id'], row['order_id'], row['is_buy_order'], row['volume_remain'], row['price'], ))
                    mariadb_connection.commit()
            urls.task_done()
    except mariadb.Error as error:
        mariadb_connection.rollback()
        print("Failed retrieving itemtypes from tbl_items table {}".format(error))
The following finally part of my function is not executed, but it should be:
    finally:
        cursor.execute('SELECT order_id FROM tbl_mo')
        list_mo_sql = cursor.fetchall()
        list_mo_purge = list(set(list_mo_sql) - set(list_mo_esi))
        print(len(list_mo_esi))
        print(len(list_mo_sql))
        if mariadb_connection.is_connected():
            cursor.close()
            mariadb_connection.close()
Main thread:
for i in range(num_threads):
    worker = Thread(target=import_mo, args=(urls,))
    worker.setDaemon(True)
    worker.start()

create_url()
urls.join()
After all tasks are completed, my workers stop executing code right after urls.task_done().
However, I have some more code after urls.task_done() that needs to be executed to close the database connection and clean old entries out of my database. How can I make this "finally" part run?
You are not breaking out of the while loop.
You should do the following:

if urls.empty():
    break
Most likely your import thread gets blocked at urls.get().
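Placed at the top of the consumer's loop, a minimal sketch (this assumes the queue is fully filled before the workers drain it; note that in the main thread above the workers start before create_url() runs, so a worker that wakes up early could see an empty queue and exit immediately):

while True:
    if urls.empty():
        break  # leave the loop so the finally block can run
    url = urls.get()
    # ... fetch url and insert/update rows as in import_mo above ...
    urls.task_done()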
#!/usr/bin/env python
import pika

def doQuery(conn, i):
    cur = conn.cursor()
    cur.execute("SELECT * FROM table OFFSET %s LIMIT 100000", (i,))
    return cur.fetchall()

print "Using psycopg2"
import psycopg2
myConnection = psycopg2.connect(host=hostname, user=username,
                                password=password, dbname=database)

connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
channel.queue_declare(queue='task_queue2')

endloop = False
i = 1
while True:
    results = doQuery(myConnection, i)
    j = 0
    while j < 10000:
        try:
            results[j][-1]
        except:
            endloop = True
            break
        message = str(results[j][-1]).encode("hex")
        channel.basic_publish(exchange='',
                              routing_key='task_queue2',
                              body=message
                              #properties=pika.BasicProperties(
                              #delivery_mode = 2, # make message persistent
                              )#)
        j = j + 1
        # if i % 10000 == 0:
        #     print i
    if endloop:  # stop once we have run past the last row of a batch
        break
    i = i + 10000
The SQL query takes too long to execute once i gets to 100,000,000, but I have about two billion entries I need to put into the queue. Does anyone know of a more efficient SQL query that I can run so that I can get all two billion into the queue faster?
psycopg2 supports server-side cursors, that is, a cursor that is managed on the database server rather than in the client. The full result set is not transferred all at once to the client; rather, it is fed to it as required via the cursor interface.
This will allow you to perform the query without the paging that LIMIT/OFFSET implements, and it will simplify your code. To use a server-side cursor, pass the name parameter when creating the cursor.
import pika
import psycopg2

with psycopg2.connect(host=hostname, user=username, password=password, dbname=database) as conn:
    with conn.cursor(name='my_cursor') as cur:  # create a named server-side cursor
        cur.execute('select * from table')
        connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
        channel = connection.channel()
        channel.queue_declare(queue='task_queue2')
        for row in cur:
            message = str(row[-1]).encode('hex')
            channel.basic_publish(exchange='', routing_key='task_queue2', body=message)
You might want to tweak cur.itersize to improve performance if necessary.
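For instance, a small sketch: itersize controls how many rows psycopg2 fetches from the server per network round trip (the default is 2000), so raising it trades memory for fewer round trips.

with conn.cursor(name='my_cursor') as cur:
    cur.itersize = 10000  # pull 10,000 rows per round trip instead of 2000
    cur.execute('select * from table')
    for row in cur:
        ...  # publish each row as above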