PostgreSQL query taking too long via Python

#!/usr/bin/env python
import pika
import psycopg2

def doQuery(conn, i):
    cur = conn.cursor()
    cur.execute("SELECT * FROM table OFFSET %s LIMIT 100000", (i,))
    return cur.fetchall()

print "Using psycopg2"
myConnection = psycopg2.connect(host=hostname, user=username,
                                password=password, dbname=database)

connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
channel.queue_declare(queue='task_queue2')

endloop = False
i = 1
while True:
    results = doQuery(myConnection, i)
    j = 0
    while j < 10000:
        try:
            results[j][-1]
        except IndexError:
            endloop = True
            break
        message = str(results[j][-1]).encode("hex")
        channel.basic_publish(exchange='',
                              routing_key='task_queue2',
                              body=message
                              #properties=pika.BasicProperties(
                              #    delivery_mode=2,  # make message persistent
                              )#)
        j = j + 1
        # if i % 10000 == 0:
        #     print i
    if endloop:
        break
    i = i + 10000
The SQL query takes too long to execute once i reaches 100,000,000, and I have about two billion rows that I need to put into the queue. Does anyone know a more efficient SQL query I can run, so that I can get all two billion into the queue faster?

psycopg2 supports server-side cursors, that is, a cursor that is managed on the database server rather than in the client. The full result set is not transferred to the client all at once; instead, it is fed to the client as required via the cursor interface.
This allows you to perform the query without paging (as LIMIT/OFFSET implements), and it simplifies your code. To use a server-side cursor, pass the name parameter when creating the cursor.
import pika
import psycopg2

with psycopg2.connect(host=hostname, user=username, password=password, dbname=database) as conn:
    with conn.cursor(name='my_cursor') as cur:  # create a named server-side cursor
        cur.execute('select * from table')
        connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
        channel = connection.channel()
        channel.queue_declare(queue='task_queue2')
        for row in cur:
            message = str(row[-1]).encode('hex')
            channel.basic_publish(exchange='', routing_key='task_queue2', body=message)
You might want to tweak cur.itersize to improve performance if necessary.
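For example, a minimal sketch of raising the batch size (itersize defaults to 2000 rows per server round trip; 50000 here is just an illustrative value, and process() is a hypothetical per-row handler):

with conn.cursor(name='my_cursor') as cur:
    cur.itersize = 50000  # rows fetched from the server per network round trip
    cur.execute('select * from table')
    for row in cur:
        process(row)  # hypothetical per-row handler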

Related

MySQL to OPC UA Server

I'm trying to read the data from a MySQL database into an OPC UA server. I tested it with the following code and a sample database, and it works. However, I'm not sure it will run in a real-time environment, as the database has 40+ tables with 30+ columns each, recording 1-minute data. Can someone please suggest the optimal way to do this?
from opcua import ua, uamethod, Server
from time import sleep
import logging
import mysql.connector

mydb = mysql.connector.connect(
    host="127.0.0.1",
    port=3306,
    user="root",
    password="root",
    database="classicmodels")
mycursor = mydb.cursor(buffered=True, dictionary=True)

sql = "SELECT * FROM classicmodels.customers"
mycursor.execute(sql)
myresult = mycursor.fetchone()

sql1 = "SELECT * FROM classicmodels.employees"
mycursor.execute(sql1)
myresult1 = mycursor.fetchone()

if __name__ == "__main__":
    """
    OPC-UA-Server Setup
    """
    server = Server()
    endpoint = "opc.tcp://127.0.0.1:4848"
    server.set_endpoint(endpoint)
    servername = "Python-OPC-UA-Server"
    server.set_server_name(servername)
    """
    OPC-UA-Modeling
    """
    root_node = server.get_root_node()
    object_node = server.get_objects_node()
    idx = server.register_namespace("OPCUA_SERVER")
    myobj = object_node.add_object(idx, "DA_UA")
    myobj1 = object_node.add_object(idx, "D_U")
    """
    OPC-UA-Server Add Variable
    """
    for key, value in myresult.items():
        myobj.add_variable(idx, key, str(value))
    for key, value in myresult1.items():
        myobj1.add_variable(idx, key, str(value))
    """
    OPC-UA-Server Start
    """
    server.start()
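One possible approach to the real-time concern (a sketch, not from the original post): keep the node objects returned by add_variable and refresh their values on a timer, since the tables record 1-minute data.

# Sketch: assumes the setup above; set_value refreshes an existing node
nodes = {}
for key, value in myresult.items():
    nodes[key] = myobj.add_variable(idx, key, str(value))
server.start()
try:
    while True:
        mycursor.execute(sql)  # re-read the latest row
        for key, value in mycursor.fetchone().items():
            nodes[key].set_value(str(value))
        sleep(60)  # matches the 1-minute recording interval
finally:
    server.stop()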

Trouble using an async library: aiomysql (Python bot)

I'm trying to transform my standard database functions into aiomysql async functions (for a bot), but I don't really understand how async functions work...
Here's the actual code that I want to transform:
def connection(Database):
    conn = mysql.connector.connect(host=Database[0],
                                   user=Database[1],
                                   password=Database[2],
                                   database=Database[3])
    c = conn.cursor()
    return c, conn

def insertToTable(Database, insert, data):
    c, conn = connection(Database)
    try:
        c.execute(insert, data)
        conn.commit()
    except mysql.connector.IntegrityError as e:
        pass  # cnx.close()

def deleteFromTable(Database, query):
    c, conn = connection(Database)
    try:
        c.execute(query)
        conn.commit()
    except:
        pass

def getMax(Database, columnName, tableName):
    c, conn = connection(Database)
    c.execute("SELECT MAX(" + columnName + ") FROM " + tableName)
    result = c.fetchall()
    for i in result:
        if i[0] is None:
            return 0
        else:
            return i[0]
My project is separated into multiple files. I have some other basic requests that I didn't wrap in functions:

c, conn = dbconnect.connection(DB)
c.execute("update Tar SET va= (%s) WHERE x=1", (va_num,))
conn.commit()

and some SELECTs with fetchall/fetchone, etc.
I wrote the following after reading the documentation and finding some (rare) examples:
import asyncio
import aiomysql
import setup as setup

loop = asyncio.get_event_loop()

@asyncio.coroutine
def exec_sql(insert, data):
    pool = yield from aiomysql.create_pool(host=setup.DB_local[0], port=3306,
                                           user=setup.DB_local[1], password=setup.DB_local[2],
                                           db=setup.DB_local[3], loop=loop, autocommit=False)
    with (yield from pool) as conn:
        cur = yield from conn.cursor()
        yield from cur.execute(insert, data)
        yield from conn.commit()
    conn.close()
    #pool.close()
    #yield from pool.wait_closed()

insert = ("INSERT into discord_rights (discord_id, discord_name, discord_role, is_admin, is_caster, is_player)"
          " VALUES (%s, %s, %s, %s, %s, %s)")
data = (10, "lea", 0, 0, 1, 1)
sql = loop.run_until_complete(exec_sql(insert, data))

@asyncio.coroutine
def get_one_sql(sql):
    pool = yield from aiomysql.create_pool(host=setup.DB_local[0], port=3306,
                                           user=setup.DB_local[1], password=setup.DB_local[2],
                                           db=setup.DB_local[3], loop=loop, autocommit=False)
    with (yield from pool) as conn:
        cur = yield from conn.cursor()
        yield from cur.execute(sql)
        r = yield from cur.fetchone()
    conn.close()
    return r
    #pool.close()
    #yield from pool.wait_closed()

sql = loop.run_until_complete(get_one_sql("SELECT * from discord_rights WHERE discord_id = 124545xxxxxxxx"))
print(sql)
But I'm not sure this is a good way to do it, since I create a new pool for every request, right?
Can someone help me rebuild one of the functions (importing a pool created in another part of the code), or suggest something better if I'm still wrong?
Thanks for your help, and sorry for the long message; I preferred to show you my code rather than nothing!
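A sketch of the shared-pool pattern the question is reaching for: create the pool once in its own module and import it from the files that need it (the module layout and names are illustrative, and this uses the modern async/await spelling of the same aiomysql calls):

# db.py -- create the pool once, import it everywhere else
import aiomysql
import setup

pool = None

async def init_pool(loop):
    global pool
    pool = await aiomysql.create_pool(host=setup.DB_local[0], port=3306,
                                      user=setup.DB_local[1], password=setup.DB_local[2],
                                      db=setup.DB_local[3], loop=loop, autocommit=False)

async def exec_sql(insert, data):
    # borrow a connection from the shared pool instead of building a new pool
    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            await cur.execute(insert, data)
            await conn.commit()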

Running multithreaded code after Queue.task_done()

In a classical Threading/Queue application, I need to do further calculations in my consumer function. After the queue is empty, no further code after urls.task_done() is executed.
I am importing market data from a JSON API into my MariaDB database.
On the API, every item I want to fetch has its own URL, so I create a queue of all available URLs in a function.
A consumer function processes the queue, importing a new set of data or updating an existing entry, depending on the data already in my database. I already tried to wrap the actual while True loop in its own function, but it didn't work for me.
def create_url():
    try:
        mariadb_connection = mariadb.connect(host='host',
                                             database='db',
                                             user='user',
                                             password='pw')
        cursor = mariadb_connection.cursor()
        cursor.execute('SELECT type_id from tbl_items')
        item_list = cursor.fetchall()
        print("Create URL - Record retrieved successfully")
        for row in item_list:
            url = 'https://someinternet.com/type_id=' + \
                  str(row[0])
            urls.put(url)
        return urls
    except mariadb.Error as error:
        mariadb_connection.rollback()
        print("Failed retrieving itemtypes from tbl_items table {}".format(error))
    finally:
        if mariadb_connection.is_connected():
            cursor.close()
            mariadb_connection.close()
def import_mo(urls):
    list_mo_esi = []
    try:
        mariadb_connection = mariadb.connect(host='host',
                                             database='db',
                                             user='user',
                                             password='pw')
        cursor = mariadb_connection.cursor()
        while True:
            s = requests.Session()
            retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
            s.mount('https://', HTTPAdapter(max_retries=retries))
            jsonraw = s.get(urls.get())
            jsondata = ujson.loads(jsonraw.text)
            for row in jsondata:
                cursor.execute('SELECT order_id from tbl_mo WHERE order_id = %s',
                               (row['order_id'], ))
                exists_mo = cursor.fetchall()
                list_mo_esi.append(row['order_id'])
                if len(exists_mo) != 0:
                    print("updating order#", row['order_id'])
                    cursor.execute('UPDATE tbl_mo SET volume = %s, price = %s WHERE order_id = %s',
                                   (row['volume_remain'], row['price'], row['order_id'], ))
                    mariadb_connection.commit()
                else:
                    cursor.execute('INSERT INTO tbl_mo (type_id, order_id, ordertype, volume, price) VALUES (%s,%s,%s,%s,%s)',
                                   (row['type_id'], row['order_id'], row['is_buy_order'], row['volume_remain'], row['price'], ))
                    mariadb_connection.commit()
            urls.task_done()
    except mariadb.Error as error:
        mariadb_connection.rollback()
        print("Failed retrieving itemtypes from tbl_items table {}".format(error))
The following finally part of my function should run, but is never reached:

    finally:
        list_mo_purge = list(set(list_mo_sql) - set(list_mo_esi))
        cursor.execute('SELECT order_id FROM tbl_mo')
        list_mo_sql = cursor.fetchall()
        print(len(list_mo_esi))
        print(len(list_mo_sql))
        if mariadb_connection.is_connected():
            cursor.close()
            mariadb_connection.close()
Main thread:

for i in range(num_threads):
    worker = Thread(target=import_mo, args=(urls,))
    worker.setDaemon(True)
    worker.start()
create_url()
urls.join()
After all tasks are completed, my workers stop executing code right after urls.task_done().
However, I have more code after urls.task_done() that needs to run, to close the database connection and clean old entries out of my database. How can I make this finally part run?
You are never breaking out of the while loop. You should do the following:

if urls.empty():
    break

Most likely your import thread gets blocked at urls.get().
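A self-contained sketch of the fixed consumer loop (Python 3 queue module; the print is a stand-in for the question's fetch/upsert work, and the 5-second timeout is an illustrative choice):

import queue
from threading import Thread

def consume(urls):
    while True:
        try:
            url = urls.get(timeout=5)   # block briefly instead of forever
        except queue.Empty:
            break                       # queue drained: leave the loop
        try:
            print("processing", url)    # stand-in for fetch/upsert work
        finally:
            urls.task_done()            # mark the item done even on error
    print("cleanup runs here")          # e.g. close cursor and connection

urls = queue.Queue()
for u in ("https://example.com/1", "https://example.com/2"):
    urls.put(u)

workers = [Thread(target=consume, args=(urls,)) for _ in range(2)]
for w in workers:
    w.start()
urls.join()
for w in workers:
    w.join()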

MySQL server has gone away issue

I am a newbie in Python, so this is basically my first project in the language.
Every time I run my script, I get a different answer from the MySQL server.
The most frequent answer is OperationalError: (2006, 'MySQL server has gone away').
Sometimes I get the output Thread: 11 commited (see code below).
And sometimes an emergency stop (translated; I get Russian output in the console).
Whatever the output, even when it is full of commited, the records in the table stay the same.
import MySQLdb
import pyping
import socket, struct
from threading import Thread

def ip2int(addr):
    """Convert ip to integer"""
    return struct.unpack("!I", socket.inet_aton(addr))[0]

def int2ip(addr):
    """Convert integer to ip"""
    return socket.inet_ntoa(struct.pack("!I", addr))

def ping(ip):
    """Pinging client"""
    request = pyping.ping(ip, timeout=100, count=1)
    return int(request.max_rtt)

class UpdateThread(Thread):
    def __init__(self, records, name):
        Thread.__init__(self)
        self.database = MySQLdb.connect(host="***", port=3306, user="root", passwd="***", db="dns")
        self.cursor = database.cursor()  # note: this uses the module-level connection, not self.database
        self.name = name
        self.records = records

    def run(self):
        print(self.name)
        for r in self.records:
            #latency = ping(int2ip(r[1])) what the hell :x
            #ip = str(int2ip(r[1]))
            id = str(r[0])
            self.cursor.execute("""update clients set has_subn=%s where id=%s""" % (id, id))
            self.database.commit()
        print(self.name + " commited")

#start
database = MySQLdb.connect(host="***", port=3306, user="root", passwd="***", db="dns")
cursor = database.cursor()
cursor.execute("""select * from clients""")
data = cursor.fetchall()  # All records from DataBase
count = len(data)
threads_counter = 10  # We are creating 10 threads for all records
th_count = count / threads_counter  # Count of records for each thread
last_thread = count % threads_counter  # Last records
threads = []
i = 0
while i < (count - last_thread):
    temp_list = data[i:(i + th_count)]
    #print(temp_list)
    threads.append(UpdateThread(records=temp_list, name="Thread: " + str((i / 3) + 1)).start())
    i += th_count
threads.append(UpdateThread(records=data[i:count], name="Thread: 11").start())
P.S. The other answers I found here did not help me.
UPD:
I found that some thread (a different one each time) prints OperationalError: (2013, 'Lost connection to MySQL server during query'), and then all subsequent threads print the same error.
You need to close your DB connections when you're done with them, or else the DB server will become overwhelmed and expire your connections. For your program, I would change the code so that you have only one DB connection. You can pass a reference to it to your UpdateThread instances and close it when you're done:

database.close()
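A sketch of that single shared connection; the Lock is my addition (not from the answer above), since a MySQLdb connection should not be used from several threads at once without one:

from threading import Thread, Lock
import MySQLdb

database = MySQLdb.connect(host="***", port=3306, user="root", passwd="***", db="dns")
db_lock = Lock()

class UpdateThread(Thread):
    def __init__(self, records, name):
        Thread.__init__(self)
        self.records = records
        self.name = name

    def run(self):
        for r in self.records:
            with db_lock:  # serialize access to the shared connection
                cursor = database.cursor()
                cursor.execute("update clients set has_subn=%s where id=%s",
                               (str(r[0]), str(r[0])))
                database.commit()
                cursor.close()
        print(self.name + " commited")

# start the threads as before; once all of them are joined:
# database.close()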

How to check that a connection is alive in Python?

I am making a project that reads RFID tags using Python on a Raspberry Pi, with an RDM880 reader.
My idea is to record the time in and time out, to check whether staff arrive at work on time.
I try to add card_ID, time_in and time_out to both a local MySQL server and a remote one (IP: 192.168.137.1) using Python.
The remote and local MySQL servers have the same table.
If the remote MySQL server is broken, I want to write only to the local one.
Here is my code:
import serial
import time
import RPi.GPIO as GPIO
import MySQLdb
from datetime import datetime
from binascii import hexlify

serial = serial.Serial("/dev/ttyAMA0",
                       baudrate=9600,
                       parity=serial.PARITY_NONE,
                       stopbits=serial.STOPBITS_ONE,
                       bytesize=serial.EIGHTBITS,
                       timeout=0.1)

db_local = MySQLdb.connect("localhost", "root", "root", "luan_van")  # connect local
db = MySQLdb.connect("192.168.137.1", "root_a", "", "luan_van")  # connect remote
ID_rong = 128187  # reader response if no card
chuoi = "\xAA\x00\x03\x25\x26\x00\x00\xBB"

def RFID(str):  # read RFID via UART (parameter unused; the global chuoi is sent)
    serial.write(chuoi)
    data = serial.readline()
    tach_5 = data[5]
    tach_6 = data[6]
    hex_5 = hexlify(tach_5)
    hex_6 = hexlify(tach_6)
    num_5 = int(hex_5, 16)
    num_6 = int(hex_6, 16)
    num_a = num_5 * 1000 + num_6
    if (num_a != ID_rong):
        tach_7 = data[7]
        tach_8 = data[8]
        hex_7 = hexlify(tach_7)
        hex_8 = hexlify(tach_8)
        num_7 = int(hex_7, 16)
        num_8 = int(hex_8, 16)
        num = num_8 + num_7 * 1000 + num_6 * 1000000 + num_5 * 1000000000
    else:
        num = num_5 * 1000 + num_6
    return num

def add_database():  # add card_ID and time_in to remote mysql
    with db:
        cur = db.cursor()
        cur.execute("INSERT INTO tt_control(Card_ID,Time_in) VALUES ('%d',NOW()) " % num)
    return

def add_database_local():  # add card_ID and time_in to local mysql
    with db_local:
        cur = db_local.cursor()
        cur.execute("INSERT INTO tt_control(Card_ID,Time_in) VALUES ('%d',NOW()) " % num)
    return

def have_ID(int):  # check ID in table tt_control
    with db_local:
        cur = db_local.cursor(MySQLdb.cursors.DictCursor)
        cur.execute("SELECT * FROM tt_control WHERE Card_ID = '%d'" % num)
        rows = cur.fetchall()
        ID = ""
        for row in rows:
            ID = row['Card_ID']
    return ID

def add_time_out():  # add time out to remote mysql
    with db:
        cur = db.cursor(MySQLdb.cursors.DictCursor)
        cur.execute("UPDATE tt_control SET Time_out = NOW() WHERE Card_ID = '%d'" % num)
    return

def add_time_out_local():  # add time out to local mysql
    with db_local:
        cur = db_local.cursor(MySQLdb.cursors.DictCursor)
        cur.execute("UPDATE tt_control SET Time_out = NOW() WHERE Card_ID = '%d'" % num)
    return

def add_OUT():  # increase Card_ID to distinguish second check (remote)
    with db:
        cur = db.cursor(MySQLdb.cursors.DictCursor)
        cur.execute("UPDATE tt_control SET Card_ID = Card_ID + 1 WHERE Card_ID = '%d'" % num)
    return

def add_OUT_local():  # increase Card_ID to distinguish second check (local)
    with db_local:
        cur = db_local.cursor(MySQLdb.cursors.DictCursor)
        cur.execute("UPDATE tt_control SET Card_ID = Card_ID + 1 WHERE Card_ID = '%d'" % num)
    return

while 1:
    num = RFID(chuoi)
    time.sleep(1)
    Have_ID = have_ID(num)
    if (num != ID_rong):
        if (Have_ID == ""):
            add_database()  # ---> it will error if remote broken, how can I fix it?
            add_database_local()
        else:
            add_time_out()  # ---> it will error if remote broken; I think a connection-alive check can fix it, but I don't know how
            add_time_out_local()
            add_OUT()
            add_OUT_local()  # ---> it will error if remote broken, how can I fix it?
You have a couple of choices:
(not as good) Ping the server regularly to keep the connection alive.
(best) Handle the MySQLdb exception when calling cur.execute by re-establishing your connection and retrying the call. Here's an excellent and concise answer for how to do just that. From that article, you handle the exception yourself:

def __execute_sql(self, sql, cursor):
    try:
        cursor.execute(sql)
        return 1
    except MySQLdb.OperationalError, e:
        if e[0] == 2006:
            self.logger.do_logging('info', 'DB', "%s : Restarting db" % (e))
            self.start_database()
            return 0

(lastly) Establish a new database connection just before you actually make the database calls. In this case, move the db and db_local definitions into a function that you call just before creating your cursor. If you're making thousands of queries this isn't the best, but for only a few database queries it's probably fine.
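A sketch of that last option applied to the question's helpers (the get_connections helper is illustrative, and the except clause is one way to handle the "remote broken" case the asker flagged):

def get_connections():
    # open fresh connections just before use
    db = MySQLdb.connect("192.168.137.1", "root_a", "", "luan_van")
    db_local = MySQLdb.connect("localhost", "root", "root", "luan_van")
    return db, db_local

def add_database(num):
    db, db_local = get_connections()
    try:
        with db:
            cur = db.cursor()
            cur.execute("INSERT INTO tt_control(Card_ID,Time_in) VALUES (%s,NOW())", (num,))
    except MySQLdb.OperationalError:
        pass  # remote is down: skip it and still write locally
    with db_local:
        cur = db_local.cursor()
        cur.execute("INSERT INTO tt_control(Card_ID,Time_in) VALUES (%s,NOW())", (num,))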
I use the following method:

def checkConn(self):
    sq = "SELECT NOW()"
    try:
        self.cur.execute(sq)
    except pymysql.Error as e:
        if e.args[0] == 2006:
            return self.connect()
        else:
            print("No connection with database.")
            return False
I used a simple technique. Initially, I connected to the DB using:

conn = mysql.connector.connect(host=DB_HOST, user=DB_USER, password=DB_PASS, database=DB_NAME)

Whenever I need to check whether the DB is still connected, I use:

conn.ping(reconnect=True, attempts=3, delay=2)

This checks whether the DB connection is still alive. If it is not, the connection is restarted, which solves the problem.
It just makes sense not to run a status-checker function before executing SQL. Best practice is to handle the exception afterward and reconnect to the server.
Since the client library is always on the client side, there is no way to know the server's status (connection status does depend on server status, of course) unless we ping it or connect to it.
Even if you ping the server, confirm the connection is fine, and let the code execute down to the following line, the connection could theoretically still drop in that glimpse of time. So checking the connection status never guarantees a good connection right after the check.
On the other hand, a ping costs about as much as most operations. If your operation fails because of a bad connection, you have paid no more than you would have paid for a ping to check the status.
Considering this, why bother with ping or any other status-checking function, built-in or not? Just execute your command as if the connection is up, then handle the exception in case it is down. This may be why the mysqlclient library does not provide a built-in status checker in the first place.
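A sketch of that execute-first, reconnect-on-failure pattern (MySQLdb/mysqlclient; the class name and single-retry policy are illustrative choices):

import MySQLdb

class RetryingDB(object):
    """Execute first; on OperationalError, reconnect and retry once."""
    def __init__(self, **conn_args):
        self.conn_args = conn_args
        self.conn = MySQLdb.connect(**conn_args)

    def execute(self, sql, params=None):
        try:
            cur = self.conn.cursor()
            cur.execute(sql, params)
            return cur
        except MySQLdb.OperationalError:
            # the connection died (e.g. error 2006): rebuild it and retry once
            self.conn = MySQLdb.connect(**self.conn_args)
            cur = self.conn.cursor()
            cur.execute(sql, params)
            return cur

db = RetryingDB(host="localhost", user="root", passwd="root", db="dns")
rows = db.execute("SELECT NOW()").fetchall()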
