In a classic threading/queue application, I need to do further calculations in my "consumer" function. Once the queue is empty, no code after urls.task_done() is executed.
I am importing market data from a JSON API into my MariaDB database.
On the API, every item I want to fetch has its own URL, so I create a queue of all available URLs in one function.
A "consumer" function processes the queue, importing a new set of data or updating an existing entry depending on what is already in my database. I already tried wrapping the while True loop in its own function, but that didn't work for me.
import queue
import mysql.connector as mariadb  # is_connected() below suggests mysql.connector, aliased as in the post

urls = queue.Queue()  # shared work queue (a global in the original post)

def create_url():
    try:
        mariadb_connection = mariadb.connect(host='host',
                                             database='db',
                                             user='user',
                                             password='pw')
        cursor = mariadb_connection.cursor()
        cursor.execute('SELECT type_id FROM tbl_items')
        item_list = cursor.fetchall()
        print("Create URL - Record retrieved successfully")
        for row in item_list:
            url = 'https://someinternet.com/type_id=' + str(row[0])
            urls.put(url)
        return urls
    except mariadb.Error as error:
        mariadb_connection.rollback()
        print("Failed retrieving itemtypes from tbl_items table {}".format(error))
    finally:
        if mariadb_connection.is_connected():
            cursor.close()
            mariadb_connection.close()
import requests
import ujson
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def import_mo(urls):  # 'import' is a reserved word in Python; the main thread targets import_mo
    list_mo_esi = []
    try:
        mariadb_connection = mariadb.connect(host='host',
                                             database='db',
                                             user='user',
                                             password='pw')
        cursor = mariadb_connection.cursor()
        while True:
            s = requests.Session()
            retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
            s.mount('https://', HTTPAdapter(max_retries=retries))
            jsonraw = s.get(urls.get())
            jsondata = ujson.loads(jsonraw.text)
            for row in jsondata:
                cursor.execute('SELECT order_id FROM tbl_mo WHERE order_id = %s',
                               (row['order_id'], ))
                exists_mo = cursor.fetchall()
                list_mo_esi.append(row['order_id'])
                if len(exists_mo) != 0:
                    print("updating order#", row['order_id'])
                    cursor.execute('UPDATE tbl_mo SET volume = %s, price = %s WHERE order_id = %s',
                                   (row['volume_remain'], row['price'], row['order_id'], ))
                    mariadb_connection.commit()
                else:
                    cursor.execute('INSERT INTO tbl_mo (type_id, order_id, ordertype, volume, price) VALUES (%s,%s,%s,%s,%s)',
                                   (row['type_id'], row['order_id'], row['is_buy_order'], row['volume_remain'], row['price'], ))
                    mariadb_connection.commit()
            urls.task_done()
    except mariadb.Error as error:
        mariadb_connection.rollback()
        print("Failed retrieving itemtypes from tbl_items table {}".format(error))
The following finally part of my function is not executed, but it should be.
    finally:
        cursor.execute('SELECT order_id FROM tbl_mo')
        # note: fetchall() returns 1-tuples, so unpack before diffing
        list_mo_sql = [row[0] for row in cursor.fetchall()]
        # the purge set must be computed after list_mo_sql exists
        list_mo_purge = list(set(list_mo_sql) - set(list_mo_esi))
        print(len(list_mo_esi))
        print(len(list_mo_sql))
        if mariadb_connection.is_connected():
            cursor.close()
            mariadb_connection.close()
Main thread:
from threading import Thread

num_threads = 4  # example value; the original post does not show it

for i in range(num_threads):
    worker = Thread(target=import_mo, args=(urls,))
    worker.daemon = True  # setDaemon() is deprecated in favour of the attribute
    worker.start()
create_url()
urls.join()
After all tasks are completed, my workers stop executing code right after urls.task_done(). However, I have more code after urls.task_done() that needs to run to close the database connection and clean old entries out of my database. How can I make this finally part run?
You are never breaking out of the while loop, so execution never reaches the code after it; most likely your import thread ends up blocked forever at urls.get() once the queue is empty. Before calling urls.get() you should do the following:
if urls.empty():
    break
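That urls.empty() check can race when several consumers share the queue, so here is a minimal, runnable sketch of a safer variant: a get() with a timeout that raises queue.Empty, which guarantees the loop exits and the finally block runs. The consumer name, example URLs, and print placeholders are illustrative, not the poster's code.
import queue
from threading import Thread

urls = queue.Queue()

def consumer(urls):
    try:
        while True:
            try:
                url = urls.get(timeout=5)  # give up after 5 s of an empty queue
            except queue.Empty:
                break                      # queue drained: leave the loop
            try:
                print("processing", url)   # placeholder for the fetch/upsert work
            finally:
                urls.task_done()           # balance every get() with task_done()
    finally:
        print("cleanup runs here")         # close cursors/connections, purge rows

for u in ('https://example.com/1', 'https://example.com/2'):
    urls.put(u)
workers = [Thread(target=consumer, args=(urls,)) for _ in range(2)]
for w in workers:
    w.start()
urls.join()
for w in workers:
    w.join()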
I need to perform a Postgres bulk update using SQLAlchemy. Right now I am using the code below, which is inefficient and very slow. I want to know whether there is a better way to perform this action than looping over the image names one by one:
for image in list_of_image_names:
    result_update = connection.execute(
        f"UPDATE databse.tablename SET image_downloaded = 'I' "
        f"WHERE image_name = '{image}' AND image_downloaded = 'N';"
    )
Code for establishing a connection:
def getconn() -> pg8000.dbapi.Connection:
    conn: pg8000.dbapi.Connection = connector.connect(
        db_secrets['connection_name'],
        "pg8000",
        user='some_user',
        password='some_password',
        db='some_dbname',
    )
    return conn

# (the snippet ends with 'return conne', so this part presumably lives inside
# another connection helper in the full program)
engine = sqlalchemy.create_engine(
    "{}://".format("postgresql+pg8000"),
    creator=getconn,
)
engine.dialect.description_encoding = None
try:
    conne = engine.connect()
    print("DB connection successful")
except Exception as e:
    print(e)
    raise
return conne
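One common speed-up is to issue a single UPDATE with an expanding IN bind parameter instead of one statement per image. This is a sketch, not the questioner's code: it reuses the engine and list_of_image_names from the question and assumes SQLAlchemy 1.2+, where expanding bind parameters are available.
from sqlalchemy import bindparam, text

# One round trip instead of len(list_of_image_names) round trips, and the
# values are bound rather than interpolated into the SQL string.
stmt = text(
    "UPDATE databse.tablename "
    "SET image_downloaded = 'I' "
    "WHERE image_name IN :names AND image_downloaded = 'N'"
).bindparams(bindparam("names", expanding=True))

with engine.connect() as conn:
    conn.execute(stmt, {"names": list_of_image_names})
    # on SQLAlchemy 2.x, call conn.commit() here
For very long lists, chunking list_of_image_names into batches of a few thousand keeps the statement size manageable.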
I can successfully access the database; however, I can't query the table inside it. I am quite sure the table name is correct; the database is a MIMIC-III database. Please give me a helping hand, thanks a lot!
import psycopg2

try:
    connection = psycopg2.connect(user="postgres",
                                  password="xxxxxxx",
                                  host="localhost",
                                  port="5432",
                                  database="mimic")
    cursor = connection.cursor()
    postgreSQL_select_Query = "select * from admissions"
    cursor.execute(postgreSQL_select_Query)
    print("Selecting rows from mobile table using cursor.fetchall")
    admissions_records = cursor.fetchall()
    print("Print each row and its column values")
    for row in admissions_records:
        print("x = ", row[0], )
        print("y = ", row[1])
        print("z = ", row[2], "\n")
except (Exception, psycopg2.Error) as error:
    print("Error while fetching data from PostgreSQL", error)
finally:
    # closing database connection.
    if connection:
        cursor.close()
        connection.close()
        print("PostgreSQL connection is closed")
Here's the output:
Error while fetching data from PostgreSQL relation "admissions" does not exist
LINE 1: select * from admissions
^
PostgreSQL connection is closed
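The usual cause, assuming the table was loaded with the standard MIMIC-III build scripts (which create the tables inside a mimiciii schema rather than in public), is that admissions is not on the search_path. A minimal sketch of both fixes:
import psycopg2

connection = psycopg2.connect(user="postgres", password="xxxxxxx",
                              host="localhost", port="5432", database="mimic")
cursor = connection.cursor()

# Option 1: schema-qualify the table name.
cursor.execute("SELECT * FROM mimiciii.admissions LIMIT 5")
print(cursor.fetchall())

# Option 2: put the schema on the search_path for this session.
cursor.execute("SET search_path TO mimiciii")
cursor.execute("SELECT * FROM admissions LIMIT 5")
print(cursor.fetchall())

cursor.close()
connection.close()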
1. I know the error occurs when a connection is shared between threads. What about multiprocessing?
Without db.commit(), the error occurs when the events are started as threads but not as processes. Why? Don't they share the same connection from db = sql.connect(ipAddress, db='mydb')?
With db.commit(), both the thread and the process variants raise the error when sharing the connection.
import time
import multiprocessing as mp
import MySQLdb as sql  # the cursor output below shows MySQLdb, aliased as sql

db = sql.connect(ipAddress, db='mydb')

def query():
    ii = 0
    while 1:
        cur = db.cursor(sql.cursors.Cursor)
        try:
            ii += 1
            s = 'ii:{}'.format(ii)
            cur.execute('''update test set count='{}' where name = 'sean' '''.format(s))
            db.commit()
            cur.execute('''select count from test ''')
            rcv = cur.fetchall()
            print(cur, rcv)
        except (sql.Error, sql.Warning) as e:
            print(e)
        cur.close()
        time.sleep(1)

def getdb():
    while 1:
        cur1 = db.cursor(sql.cursors.Cursor)
        try:
            cur1.execute('''select count from test where name ='sean' ''')
            rcv = cur1.fetchall()
            print(cur1, rcv)
        except (sql.Error, sql.Warning) as e:
            print(e)
        cur1.close()
        time.sleep(1)

event = mp.Process(target=query)
event.start()
time.sleep(3)
event = mp.Process(target=getdb)
event.start()
2. I create two connections, one for each thread or process. I don't understand why I get the latest value of ii only the first time when both connections access the same database. How can this happen?
db = sql.connect(ipAddress, db='mydb')
db1 = sql.connect(ipAddress, db='mydb')

def query():
    ii = 0
    while 1:
        cur = db.cursor(sql.cursors.Cursor)
        # same code as given above

def getdb():
    while 1:
        cur1 = db1.cursor(sql.cursors.Cursor)
        # same code as given above
<MySQLdb.cursors.Cursor object at 0x75ff3ef0> (('ii:50',), ('!999!',), ('$5555555555$',))
<MySQLdb.cursors.Cursor object at 0x75ff3ef0> (('ii:3',),)
<MySQLdb.cursors.Cursor object at 0x75ff3ed0> (('ii:51',), ('!999!',), ('$5555555555$',))
<MySQLdb.cursors.Cursor object at 0x75ff3e50> (('ii:3',),)
<MySQLdb.cursors.Cursor object at 0x75ff3e90> (('ii:52',), ('!999!',), ('$5555555555$',))
<MySQLdb.cursors.Cursor object at 0x75ff3f70> (('ii:3',),)
<MySQLdb.cursors.Cursor object at 0x766cb0b0> (('ii:53',), ('!999!',), ('$5555555555$',))
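A common fix, sketched below rather than taken from the post, is to open the connection inside each process after the fork so no connection object is ever shared, and to commit on the reader as well: under InnoDB's default REPEATABLE READ isolation, a connection that never commits keeps reading the same snapshot, which would explain the reader being stuck at ii:3.
import time
import multiprocessing as mp
import MySQLdb as sql

def query(ip):
    db = sql.connect(ip, db='mydb')  # connection created after the fork
    ii = 0
    while True:
        ii += 1
        cur = db.cursor()
        cur.execute("UPDATE test SET count=%s WHERE name='sean'",
                    ('ii:{}'.format(ii),))
        db.commit()
        cur.close()
        time.sleep(1)

def getdb(ip):
    db = sql.connect(ip, db='mydb')  # each process owns its own connection
    while True:
        cur = db.cursor()
        cur.execute("SELECT count FROM test WHERE name='sean'")
        print(cur.fetchall())
        cur.close()
        db.commit()  # end the read snapshot so the next SELECT sees new commits
        time.sleep(1)

if __name__ == '__main__':
    mp.Process(target=query, args=('127.0.0.1',)).start()
    time.sleep(3)
    mp.Process(target=getdb, args=('127.0.0.1',)).start()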
Good day. I wrote a Python application that collects call logs from an Avaya PBX and writes them to a MySQL database. It works well, but sometimes the PBX sends an empty string and the program fails. The code is attached below. I understand that I need to wrap the function in a try/except, but I don't understand how to do it. Please tell me how to do this.
import socket
import pymysql

def write_db(item, *args):
    connection = pymysql.connect(host='localhost',
                                 user='acdr',
                                 password='it8ejokd',
                                 db='avaya_cdr',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)
    DBTBL = "cdr102019"
    DBFLD = ("Date_call, Time_call, `Sec_dur`, `clg_num_in_tag`, "
             "`dialed_num`, dep_called, dep_dialed")
    dep_num_call = find_dep(item[3].replace(' ', ''))
    name_dep_call = name_dep(dep_num_call)
    dep_num_dial = find_dep(item[4].replace(' ', ''))
    name_dep_dial = name_dep(dep_num_dial)
    item.append(name_dep_call)
    item.append(name_dep_dial)
    item[1] = item[1] + "00"
    try:
        with connection.cursor() as cursor:
            sql = "INSERT INTO "+DBTBL+" ("+DBFLD+") VALUES (%s,%s,%s,%s,%s,%s,%s)"
            cursor.execute(sql, item)
            connection.commit()
    finally:
        connection.close()
# Set the server address
SERVER_ADDRESS = ('', 5100)

# Configure the socket
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind(SERVER_ADDRESS)
server_socket.listen(5)
print('server is running, please, press ctrl+c to stop')

# Listen for requests and write them to the DB
while True:
    connection, address = server_socket.accept()
    data = connection.recv(1024)
    if b'\x00\x00\x00' not in data and b'1370' not in data:
        record = data.decode("utf-8")  # renamed from 'str' to avoid shadowing the builtin
        item = [record[0:6], record[7:11], record[12:17], record[18:33], record[34:57]]
        print(item)
        write_db(item)
    connection.close()
You'll have to catch the exception. We could cater for a few specific exception types, but just to be safe and to get you up and running, you could do the following:
try:
    with connection.cursor() as cursor:
        sql = (
            "INSERT INTO "+DBTBL+" ("+DBFLD+") VALUES "
            "(%s,%s,%s,%s,%s,%s,%s)"
        )
        cursor.execute(sql, item)
        connection.commit()
except Exception as e:
    print("Error occurred: %s" % e)
finally:
    connection.close()
This should do the trick. I've used all four elements of try/except/else/finally here, with brief explanations of when they're executed.
try:
    with connection.cursor() as cursor:
        sql = "INSERT INTO "+DBTBL+" ("+DBFLD+") VALUES (%s,%s,%s,%s,%s,%s,%s)"
        cursor.execute(sql, item)
except Exception:  # If this code fails, ignore it
    pass
else:  # If the code inside 'try' succeeds, execute this code
    connection.commit()
finally:  # Regardless of whether the code inside 'try' succeeds, execute this code
    connection.close()
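Since the crash is triggered by the PBX occasionally sending an empty string, it may also be worth rejecting empty payloads before write_db() is ever called. A minimal sketch of that guard, reusing the receive loop from the question:
while True:
    connection, address = server_socket.accept()
    data = connection.recv(1024)
    # Skip the empty or whitespace-only payloads the PBX sometimes sends.
    if not data.strip():
        connection.close()
        continue
    if b'\x00\x00\x00' not in data and b'1370' not in data:
        record = data.decode("utf-8", errors="replace")
        item = [record[0:6], record[7:11], record[12:17], record[18:33], record[34:57]]
        write_db(item)
    connection.close()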
#!/usr/bin/env python
import pika

def doQuery(conn, i):
    cur = conn.cursor()
    cur.execute("SELECT * FROM table OFFSET %s LIMIT 100000", (i,))
    return cur.fetchall()

print "Using psycopg2"
import psycopg2
myConnection = psycopg2.connect(host=hostname, user=username,
                                password=password, dbname=database)

connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
channel.queue_declare(queue='task_queue2')

endloop = False
i = 1
while True:
    results = doQuery(myConnection, i)
    j = 0
    while j < 10000:
        try:
            results[j][-1]
        except IndexError:  # ran off the end of the last batch
            endloop = True
            break
        message = str(results[j][-1]).encode("hex")
        channel.basic_publish(exchange='',
                              routing_key='task_queue2',
                              body=message
                              #properties=pika.BasicProperties(
                              #    delivery_mode = 2,  # make message persistent
                              )#)
        j = j + 1
        # if i % 10000 == 0:
        #     print i
    if endloop:  # stop once a short batch signals the end of the table
        break
    i = i + 10000
The SQL query takes too long to execute once i gets to 100,000,000, but I have about two billion entries I need to put into the queue. Does anyone know a more efficient SQL query I can run so that I can get all two billion into the queue faster?
psycopg2 supports server-side cursors, that is, cursors managed on the database server rather than in the client. The full result set is not transferred to the client all at once; instead it is fed to the client as required via the cursor interface.
This lets you perform the query without the paging that LIMIT/OFFSET implements, and it simplifies your code. To use a server-side cursor, pass the name parameter when creating the cursor.
import pika
import psycopg2

with psycopg2.connect(host=hostname, user=username, password=password, dbname=database) as conn:
    with conn.cursor(name='my_cursor') as cur:  # create a named server-side cursor
        cur.execute('select * from table')
        connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
        channel = connection.channel()
        channel.queue_declare(queue='task_queue2')
        for row in cur:
            message = str(row[-1]).encode('hex')
            channel.basic_publish(exchange='', routing_key='task_queue2', body=message)
You might want to tweak cur.itersize to improve performance if necessary.
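For reference, itersize is the attribute of psycopg2 named cursors that controls how many rows are fetched from the server per network round trip (the default is 2000); the value below is only an illustration:
with conn.cursor(name='my_cursor') as cur:
    cur.itersize = 20000  # fetch 20,000 rows per round trip instead of 2,000
    cur.execute('select * from table')
    for row in cur:
        handle(row)  # hypothetical per-row handler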