Python : threading.Thread block - python

import pymysql
from multiprocessing.pool import ThreadPool
def process(item):
host = str(item).strip()
db = None
try:
db = pymysql.connect(host=host, user='root', passwd='root', port=3306, connect_timeout=10,
write_timeout=10)
r = open("success.txt", 'a')
r.write(host)
r.write("\n")
pass
except Exception as msg:
print(msg)
pass
finally:
if db:
db.close()
pass
if __name__ == '__main__':
t = ThreadPool(100)
t.map_async(process,open('3306.txt').readlines())
t.close()
t.join()
I want to verify in batches whether the MySQL account passwords of some IPs are valid, but my code will have the problem of sub-thread blocking. In fact, the last IP has been verified, but the main thread has not ended. I don't know how to modify it. You can provide Some modifications and suggestions?

Related

How to set connection with database in the api Flask?

I'm trying to create the api on my linux PC. At this moment I have support for some basic requests which were done just for testing. My api works in cooperation with uswgi+nginx+flask. And now I'm trying to add connection to the database. For this purpose I had installed MySQL and created database. But I don't understand how to connect from the api to database. For example here is code of the script which can connect to the DB but it works separately of the api:
try:
connection = mysql.connector.connect(host='localhost',
database='tired_db',
user='test',
password='pw')
if connection.is_connected():
mycursor = connection.cursor()
mycursor.execute("SHOW TABLES")
for x in mycursor:
print(x)
return connection
except Error as e:
print("Error while connecting to MySQL", e)
finally:
if connection.is_connected():
mycursor.close()
connection.close()
print("MySQL connection is closed")
and it works correctly. I thought that maybe I can call this connection like some metaclass:
import mysql.connector
from mysql.connector import Error
class DbProvider(type):
#property
def my_data(cls):
try:
connection = mysql.connector.connect(host='localhost',
database='tired_db',
user='test',
password='pw')
if connection.is_connected():
mycursor = connection.cursor()
mycursor.execute("SHOW TABLES")
for x in mycursor:
print(x)
return connection
except Error as e:
print("Error while connecting to MySQL", e)
finally:
if connection.is_connected():
mycursor.close()
connection.close()
print("MySQL connection is closed")
class MyClass(metaclass=DbProvider):
pass
if __name__ == "__main__":
MyClass.my_data
but I think that such stuff can be done with more efficient way. For example here is some request in the api:
#app.route("/api/login", methods = ['POST'])
def logIn():
return "all is ok"
and the idea is that for example I have to connect during this request to the DB and check whether a user exists or not and if all is ok generate+save some token to the database. I don't understand whether it is important to keep connection alive during all api uptime or only during requests. And also is it important to close connection after an every request or we have to keep alive it forever. And also how to call connection from separate class, or I have to have all stuff in one file together with api calls.
You can write a connection manager class that connects to the database, performs operations, and closes the connection.
import mysql.connector
from mysql.connector import Error
class ConnectionManager:
def __init__(self, host, database, user, password):
self.host = host
self.database = database
self.user = user
self.password = password
def __enter__(self):
try:
self.connection = mysql.connector.connect(
host=self.host,
database=self.database,
user=self.user,
password=self.password
)
if self.connection.is_connected():
self.cursor = self.connection.cursor()
return self.cursor
except Error as e:
print("Error while connecting to MySQL", e)
def __exit__(self, type, value, traceback):
if self.connection.is_connected():
self.cursor.close()
self.connection.close()
print("MySQL connection is closed")
#app.route("/api/login", methods = ['POST'])
def logIn():
with ConnectionManager('localhost', 'tired_db', 'test', 'pw') as cursor:
cursor.execute("SELECT * FROM users WHERE username = %s", ('test_user',))
result = cursor.fetchone()
if result:
# generate and save token
return "all is ok"
else:
return "user not found"

Locust script is getting stuck with ThreadedConnectionPool

I'm trying to write a load testing script using locust in python (it uses gevent & greenlet for multithreading internally as per my understanding); but my script is getting stuck when I try to put a db connection (postgres) back to connection pool inside a thread. I have defined connection pool variable as gloabl & then trying to create & put connections back inside threads; I have no experience in threading; not sure if these lines marked inside ** quotes are the reason locust gets stuck -
#events.test_start.add_listener # executes one time at the start of test run
def on_test_start(**kw):
**global t_pool** # connection pool variable defined as global
conn_st = config(fname = os.path.join('.', 'db'), instance = 'xxx')
try:
t_pool = pool.ThreadedConnectionPool(1, 100, **conn_st)
#cur = db.cursor()
except Exception as err:
#db = None
raise DatabaseError("DB Connection could not be established - %s" %(err))
#contextmanager
def get_con():
con = t_pool.getconn()
try:
yield con
finally:
**t_pool.putconn(con)**
def send_xxx():
ind = random.randint(0, (len(xxx_data['yyy'])-1))
body['toeknx'] = xxx_data['yyy'][ind]
res = requests.post(url, headers=headers, json=body)
res.raise_for_status()
with get_con() as con:
cur = con.cursor()
while True:
cur.execute("test query")
token = cur.fetchone()
if token is None:
continue
else:
cur.execute("another test query")
out = cur.fetchone()
if zzz:
continue
else:
cur.execute("final test query")
out1 = cur.fetchone()
time_diff = out1[1] - out1[0]
cur.close()
You need to use psycogreen to make psycopg2 gevent-friendly.
Something like:
import gevent
import gevent.monkey
gevent.monkey.patch_all()
import psycogreen.gevent
psycogreen.gevent.patch_psycopg()
Full example here:
https://github.com/SvenskaSpel/locust-plugins/blob/master/locust_plugins/listeners.py

How to close sqlite connection in daemon thread?

I have multiple threads that process data and puts it on a queue, and a single thread that takes data from a queue and then saves it to a database.
I think the following will cause a memory leak:
class DBThread(threading.Thread):
def __init__(self, myqueue):
threading.Thread.__init__(self)
self.myqueue = myqueue
def run(self):
conn = sqlite3.connect("test.db")
c = conn.cursor()
while True:
data = myqueue.get()
if data:
c.execute("INSERT INTO test (data) VALUES (?)", (data,))
conn.commit()
self.myqueue.task_done()
#conn.close() <--- never reaches this point
q = Queue.Queue()
# Create other threads
....
# Create DB thread
t = DBThread(q)
t.setDaemon(True)
t.start()
q.join()
I can't put the conn.close() in the while loop, because I think that will close the connection on the first loop. I can't put it in the if data: statement, because then it won't save data that may be put in the queue later.
Where do I close the db connection? If I don't close it, won't this cause a memory leak?
If you can use a sentinel value that will not appear in your normal data, e.g. None, you can signal the thread to stop and close the database connection in a finally clause:
import threading
import Queue
import sqlite3
class DBThread(threading.Thread):
def __init__(self, myqueue, db_path):
threading.Thread.__init__(self)
self.myqueue = myqueue
self.db_path = db_path
def run(self):
conn = sqlite3.connect(self.db_path)
try:
while True:
data = self.myqueue.get()
if data is None: # check for sentinel value
break
with conn:
conn.execute("INSERT INTO test (data) VALUES (?)", (data,))
self.myqueue.task_done()
finally:
conn.close()
q = Queue.Queue()
for i in range(100):
q.put(str(i))
conn = sqlite3.connect('test.db')
conn.execute('create table if not exists test (data text)')
conn.close()
t = DBThread(q, 'test.db')
t.start()
q.join()
q.put(None) # tell database thread to terminate
If you cannot use a sentinel value you can add a flag to the class that is checked in the while loop. Also add a stop() method to the thread class that sets the flag. You will need to use a non-blocking Queue.get():
class DBThread(threading.Thread):
def __init__(self, myqueue, db_path):
threading.Thread.__init__(self)
self.myqueue = myqueue
self.db_path = db_path
self._terminate = False
def terminate(self):
self._terminate = True
def run(self):
conn = sqlite3.connect(self.db_path)
try:
while not self._terminate:
try:
data = self.myqueue.get(timeout=1)
except Queue.Empty:
continue
with conn:
conn.execute("INSERT INTO test (data) VALUES (?)", (data,))
self.myqueue.task_done()
finally:
conn.close()
....
q.join()
t.terminate() # tell database thread to terminate
Finally, it's worth mentioning that your program could terminate if the db thread manages to drain the queue, i.e. if q.join() returns. This is because the db thread is a daemon thread and will not prevent the main thread exiting. You need to ensure that your worker threads produce enough data to keep the db thread busy, otherwise q.join() will return and the main thread will exit.

Accessing a MySQL connection pool from Python multiprocessing

I'm trying to set up a MySQL connection pool and have my worker processes access the already established pool instead of setting up a new connection each time.
I'm confused if I should pass the database cursor to each process, or if there's some other way to do this? Shouldn't MySql.connector do the pooling automatically? When I check my log files, many, many connections are opened and closed ... one for each process.
My code looks something like this:
PATH = "/tmp"
class DB(object):
def __init__(self):
connected = False
while not connected:
try:
cnxpool = mysql.connector.pooling.MySQLConnectionPool(pool_name = "pool1",
**config.dbconfig)
self.__cnx = cnxpool.get_connection()
except mysql.connector.errors.PoolError:
print("Sleeping.. (Pool Error)")
sleep(5)
except mysql.connector.errors.DatabaseError:
print("Sleeping.. (Database Error)")
sleep(5)
self.__cur = self.__cnx.cursor(cursor_class=MySQLCursorDict)
def execute(self, query):
return self.__cur.execute(query)
def isValidFile(self, name):
return True
def readfile(self, fname):
d = DB()
d.execute("""INSERT INTO users (first_name) VALUES ('michael')""")
def main():
queue = multiprocessing.Queue()
pool = multiprocessing.Pool(None, init, [queue])
for dirpath, dirnames, filenames in os.walk(PATH):
full_path_fnames = map(lambda fn: os.path.join(dirpath, fn),
filenames)
full_path_fnames = filter(is_valid_file, full_path_fnames)
pool.map(readFile, full_path_fnames)
if __name__ == '__main__':
sys.exit(main())
First, you're creating a different connection pool for each instance of your DB class. The pools having the same name doesn't make them the same pool
From the documentation:
It is not an error for multiple pools to have the same name. An application that must distinguish pools by their pool_name property should create each pool with a distinct name.
Besides that, sharing a database connection (or connection pool) between different processes would be a bad idea (and i highly doubt it would even work correctly), so each process using it's own connections is actually what you should aim for.
You could just initialize the pool in your init initializer as a global variable and use that instead.
Very simple example:
from multiprocessing import Pool
from mysql.connector.pooling import MySQLConnectionPool
from mysql.connector import connect
import os
pool = None
def init():
global pool
print("PID %d: initializing pool..." % os.getpid())
pool = MySQLConnectionPool(...)
def do_work(q):
con = pool.get_connection()
print("PID %d: using connection %s" % (os.getpid(), con))
c = con.cursor()
c.execute(q)
res = c.fetchall()
con.close()
return res
def main():
p = Pool(initializer=init)
for res in p.map(do_work, ['select * from test']*8):
print(res)
p.close()
p.join()
if __name__ == '__main__':
main()
Or just use a simple connection instead of a connection pool, as only one connection will be active in each process at a time anyway.
The number of concurrently used connections is implicitly limited by the size of the multiprocessing.Pool.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import time
import mysql.connector.pooling
dbconfig = {
"host":"127.0.0.1",
"port":"3306",
"user":"root",
"password":"123456",
"database":"test",
}
class MySQLPool(object):
"""
create a pool when connect mysql, which will decrease the time spent in
request connection, create connection and close connection.
"""
def __init__(self, host="172.0.0.1", port="3306", user="root",
password="123456", database="test", pool_name="mypool",
pool_size=3):
res = {}
self._host = host
self._port = port
self._user = user
self._password = password
self._database = database
res["host"] = self._host
res["port"] = self._port
res["user"] = self._user
res["password"] = self._password
res["database"] = self._database
self.dbconfig = res
self.pool = self.create_pool(pool_name=pool_name, pool_size=pool_size)
def create_pool(self, pool_name="mypool", pool_size=3):
"""
Create a connection pool, after created, the request of connecting
MySQL could get a connection from this pool instead of request to
create a connection.
:param pool_name: the name of pool, default is "mypool"
:param pool_size: the size of pool, default is 3
:return: connection pool
"""
pool = mysql.connector.pooling.MySQLConnectionPool(
pool_name=pool_name,
pool_size=pool_size,
pool_reset_session=True,
**self.dbconfig)
return pool
def close(self, conn, cursor):
"""
A method used to close connection of mysql.
:param conn:
:param cursor:
:return:
"""
cursor.close()
conn.close()
def execute(self, sql, args=None, commit=False):
"""
Execute a sql, it could be with args and with out args. The usage is
similar with execute() function in module pymysql.
:param sql: sql clause
:param args: args need by sql clause
:param commit: whether to commit
:return: if commit, return None, else, return result
"""
# get connection form connection pool instead of create one.
conn = self.pool.get_connection()
cursor = conn.cursor()
if args:
cursor.execute(sql, args)
else:
cursor.execute(sql)
if commit is True:
conn.commit()
self.close(conn, cursor)
return None
else:
res = cursor.fetchall()
self.close(conn, cursor)
return res
def executemany(self, sql, args, commit=False):
"""
Execute with many args. Similar with executemany() function in pymysql.
args should be a sequence.
:param sql: sql clause
:param args: args
:param commit: commit or not.
:return: if commit, return None, else, return result
"""
# get connection form connection pool instead of create one.
conn = self.pool.get_connection()
cursor = conn.cursor()
cursor.executemany(sql, args)
if commit is True:
conn.commit()
self.close(conn, cursor)
return None
else:
res = cursor.fetchall()
self.close(conn, cursor)
return res
if __name__ == "__main__":
mysql_pool = MySQLPool(**dbconfig)
sql = "select * from store WHERE create_time < '2017-06-02'"
p = Pool()
for i in range(5):
p.apply_async(mysql_pool.execute, args=(sql,))
Code above creates a connection pool at the beginning, and get connections from it in execute(), once the connection pool has been created, the work is to remain it, since the pool is created only once, it will save the time to request for a connection every time you would like to connect to MySQL.
Hope it helps!
You created multiple DB object instance. In mysql.connector.pooling.py, pool_name is only a attribute to let you make out which pool it is. There is no mapping in the mysql pool.
So, you create multiple DB instance in def readfile(), then you will have several connection pool.
A Singleton is useful in this case.
(I spent several hours to find it out. In Tornado framework, each http get create a new handler, which leads to making a new connection.)
There may be synchronization issues if you're going to reuse MySQLConnection instances maintained by a pool, but just sharing a MySQLConnectionPool instance between worker processes and using connections retrieved by calling the method get_connection() would be okay, because a dedicated socket would be created for each MySQLConnection instance.
import multiprocessing
from mysql.connector import pooling
def f(cnxpool: pooling.MySQLConnectionPool) -> None:
# Dedicate connection instance for each worker process.
cnx = cnxpool.get_connection()
...
if __name__ == '__main__':
cnxpool = pooling.MySQLConnectionPool(
pool_name='pool',
pool_size=2,
)
p0 = multiprocessing.Process(target=f, args=(cnxpool,))
p1 = multiprocessing.Process(target=f, args=(cnxpool,))
p0.start()
p1.start()

Python mysql (using pymysql) auto reconnect

I'm not sure if this is possible, but I'm looking for a way to reconnect to mysql database when the connection is lost. All the connections are held in a gevent queue but that shouldn't matter I think. I'm sure if I put some time in, I can come up with a way to reconnect to the database. However I was glancing pymysql code and I saw that there is a 'ping' method in Connection class, which I'm not sure exactly how to use.
The method looks like it will reconnect first time but after that it switched the reconnect flag to False again? Can I use this method, or is there a different way to establish connection if it is lost? Even if it is not pymysql how do people tackle, database servers going down and having to re-establish connection to mysql server?
def ping(self, reconnect=True):
''' Check if the server is alive '''
if self.socket is None:
if reconnect:
self._connect()
reconnect = False
else:
raise Error("Already closed")
try:
self._execute_command(COM_PING, "")
return self._read_ok_packet()
except Exception:
if reconnect:
self._connect()
return self.ping(False)
else:
raise
Well, I've got the same problem in my application and I found a method on the PyMySQL documentation that pings to the server and check if the connection was closed or not, if it was closed, then it reconnects again.
from pymysql import connect
from pymysql.cursors import DictCursor
# create the connection
connection = connect(host='host', port='port', user='user',
password='password', db='db',
cursorclass=DictCursor)
# get the cursor
cursor = connection.cursor()
# if the connection was lost, then it reconnects
connection.ping(reconnect=True)
# execute the query
cursor.execute(query)
I hope it helps.
Finally got a working solution, might help someone.
from gevent import monkey
monkey.patch_socket()
import logging
import gevent
from gevent.queue import Queue
import pymysql as db
logging.basicConfig(level=logging.DEBUG)
LOGGER = logging.getLogger("connection_pool")
class ConnectionPool:
def __init__(self, db_config, time_to_sleep=30, test_run=False):
self.username = db_config.get('user')
self.password = db_config.get('password')
self.host = db_config.get('host')
self.port = int(db_config.get('port'))
self.max_pool_size = 20
self.test_run = test_run
self.pool = None
self.time_to_sleep = time_to_sleep
self._initialize_pool()
def get_initialized_connection_pool(self):
return self.pool
def _initialize_pool(self):
self.pool = Queue(maxsize=self.max_pool_size)
current_pool_size = self.pool.qsize()
if current_pool_size < self.max_pool_size: # this is a redundant check, can be removed
for _ in xrange(0, self.max_pool_size - current_pool_size):
try:
conn = db.connect(host=self.host,
user=self.username,
passwd=self.password,
port=self.port)
self.pool.put_nowait(conn)
except db.OperationalError, e:
LOGGER.error("Cannot initialize connection pool - retrying in {} seconds".format(self.time_to_sleep))
LOGGER.exception(e)
break
self._check_for_connection_loss()
def _re_initialize_pool(self):
gevent.sleep(self.time_to_sleep)
self._initialize_pool()
def _check_for_connection_loss(self):
while True:
conn = None
if self.pool.qsize() > 0:
conn = self.pool.get()
if not self._ping(conn):
if self.test_run:
self.port = 3306
self._re_initialize_pool()
else:
self.pool.put_nowait(conn)
if self.test_run:
break
gevent.sleep(self.time_to_sleep)
def _ping(self, conn):
try:
if conn is None:
conn = db.connect(host=self.host,
user=self.username,
passwd=self.password,
port=self.port)
cursor = conn.cursor()
cursor.execute('select 1;')
LOGGER.debug(cursor.fetchall())
return True
except db.OperationalError, e:
LOGGER.warn('Cannot connect to mysql - retrying in {} seconds'.format(self.time_to_sleep))
LOGGER.exception(e)
return False
# test (pytest compatible) -------------------------------------------------------------------------------------------
import logging
from src.py.ConnectionPool import ConnectionPool
logging.basicConfig(level=logging.DEBUG)
LOGGER = logging.getLogger("test_connection_pool")
def test_get_initialized_connection_pool():
config = {
'user': 'root',
'password': '',
'host': '127.0.0.1',
'port': 3305
}
conn_pool = ConnectionPool(config, time_to_sleep=5, test_run=True)
pool = conn_pool.get_initialized_connection_pool()
# when in test run the port will be switched back to 3306
# so the queue size should be 20 - will be nice to work
# around this rather than test_run hack
assert pool.qsize() == 20
The easiest way is to check the connection right before sending a query.
You can do this by creating a small class that contains two methods: connect and query:
import pymysql
import pymysql.cursors
class DB:
def connect(self):
self.conn = pymysql.connect(
host=hostname,
user=username,
password=password,
db=dbname,
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor,
port=3306)
def query(self, sql):
try:
cursor = self.conn.cursor()
cursor.execute(sql)
except pymysql.OperationalError:
self.connect()
cursor = self.conn.cursor()
cursor.execute(sql)
return cursor
db = DB()
Now, whenever you send a query using db.query("example SQL") the request is automatically prepared to encounter a connection error and reconnects using self.connect() if it needs to.
Remember: This is a simplified example. Normally, you would want to let PyMySQL help you escape special characters in your queries. To do that, you would have to add a 2nd parameter in the query method and go from there.
the logic is quite simple, if connection close then try to reconnect for several times in this case I use max tries for 15 times to reconnect or ping.
import pymysql, pymysql.cursors
conn = pymysql.connect(
host=hostname,
user=username,
password=password,
db=dbname,
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor,
)
cursor = conn.cursor()
# you can do transactions to database and when you need conn later, just make sure the server is still connected
if conn.open is False:
max_try = 15
try = 0
while conn.open is False:
if try < max_try:
conn.ping() # autoreconnect is true by default
try +=1
# check the conn again to make sure it connected
if conn.open:
# statements when conn is successfully reconnect to the server
else:
# it must be something wrong : server, network etc
Old but I encountered a similar problem for accessing hosted db within programs. The solution I ended up using was to create a decorator to automatically reconnect when making a query.
given a connection function:
def connect(self):
self.conn = mysql.connector.connect(host=self.host, user=self.user,
database=self.database, password=self.password)
self.cursor = self.conn.cursor()
print("Established connectionn...")
I created
def _reconnect(func):
#wraps(func)
def rec(self,*args,**kwargs):
try:
result = func(self,*args,**kwargs)
return result
except (mysql.connector.Error, mysql.connector.Warning) as e:
self.connect()
result = func(self,*args,**kwargs)
return result
return rec
Such that any function using the connection can now be decorated as so
#_reconnect
def check_user_exists(self,user_id):
self.cursor.execute("SELECT COUNT(*) FROM _ where user_id={};".format(user_id))
if self.cursor.fetchall()[0][0]==0:
return False
else:
return True
This decorator will re-establish a connection and rerun any function involving a query to the db.
You can use a property to keep the connection alive every time you do querying:
import pymysql
import pymysql.cursors
import pandas as pd
class DB:
def __init__(self, hostname='1.1.1.1', username='root', password='password',
database=None, port=3306, charset="utf8mb4"):
self.hostname = hostname
self.database = database
self.username = username
self.password = password
self.port = port
self.charset = charset
self.connect()
#property
def conn(self):
if not self.connection.open:
print('Going to reconnect')
self.connection.ping(reconnect=True)
return self.connection
def connect(self):
self.connection = pymysql.connect(
host=self.hostname,
user=self.username,
password=self.password,
db=self.database,
charset=self.charset,
cursorclass=pymysql.cursors.DictCursor,
port=self.port)
def query(self, sql):
return pd.read_sql_query(sql, con=self.conn)
db = DB(hostname='1.1.1.1', username='root', password='password', database=None, port=3306, charset="utf8mb4")

Categories

Resources