How to query PostgreSQL asynchronously from AWS Lambda in Python? - python

In my case I use the psycopg2 client and I need to create a table, but it gives me a timeout error. This is obviously because the table takes a long time to create and the call exceeds the 15-minute Lambda limit.

For my small purposes I found the following documentation very helpful: the psycopg docs.
I will leave the small implementation below. Note that I have named the connection aconn because it works differently from a normal connection; for example, it does not use commit.
The small detail is async_=True on the connection line.
import select
import psycopg2

# Busy-wait until the asynchronous connection is ready for the next command.
def wait(conn):
    while True:
        state = conn.poll()
        if state == psycopg2.extensions.POLL_OK:
            break
        elif state == psycopg2.extensions.POLL_WRITE:
            select.select([], [conn.fileno()], [])
        elif state == psycopg2.extensions.POLL_READ:
            select.select([conn.fileno()], [], [])
        else:
            raise psycopg2.OperationalError("poll() returned %s" % state)

# db_secret is assumed to hold the database credentials.
db_host = db_secret["host"]
db_name = db_secret["dbname"]
db_user = db_secret["username"]
db_pass = db_secret["password"]

string_conn = "dbname='%s' user='%s' host='%s' password='%s'" % (db_name, db_user, db_host, db_pass)
aconn = psycopg2.connect(string_conn, async_=True)  # async_=True makes the connection asynchronous
wait(aconn)

acursor = aconn.cursor()
query = "CREATE TABLE SCHEMA.TABLE AS SELECT * FROM BLA"
acursor.execute(query)
wait(acursor.connection)  # wait for the statement to finish before closing
aconn.close()
# END AND EXIT
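For context, here is a minimal sketch of how this snippet might be wired into a Lambda handler, with db_secret pulled from AWS Secrets Manager. The secret name and its JSON keys are illustrative assumptions, not from the original post.

import json
import boto3

def lambda_handler(event, context):
    # Hypothetical secret name; the secret's JSON is assumed to contain
    # the host/dbname/username/password keys used above.
    sm = boto3.client("secretsmanager")
    secret = sm.get_secret_value(SecretId="my-db-secret")
    db_secret = json.loads(secret["SecretString"])
    # ...then run the async connect / wait / execute flow shown above...
    return {"statusCode": 200}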

Related

Not able to update MySQL db with Python

Issue: the script runs successfully without any errors, but the MySQL database doesn't get updated with the script results.
I have added the line db.autocommit(True) to commit every time, but it still fails.
Env: Python 2.7, MySQL
I have also tried manually calling db.commit() after each execute statement with locks, but it also fails.
'''
Specifications:
A multi-threaded web spider that:
Takes a website and depth of spidering as input
Downloads the HTML files only
Inserts the HTML into a MySQL database
It also parses the forms on each page and inserts the form details into the db
'''
import mechanize
import os
import sys
import threading
import MySQLdb

lock = threading.Lock()

def Parse_Forms(target, curr, br):
    lock.acquire()
    br.open(target)
    curr.execute("use web;")
    response = []
    i = 0  # form index; in the original this was reset inside the loop, so select_form always picked form 0
    for forms in br.forms():
        action = forms.action
        method = forms.method
        d = dict()
        d['method'] = method
        d['name'] = action
        br.select_form(nr=i)
        for control in forms.controls:
            if control.value == '':
                d[control.name] = "NULL"
            elif type(control.value) is list:
                d[control.name] = control.value[0]
            else:
                d[control.name] = control.value
        for j in d:
            if str(j) in ('login', 'name', 'password', 'method'):  # the only column names to insert into the MySQL db
                query = "INSERT INTO `forms` (" + str(j) + ") values (\"" + str(d[j]) + "\");"
                curr.execute(query)
                print "Query Executed!"
        i = i + 1
        response.append(br.submit())
    lock.release()

def getHTMLfiles(target, curr):
    br = mechanize.Browser()
    headers = [('User-Agent', 'Firefoxy'), ]
    br.addheaders = headers
    br.open(target)
    for i in range(0, depth):
        for link in br.links():
            if ".html" in link.url:  # was ".hmtl", which never matched any link
                print "Downloading File: " + link.url
                os.system("wget " + link.url + " -P Files/")
                curr.execute("INSERT INTO `pages` (name) values (\"" + link.url + "\");")  # was inserting the literal string "link.url"
            if link.url[0] == '/' and not '.' in link.url:  # belongs to this server (not an external link) and is a directory
                Parse_Forms(target + link.url, curr, br)  # was passed an extra db argument not in the signature

if __name__ == "__main__":
    db = MySQLdb.connect(host="localhost", user="****", passwd="*****", db="web")
    # There are 2 tables: one called pages that saves HTML file urls and one called forms that saves form parameters
    db.autocommit(True)
    curr = db.cursor()
    target = sys.argv[1]
    depth = int(sys.argv[2])
    threads = []
    for workers in range(10):
        # note: all ten threads share this one connection/cursor
        t = threading.Thread(target=getHTMLfiles, args=(target, curr,))
        t.daemon = True
        t.start()
        threads.append(t)
    for thread in threads:
        thread.join()
The script runs fine, but it is supposed to update the MySQL database.
Note: there are no MySQL errors (I mean literally no errors), so everything seems to run fine.
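One detail worth checking in the script above: all ten worker threads share a single MySQLdb connection and cursor, and MySQLdb connections are not safe to share across threads, so statements from different threads can interleave and writes can be lost without any error being raised. A minimal sketch of giving each worker its own connection (the worker function is illustrative, not from the original script):

def worker(target):
    # Each thread opens its own connection and cursor; MySQLdb connections
    # must not be shared between threads.
    db = MySQLdb.connect(host="localhost", user="****", passwd="*****", db="web")
    db.autocommit(True)
    curr = db.cursor()
    getHTMLfiles(target, curr)
    db.close()

threads = []
for _ in range(10):
    t = threading.Thread(target=worker, args=(target,))
    t.daemon = True
    t.start()
    threads.append(t)
for thread in threads:
    thread.join()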

How to execute the same query more than once with different data?

I'm trying to execute the same query with different data, but I only get data back the first time. The other times, despite there being data for the queries in the database, MySQL returns empty results.
This is the code:
def get_team_colour_map(self, players, id_competition):
    tcm = FIBAColourMap()
    for p in players:
        args = [p["id"], id_competition]
        conn = pymysql.Connect(host=DDBB.DDBB_FIBA_HOST,
                               user=DDBB.DDBB_FIBA_USER,
                               password=DDBB.DDBB_FIBA_PSWD,
                               db=DDBB.DDBB_FIBA_NAME,
                               charset=DDBB.DDBB_FIBA_CHARSET,
                               cursorclass=pymysql.cursors.DictCursor)
        with conn.cursor() as cursor:
            print("id player: {}".format(p["id"]))
            print("args: {}".format(args))
            cursor.execute("select sc.* from tbl030_shots_chart sc, tbl006_player_team pt, tbl007_game g, tbl004_jornada j, tbl012_competition c where pt.id = %s and pt.id_player_feb = sc.id_fiba and sc.id_game = g.id and g.id_jornada = j.id and j.id_competition = c.id and c.id = %s", args)
            data = cursor.fetchall()
            print("data: {}".format(data))
            print("Total rows: {}".format(cursor.rowcount))
            if cursor.rowcount > 0:
                for s in data:
                    x = float(FIBASCReport.adjust_x(s["x"]))
                    y = float(FIBASCReport.adjust_y(s["y"]))
                    color = tcm.image.getpixel((x, y))
                    color = ("#%02x%02x%02x" % color).upper()
                    if tcm.exists_color(color):
                        if int(s["m"]) == 0:
                            tcm.set_scored_shots(color, 1)
                        else:
                            tcm.set_failed_shots(color, 1)
                    else:
                        if int(s["m"]) == 0:
                            tcm.set_scored_shots("OTROS", 1)
                        else:
                            tcm.set_failed_shots("OTROS", 1)
            else:
                # tcm = None
                print("Player with id: {} has NOT taken any shots in competition: {}".format(p["id"], id_competition))
    return tcm
In this code, cursor.fetchall() returns data for the first query, but the following queries return empty results.
How can I run several queries? I'm using MySQL 8.0 and Python 3.6.
It's because you are using the same cursor each time. Create a new cursor instance each time through the loop to execute the query. After the first query runs, the cursor is already positioned after all the data, hence no rows are returned after that.
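A minimal sketch of that suggestion, reusing the connection settings and query from the question: open the connection once, then take a fresh cursor for each query.

conn = pymysql.Connect(host=DDBB.DDBB_FIBA_HOST,
                       user=DDBB.DDBB_FIBA_USER,
                       password=DDBB.DDBB_FIBA_PSWD,
                       db=DDBB.DDBB_FIBA_NAME,
                       charset=DDBB.DDBB_FIBA_CHARSET,
                       cursorclass=pymysql.cursors.DictCursor)
for p in players:
    # A fresh cursor per query instead of reusing one positioned past its results.
    with conn.cursor() as cursor:
        cursor.execute("select sc.* from tbl030_shots_chart sc, tbl006_player_team pt, tbl007_game g, tbl004_jornada j, tbl012_competition c where pt.id = %s and pt.id_player_feb = sc.id_fiba and sc.id_game = g.id and g.id_jornada = j.id and j.id_competition = c.id and c.id = %s", [p["id"], id_competition])
        data = cursor.fetchall()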
You can also try this:
Look at the documentation for MySQLCursor.execute().
It claims that you can pass in a multi parameter that allows you to run multiple queries in one string.
If multi is set to True, execute() is able to execute multiple statements specified in the operation string.
multi is an optional second parameter to the execute() call:
operation = 'SELECT 1; INSERT INTO t1 VALUES (); SELECT 2'
for result in cursor.execute(operation, multi=True):
    if result.with_rows:  # statements that return rows can be fetched here
        print(result.fetchall())

Python3 + beatbox : Not able to querymore

I have logged into my SFDC org using the instructions provided here: http://tomhayden3.com/2013/08/04/salesforce-python/. However, I am not able to implement the queryMore part of it; it just does nothing. When I print(query_locator) it prints an ID with a -500 suffix. Can someone please look at this code and point out what I am doing wrong?
#!/usr/bin/env python3
import beatbox

# Connecting to SFDC
sf = beatbox._tPartnerNS
service = beatbox.Client()
service.serverUrl = 'https://test.salesforce.com/services/Soap/u/38.0'
service.login('my-username', 'my-password')

query_result = service.query("SELECT id, Name, Department FROM User")
records = query_result['records']  # list of results!
total_records = query_result['size']  # full size of results
query_locator = query_result['queryLocator']  # get the mystical queryLocator

# loop through, pulling the next 500 and appending them to the records list
while query_result['done'] is False and len(records) < total_records:
    query_result = service.queryMore(query_locator)  # was self._service.queryMore, a copy-paste leftover from a class
    query_locator = query_result['queryLocator']  # get the updated queryLocator
    records = records + query_result['records']  # append to the records list

for record in records:
    print(record['id'])  # this should print all IDs??? But it is not.
The examples here resolved the issue for me:
https://github.com/superfell/Beatbox/blob/master/examples/export.py
#!/usr/bin/env python3
import beatbox
import sqlalchemy

engine_str = 'mysql+mysqlconnector://db-username:db-pass@localhost/db-name'  # was db-pass#localhost, which breaks the URL
engine = sqlalchemy.create_engine(engine_str, echo=False, encoding='utf-8')
connection = engine.connect()

sf = beatbox._tPartnerNS
service = beatbox.Client()
service.serverUrl = 'https://test.salesforce.com/services/Soap/u/38.0'  # hard-coded since I was testing against the sandbox only

def export(objectSOQL):
    service.login('sfdc-username', 'sfdc-pass')
    query_result = service.query(objectSOQL)
    while True:
        for row in query_result[sf.records:]:
            SQL_query = 'INSERT INTO user(' \
                        'id, ' \
                        'name, ' \
                        'department) ' \
                        'VALUES(' \
                        '\"{}\",\"{}\",\"{}\")' \
                        .format(
                            row[2],
                            row[3],
                            row[4]
                        )
            try:
                connection.execute(SQL_query)
            except Exception as e:
                print(e)
        # This is the key part: it keeps pulling records beyond the first 500
        # until sf.done becomes true, which means the query has completed.
        if str(query_result[sf.done]) == 'true':
            break
        query_result = service.queryMore(str(query_result[sf.queryLocator]))

SOQL = 'SELECT id, Name, Department FROM User'
export(SOQL)
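One caveat about the INSERT above: the values are formatted straight into the SQL string, which breaks on embedded quotes and is open to injection. A sketch of a parameterized variant of that inner loop (assuming the same user table and the SQLAlchemy connection created above):

import sqlalchemy

insert_stmt = sqlalchemy.text(
    "INSERT INTO user (id, name, department) VALUES (:id, :name, :department)")
for row in query_result[sf.records:]:
    # The driver quotes the bound values itself.
    connection.execute(insert_stmt, {"id": str(row[2]),
                                     "name": str(row[3]),
                                     "department": str(row[4])})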

Process very large 900M row MySQL table line by line with Python

I often need to process several hundred million rows of a MySQL table on a line-by-line basis using Python. I want a script that is robust and does not need to be monitored.
Below I pasted a script that classifies the language of the message field in each row. It utilizes the sqlalchemy and MySQLdb.cursors.SSCursor modules. Unfortunately this script consistently throws a 'Lost connection to MySQL server during query' error after 4840 rows when I run it remotely and 42000 rows when I run it locally.
Also, I have checked that max_allowed_packet = 32M in my MySQL server's /etc/mysql/my.cnf file, as per the answers to this Stack Overflow question: Lost connection to MySQL server during query.
Any advice on fixing this error, or on another robust approach to processing very large MySQL tables with Python, would be much appreciated!
import sqlalchemy
import MySQLdb.cursors
import langid

schema = "twitterstuff"
table = "messages_en"  # 900M row table
engine_url = "mysql://myserver/{}?charset=utf8mb4&read_default_file=~/.my.cnf".format(schema)
db_eng = sqlalchemy.create_engine(engine_url, connect_args={'cursorclass': MySQLdb.cursors.SSCursor})
langid.set_languages(['fr', 'de'])

print "Executing input query..."
data_iter = db_eng.execute("SELECT message_id, message FROM {} WHERE langid_lang IS NULL LIMIT 10000".format(table))

def process(inp_iter):
    for item in inp_iter:
        item = dict(item)
        (item['langid_lang'], item['langid_conf']) = langid.classify(item['message'])
        yield item

def update_table(update_iter):
    count = 0
    for item in update_iter:
        count += 1
        if count % 10 == 0:
            print "{} rows processed".format(count)
        lang = item['langid_lang']
        conf = item['langid_conf']
        message_id = item['message_id']
        db_eng.execute("UPDATE {} SET langid_lang = '{}', langid_conf = {} WHERE message_id = {}".format(table, lang, conf, message_id))

data_iter_upd = process(data_iter)
print "Begin processing..."
update_table(data_iter_upd)
According to MySQLdb developer Andy Dustman,
[When using SSCursor,] no new queries can be issued on the connection until
the entire result set has been fetched.
That post says that if you issue another query you will get a "commands out of sync" error, which is not the error you are seeing. So I am not sure that the following will necessarily fix your problem. Nevertheless, it might be worth removing SSCursor from your code and using the simpler default Cursor, just to test whether SSCursor is the source of the problem.
You could, for example, use LIMIT chunksize OFFSET n in your SELECT statement
to loop through the data set in chunks:
import sqlalchemy
import MySQLdb.cursors
import langid
import itertools as IT

chunksize = 1000

def process(inp_iter):
    for item in inp_iter:
        item = dict(item)
        (item['langid_lang'], item['langid_conf']) = langid.classify(item['message'])
        yield item

def update_table(update_iter, engine):
    for count, item in enumerate(update_iter):
        if count % 10 == 0:
            print "{} rows processed".format(count)
        lang = item['langid_lang']
        conf = item['langid_conf']
        message_id = item['message_id']
        engine.execute(
            "UPDATE {} SET langid_lang = '{}', langid_conf = {} WHERE message_id = {}"
            .format(table, lang, conf, message_id))

schema = "twitterstuff"
table = "messages_en"  # 900M row table
engine_url = ("mysql://myserver/{}?charset=utf8mb4&read_default_file=~/.my.cnf"
              .format(schema))
db_eng = sqlalchemy.create_engine(engine_url)
langid.set_languages(['fr', 'de'])

for offset in IT.count(start=0, step=chunksize):
    print "Executing input query..."
    result = db_eng.execute(
        "SELECT message_id, message FROM {} WHERE langid_lang IS NULL LIMIT {} OFFSET {}"
        .format(table, chunksize, offset))
    result = list(result)
    if not result:
        break
    data_iter_upd = process(result)
    print "Begin processing..."
    update_table(data_iter_upd, db_eng)
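As a side note, the UPDATE in both versions formats values straight into the SQL string, which breaks as soon as lang or message contains anything unexpected. A sketch of the same statement with bound parameters instead (assuming SQLAlchemy 1.x, where a raw SQL string plus a tuple is passed through to the DB-API driver's %s placeholders):

engine.execute(
    # Only the table name is formatted in; the values are bound by the driver.
    "UPDATE {} SET langid_lang = %s, langid_conf = %s WHERE message_id = %s".format(table),
    (lang, conf, message_id))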

Executing several SQL queries with MySQLdb

How would you go about executing several SQL statements (script mode) with Python?
Trying to do something like this:
import MySQLdb
mysql = MySQLdb.connect(host='host...rds.amazonaws.com', db='dbName', user='userName', passwd='password')
sql = """
insert into rollout.version (`key`, `value`) VALUES ('maxim0', 'was here0');
insert into rollout.version (`key`, `value`) VALUES ('maxim1', 'was here1');
insert into rollout.version (`key`, `value`) VALUES ('maxim2', 'was here1');
"""
mysql.query(sql)
Fails with:
ProgrammingError: (2014, "Commands out of sync; you can't run this command now")
I'm writing a deployment engine that accepts SQL delta changes from several people and applies them to the DB on version deployment.
I've looked into this code http://sujitpal.blogspot.com/2009/02/python-sql-runner.html and implemented __sanitize_sql:
def __sanitize_sql(sql):
    # Initial implementation from http://sujitpal.blogspot.com/2009/02/python-sql-runner.html
    sql_statements = []
    incomment = False
    in_sqlcollect = False
    sql_statement = None
    for sline in sql.splitlines():
        # Remove white space from both sides.
        sline = sline.strip()
        if sline.startswith("--") or len(sline) == 0:
            # SQL comment line, skip
            continue
        if sline.startswith("/*"):
            # start of SQL comment block
            incomment = True
        if incomment and sline.endswith("*/"):
            # end of SQL comment block
            incomment = False
            continue
        # Collect lines which are part of a statement
        if not incomment:
            if sql_statement is None:
                sql_statement = sline
            else:
                sql_statement += " " + sline  # join continuation lines with a space
            if not sline.endswith(";"):
                in_sqlcollect = True
            if not in_sqlcollect:
                sql_statements.append(sql_statement)
                sql_statement = None
            in_sqlcollect = False
    if not incomment and sql_statement is not None and len(sql_statement) != 0:
        sql_statements.append(sql_statement)
    return sql_statements
if __name__ == "__main__":
    sql = """update tbl1;
/* This
is my
beautiful
comment*/
/*this is comment #2*/
some code...;
-- comment
sql code
"""
    print __sanitize_sql(sql)
I don't know if it's the best solution, but it seems to work for SQL statements that are not too complex to parse.
The question now is how to run this code. I could do something like this dude does, but it seems ugly. I'm not a Python expert (we've only been doing Python here for the past two weeks), but it seems that abusing the cursor this way is hackish and not good practice.
Ideas / blog posts would be helpful.
Thank you,
Maxim.
Here is how you could use executemany():
import MySQLdb
connection = MySQLdb.connect(host='host...rds.amazonaws.com', db='dbName', user='userName', passwd='password')
cursor = connection.cursor()
my_data_to_insert = [['maxim0', 'was here0'], ['maxim1', 'was here1'], ['maxim2', 'was here1']]
sql = "insert into rollout.version (`key`, `value`) VALUES (%s, %s);"
cursor.executemany(sql, my_data_to_insert)
connection.commit()
connection.close()
Call the executemany method on the cursor object. More info here:
http://mysql-python.sourceforge.net/MySQLdb.html
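Note that executemany() repeats one statement with many parameter sets; for a deployment script whose statements all differ, one option is to run each statement from __sanitize_sql in turn. A minimal sketch, reusing the connection details and sql variable from the question:

import MySQLdb

connection = MySQLdb.connect(host='host...rds.amazonaws.com', db='dbName',
                             user='userName', passwd='password')
cursor = connection.cursor()
for statement in __sanitize_sql(sql):
    cursor.execute(statement)  # each entry is one complete statement
connection.commit()
connection.close()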
