I have a python AWS lambda function that takes JSON records, checks them to see if they have required keys, and then inserts into a MySQL db (AWS RDS Aurora). The function gets invoked whenever a new record comes into the stream def handler.
At the moment, Lambda is reporting some errors, but when I look at cloudwatch logs I don't see any errors, which leads me to believe that maybe I'm not handling or catching the exception. Can anyone tell me where the issue might be?
from __future__ import print_function
import base64
import json
import pymysql
RDS_HOST = 'host'
DB_USER = 'dummy_user'
DB_PASSWORD = 'password1234'
DB_NAME = 'crazy_name'
DB_TABLE = 'wow_table'
class MYSQL(object):
'''
This a wrapper Class for PyMySQL
'''
CONNECTION_TIMEOUT = 30
def __init__(self, host, user, password, database, table):
self.host = host
self.user = user
self.password = password
self.database = database
self.table = table
self.connection = self.connect()
def connect(self):
'''
Connects to MySQL instance
'''
try:
connection = pymysql.connect(
host=self.host,
user=self.user,
password=self.password,
db=self.database,
connect_timeout=self.CONNECTION_TIMEOUT
)
return connection
except Exception as ex:
print(ex)
print("ERROR: Unexpected error: Could not connect to AuroraDB instance")
def execute(self, account_id, external_ref_id, timestamp):
'''
Executes command given a MySQL connection
'''
with self.connection.cursor() as cursor:
sql = ('INSERT INTO ' +
self.database +
'.' +
self.table +
'(`account_id`, `external_reference_id`, `registration`, `c_name`, `c_id`, `create_date`)' +
' VALUES (%s, %s, DATE_FORMAT(STR_TO_DATE(%s,"%%Y-%%M-%%d %%H:%%i:%%s"),"%%Y-%%m-%%d %%H:%%i:%%s"), %s, %s, current_timestamp())' +
' ON DUPLICATE KEY UPDATE create_date = VALUES(create_date)')
cursor.execute(sql, (
account_id,
external_ref_id,
timestamp,
'bingo',
300)
)
self.connection.commit()
def close_connection(self):
'''
Closes connection to MySQL
'''
self.connection.close()
def get_data_from_kinesis_object(obj):
'''
Retrieves data from kinesis event
'''
return obj['kinesis']['data']
def decode_data(data):
'''
Decodes record via base64
'''
return base64.b64decode(data)
def split_records_into_record(records):
'''
Splits a record of records into an array of records
'''
return records.split('\n')
def parse_record(record):
'''
parses record into JSON
'''
if record:
return json.loads(record)
def is_record_valid(record):
'''
Check for keys in event
returns True if they all exist
and False if they dont all exist
'''
return all(key in record for key in (
'eventName',
'sourceType',
'AccountId',
'Timestamp',
'ExternalReferenceId'
))
def handler(event, context):
"""
This function inserts data into Aurora RDS instance
"""
mysql = MYSQL(RDS_HOST, DB_USER, DB_PASSWORD, DB_NAME, DB_TABLE)
for obj in event['Records']:
records = decode_data(get_data_from_kinesis_object(obj))
split_records = split_records_into_record(records)
for record in split_records:
parsed_record = parse_record(record)
if is_record_valid(parsed_record):
mysql.execute(
parsed_record['AccountId'],
parsed_record['ExternalReferenceId'],
str(parsed_record['Timestamp'])
)
mysql.close_connection()
Related
I am running a data parser/web scraper with python. The parser then pushes the data (SQL Tables) to postgresql. However, I can't find the tables in pgadmin. This is part of a full stack django webapp, using docker, which I did not create but I am trying to get to run locally. As far as I can tell docker containers are working as intended, and so is the dataparsing script. Since I don't know much about issues like this please let me know if there is anything else I should include
Database connection in python
import psycopg2
import logging
import sys
import os
class DatabaseConnection(object):
def __init__(self, user="postgres", password="1234", host="127.0.0.1", port="5432", database="postgres",
course_table="course_info", prereqs_table="prereqs", antireqs_table="antireqs",
requirements_table="requirements", communications_table="communications",
breadth_table="breadth_table"):
if os.getenv("UWPATH_ENVIRONMENT") is not None and os.getenv("UWPATH_ENVIRONMENT") == "docker":
host = "db"
if os.getenv("DB_PASS") is not None:
password = os.getenv("DB_PASS")
if os.getenv("DB_USER") is not None:
user = os.getenv("DB_USER")
if os.getenv("DB_NAME") is not None:
database = os.getenv("DB_NAME")
if os.getenv("DB_HOST") is not None:
host = os.getenv("DB_HOST")
if os.getenv("DB_PORT") is not None:
port = os.getenv("DB_PORT")
self.connection = psycopg2.connect(user=user, password=password, host=host, port=port, database=database)
self.cursor = self.connection.cursor()
self.course_table = course_table
self.prereqs_table = prereqs_table
self.antireqs_table = antireqs_table
self.requirements_table = requirements_table
self.communications_table = communications_table
self.breadth_table = breadth_table
self.root = self.__Logger()
def __Logger(self):
self.logger = logging.getLogger()
if not len(self.logger.handlers):
self.logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
self.logger.addHandler(handler)
return self.logger
def execute(self, command):
try:
# self.root.info(command)
print(command)
self.cursor.execute(command)
return True
except Exception as e:
print(command)
self.root.error(e)
return False
def commit(self):
if self.connection:
self.connection.commit()
def close(self):
self.connection.close()
def select(self, what, table, condition=""):
"""
SELECT <what> FROM <table> <condition>;
:param what: string
:param table: string
:param condition: string
:return: list
"""
command = "SELECT " + what + " FROM " + table + " " + condition + ";"
self.execute(command)
return self.cursor.fetchall()
Trying to access the backend in browser returns this, which makes me believes the tables don't exist in postgresql
Output in txt file (roughly 300,000 lines, IDK if this is useful in analyzing the problem but thought I'd include it either way)
I currently have a long running python script (multiple days), which occasionally executes queries in a mysql db. The queries are executed from different threads.
I connect to the db as following:
import mysql.connector
import time
class DB():
connection = None
def __init__(self, host, user, password, database):
self.host = host;
self.user = user;
self.password = password;
self.database = database;
self.connect()
def connect(self):
try:
self.connection = mysql.connector.connect(
host = self.host,
user = self.user,
password = self.password,
database = self.database,
autocommit = True,
)
except Exception as exception:
print("[DBCONNECTION] {exception} Retrying in 5 seconds.".format(exception = str(exception)))
time.sleep(5)
self.connect()
db = DB("11.111.11.111", "mydb", "mydb", "mydb")
One of the functions to execute a query:
def getUsersDB(self):
db.connection.ping(reconnect=True)
cursor = db.connection.cursor()
cursor.execute("SELECT * FROM Users")
users = cursor.fetchall()
return users
I believe, that I'm far away from a good practice how I handle the connection. What is the correct way for such a case?
The problem with your approach is that db.connection.ping(reconnect=True) doesn't promise you a live connection.
You can read the function's description here:
https://dev.mysql.com/doc/connector-python/en/connector-python-api-mysqlconnection-ping.html
You can try to use this:
class DB:
connection = None
def __init__(
self,
host: str,
user: str,
password: str,
database: str
) -> None:
self.host = host
self.user = user
self.password = password
self.database = database
self.connection = self.init_conn()
def init_conn(
self,
):
return mysql.connector.connect(
host=self.host,
user=self.user,
password=self.password,
database=self.database,
)
def get_cursor(
self,
):
# This will try to reconnect also.
# In case it fails, you will have to create a new connection
try:
self.connection.ping(
reconnect=True,
attempts=3,
delay=5,
)
except mysql.connector.InterfaceError as err:
self.connection = self.init_conn()
return self.connection.cursor()
And use the DB object like this:
def getUsersDB(self):
cursor = db.get_cursor() # cursor from a live connection
cursor.execute("SELECT * FROM Users")
users = cursor.fetchall()
return users
I am a beginner in sqlalchemy.
My connection function in _core.py
from sqlalchemy import create_engine
from methodtools import lru_cache
#lru_cache(maxsize=16)
def get_engine(db="homelan"):
qs = 'mysql+pymysql://user:pwd#localhost/{db}'.format(db=db)
engine = create_engine(qs)
connection = engine.connect()
return engine, connection
in my code if the table does not exist for a specific host machine I create. as shown below:
server_status.py
class HostStatusManager(object):
keep_record = 10 # days
"""This class contains methods to manage the status of the host
registered in database for supervision or monitoring purpose.
"""
def __init__(self, ip_address):
super(HostStatusManager, self).__init__()
self._ip = ip_address
engine, connection = _core.get_engine()
self._engine = engine
self._connection = connection
self._host_table = None
self._host_table_name = None
if not self.host_status_table_exists():
self._host_table = self._create_table()
def get_status(self):
"""Gets the latest status of the host whether online or offline.
"""
columns = self._host_table.columns
print("Cols: ".format(columns))
select_field = getattr(columns, "status")
query = db.select(
[select_field]
).order_by(
db.desc(
getattr(columns, "id")
)
).limit(1)
_log.debug(query)
ResultProxy = self._connection.execute(query)
ResultSet = ResultProxy.fetchall()
if ResultSet:
return ResultSet[0][0]
_log.warning("No existing status found from {0}.".format(
self._host_table
)
)
def set_status(self, data):
query = db.insert(self._host_table).values(**data)
results = self._connection.execute(query)
If I directly call set_status it works fine but the get_status throws error saying:
pymysql.err.InternalError: (1412, 'Table definition has changed,
please retry transaction')
You shouldn't be using an lru cache to store connections, but rather use the engine's built in connection pool. Then, every time you need to talk to the database, ask for a connection from the engine, and close the connection when you're done with it. The engine will by default have a pool of size 5.
from sqlalchemy import create_engine
def get_engine(db="homelan"):
qs = 'mysql+pymysql://user:pwd#localhost/{db}'.format(db=db)
engine = create_engine(qs)
return engine
class HostStatusManager(object):
keep_record = 10 # days
"""This class contains methods to manage the status of the host
registered in database for supervision or monitoring purpose.
"""
def __init__(self, ip_address):
super(HostStatusManager, self).__init__()
self._ip = ip_address
engine, connection = _core.get_engine()
self._engine = engine
self._host_table = None
self._host_table_name = None
if not self.host_status_table_exists():
self._host_table = self._create_table()
def get_status(self):
"""Gets the latest status of the host whether online or offline.
"""
columns = self._host_table.columns
print("Cols: ".format(columns))
select_field = getattr(columns, "status")
query = db.select(
[select_field]
).order_by(
db.desc(
getattr(columns, "id")
)
).limit(1)
_log.debug(query)
connection = self._engine.connect()
try:
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
if ResultSet:
return ResultSet[0][0]
_log.warning("No existing status found from {0}.".format(
self._host_table
)
)
finally:
connection.close()
def set_status(self, data):
query = db.insert(self._host_table).values(**data)
connection = self._engine.connect()
try:
results = connection.execute(query)
finally:
connection.close()
I'm trying to login to my MySQL server that I'm running on DigitalOcean, but unfortunately I have no clue as to how to push the login through python. I've got the MySQL part implemented, but don't know how to login to the actual server itself (the computer). What other code do I need to add to accomplish this? I've already added the variables mySqlUser and mySqlPassword to the top of the file.
Here is the code I have so far:
import MySQLdb
class Database:
host = 'some ip address'
user = 'root'
password = '123'
mySqlUser = 'root'
mySqlPassword = 'someotherpassword'
db = 'test'
def __init__(self):
self.connection = MySQLdb.connect(self.host, self.user, self.password, self.db)
self.cursor = self.connection.cursor()
def insert(self, query):
try:
self.cursor.execute(query)
self.connection.commit()
except:
self.connection.rollback()
def query(self, query):
cursor = self.connection.cursor( MySQLdb.cursors.DictCursor )
cursor.execute(query)
return cursor.fetchall()
def __del__(self):
self.connection.close()
if __name__ == "__main__":
db = Database()
#CleanUp Operation
del_query = "DELETE FROM basic_python_database"
db.insert(del_query)
# Data Insert into the table
query = """
INSERT INTO basic_python_database
(`name`, `age`)
VALUES
('Mike', 21),
('Michael', 21),
('Imran', 21)
"""
# db.query(query)
db.insert(query)
# Data retrieved from the table
select_query = """
SELECT * FROM basic_python_database
WHERE age = 21
"""
people = db.query(select_query)
for person in people:
print "Found %s " % person['name']
You can Try this:
def __init__(self):
self.host = 'some ip address'
self.user = 'root'
self.password = '123'
self.mySqlUser = 'root'
self.mySqlPassword = 'someotherpassword'
self.connection = MySQLdb.connect(self.host, self.user, self.password, self.db)
self.cursor = self.connection.cursor()
or
def __init__(self):
self.connection = MySQLdb.connect(host, user, password, db)
self.cursor = self.connection.cursor()
and you batter transfer parameter when instantiation you class , instead of fixed values in class.
just a suggest and don't mind my english (:
I'm trying use Python and pyodbc to access SQL server 2008. The first connection works. Then, after the program finishes its job, it closes the connection. When the program tries to access the database and connect to it again, it fails in the statement:
self.conn = pyodbc.connect(DRIVER=self.DRIVER, SERVER=self.SERVER, DATABASE=self.DATABASE, UID=self.UID, PWD=self.PWD, charset="UTF-8")
but the first time is OK. So does anyone know why? Below is the Python code:
class ODBC_MS:
def __init__(self, DRIVER,SERVER, DATABASE, UID, PWD):
''' initialization '''
self.DRIVER = DRIVER
self.SERVER = SERVER
self.DATABASE = DATABASE
self.UID = UID
self.PWD = PWD
def _GetConnect(self):
''' Connect to the DB '''
if not self.DATABASE:
raise(NameError,"no getting db name")
try:
self.conn = pyodbc.connect(DRIVER=self.DRIVER, SERVER=self.SERVER,
DATABASE=self.DATABASE, UID=self.UID,
PWD=self.PWD, charset="UTF-8")
except Exception,e:
print e.message
else:
self.cur = self.conn.cursor()
if not self.cur:
raise(NameError,"connected failed!")
else:
return self.cur, self.conn
def ExecNoQuery(self,conn, cursor, sql):
cursor.execute(sql)
ret = conn.commit()
return ret
def _UnConnect(self,conn, cursor):
conn.close()
if __name__ == '__main__':
ms = ODBC_MS('{SQL SERVER}', r'<server>', '<db>', '<user>', '<password>')
cursor, conn = ms._GetConnect() #connection
sql = "create table XX for example"
ret = ms.ExecNoQuery(conn, cursor,sql) #sql operation
ms._UnConnect(conn, cursor) #close db
#access the database the second time.
ms = ODBC_MS('{SQL SERVER}', r'<server>', '<db>', '<user>', '<password>')
cursor, conn = ms._GetConnect() # not success, I don't know why
sql = "create table XX for example"
ret = ms.ExecNoQuery(conn, cursor,sql) #sql operation
ms._UnConnect(conn, cursor) #close db
The second time when the program calls ms.GetConnect(), the statement self.conn = pyodbc.connect(DRIVER=self.DRIVER, SERVER=self.SERVER, DATABASE=self.DATABASE, UID=self.UID, PWD=self.PWD, charset="UTF-8") fails.