I am running a data parser/web scraper with python. The parser then pushes the data (SQL Tables) to postgresql. However, I can't find the tables in pgadmin. This is part of a full stack django webapp, using docker, which I did not create but I am trying to get to run locally. As far as I can tell docker containers are working as intended, and so is the dataparsing script. Since I don't know much about issues like this please let me know if there is anything else I should include
Database connection in python
import psycopg2
import logging
import sys
import os
class DatabaseConnection(object):
    """Thin psycopg2 wrapper holding one connection/cursor pair plus the
    table names used by the scraper.

    Connection parameters come from the keyword defaults, overridden by
    the DB_* environment variables; when UWPATH_ENVIRONMENT == "docker"
    the host defaults to the compose service name "db" (DB_HOST can
    still override it).
    """

    def __init__(self, user="postgres", password="1234", host="127.0.0.1",
                 port="5432", database="postgres",
                 course_table="course_info", prereqs_table="prereqs",
                 antireqs_table="antireqs", requirements_table="requirements",
                 communications_table="communications",
                 breadth_table="breadth_table"):
        # Inside docker the DB is reachable by service name, not loopback.
        if os.getenv("UWPATH_ENVIRONMENT") == "docker":
            host = "db"
        # os.getenv(name, default) returns the default only when the
        # variable is unset — same semantics as the original
        # "is not None" checks, without calling getenv twice.
        password = os.getenv("DB_PASS", password)
        user = os.getenv("DB_USER", user)
        database = os.getenv("DB_NAME", database)
        host = os.getenv("DB_HOST", host)
        port = os.getenv("DB_PORT", port)
        self.connection = psycopg2.connect(user=user, password=password,
                                           host=host, port=port,
                                           database=database)
        self.cursor = self.connection.cursor()
        self.course_table = course_table
        self.prereqs_table = prereqs_table
        self.antireqs_table = antireqs_table
        self.requirements_table = requirements_table
        self.communications_table = communications_table
        self.breadth_table = breadth_table
        self.root = self.__Logger()

    def __Logger(self):
        """Configure (once) and return the root logger, logging DEBUG to
        stdout."""
        self.logger = logging.getLogger()
        if not self.logger.handlers:
            self.logger.setLevel(logging.DEBUG)
            handler = logging.StreamHandler(sys.stdout)
            handler.setLevel(logging.DEBUG)
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
        return self.logger

    def execute(self, command):
        """Execute a SQL command.

        :param command: string, a complete SQL statement
        :return: True on success, False on failure (error is logged)
        """
        try:
            print(command)
            self.cursor.execute(command)
            return True
        except Exception as e:
            # Command was already printed above; just log the error.
            self.root.error(e)
            return False

    def commit(self):
        """Commit the current transaction (no-op if never connected)."""
        if self.connection:
            self.connection.commit()

    def close(self):
        """Close the cursor and the underlying connection."""
        self.cursor.close()
        self.connection.close()

    def select(self, what, table, condition=""):
        """
        SELECT <what> FROM <table> <condition>;
        :param what: string
        :param table: string
        :param condition: string
        :return: list of result rows ([] if the query failed)

        WARNING: arguments are interpolated directly into the SQL text —
        never pass untrusted input here (SQL injection).
        """
        command = "SELECT " + what + " FROM " + table + " " + condition + ";"
        # Previously fetchall() ran even after a failed execute(), which
        # raised a confusing "no results to fetch" error instead of the
        # documented list.
        if not self.execute(command):
            return []
        return self.cursor.fetchall()
Trying to access the backend in the browser returns this, which makes me believe the tables don't exist in PostgreSQL.
Output in txt file (roughly 300,000 lines, IDK if this is useful in analyzing the problem but thought I'd include it either way)
Related
I currently have a long running python script (multiple days), which occasionally executes queries in a mysql db. The queries are executed from different threads.
I connect to the db as following:
import mysql.connector
import time
class DB():
    """Minimal mysql.connector wrapper that keeps retrying until a
    connection is established."""

    connection = None

    def __init__(self, host, user, password, database):
        self.host = host
        self.user = user
        self.password = password
        self.database = database
        self.connect()

    def connect(self):
        """Connect to MySQL, retrying every 5 seconds until it succeeds.

        Uses a loop instead of the original recursive retry, so a long
        outage cannot exhaust the recursion limit.
        """
        while True:
            try:
                self.connection = mysql.connector.connect(
                    host=self.host,
                    user=self.user,
                    password=self.password,
                    database=self.database,
                    autocommit=True,
                )
                return
            except Exception as exception:
                print("[DBCONNECTION] {exception} Retrying in 5 seconds.".format(exception = str(exception)))
                time.sleep(5)
# Module-level shared connection used by the query helpers below.
# NOTE(review): this connects at import time and blocks (retrying
# forever) if the DB is unreachable — confirm that is intended.
db = DB("11.111.11.111", "mydb", "mydb", "mydb")
One of the functions to execute a query:
def getUsersDB(self):
    """Return every row of the Users table as a list of tuples."""
    # Revive the shared connection if the server dropped it.
    db.connection.ping(reconnect=True)
    cur = db.connection.cursor()
    cur.execute("SELECT * FROM Users")
    return cur.fetchall()
I believe that I'm far from good practice in how I handle the connection. What is the correct way to handle such a case?
The problem with your approach is that db.connection.ping(reconnect=True) doesn't promise you a live connection.
You can read the function's description here:
https://dev.mysql.com/doc/connector-python/en/connector-python-api-mysqlconnection-ping.html
You can try to use this:
class DB:
    """mysql.connector wrapper whose get_cursor() always hands back a
    cursor taken from a live (re)connected connection."""

    connection = None

    def __init__(
        self,
        host: str,
        user: str,
        password: str,
        database: str
    ) -> None:
        self.host = host
        self.user = user
        self.password = password
        self.database = database
        self.connection = self.init_conn()

    def init_conn(
        self,
    ):
        """Open and return a brand-new MySQL connection."""
        return mysql.connector.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            database=self.database,
        )

    def get_cursor(
        self,
    ):
        """Return a cursor from a live connection.

        ping() attempts to reconnect (3 tries, 5 s apart); if that still
        fails a completely fresh connection is created instead.
        """
        try:
            self.connection.ping(
                reconnect=True,
                attempts=3,
                delay=5,
            )
        except mysql.connector.InterfaceError:
            self.connection = self.init_conn()
        return self.connection.cursor()
And use the DB object like this:
def getUsersDB(self):
    """Fetch all rows from Users through a guaranteed-live cursor."""
    live_cursor = db.get_cursor()  # cursor from a live connection
    live_cursor.execute("SELECT * FROM Users")
    return live_cursor.fetchall()
I am a beginner in sqlalchemy.
My connection function in _core.py
from sqlalchemy import create_engine
from methodtools import lru_cache
@lru_cache(maxsize=16)
def get_engine(db="homelan"):
    """Create (and cache) a SQLAlchemy engine plus an open connection
    for the given database name.

    NOTE(review): caching engine/connection pairs defeats the engine's
    built-in pooling — the decorator was restored from the mangled
    '#lru_cache' paste, but this design itself is questionable.
    """
    # '@' separates credentials from host in a SQLAlchemy URL; the
    # pasted code had it mangled to '#'.
    qs = 'mysql+pymysql://user:pwd@localhost/{db}'.format(db=db)
    engine = create_engine(qs)
    connection = engine.connect()
    return engine, connection
in my code if the table does not exist for a specific host machine I create. as shown below:
server_status.py
class HostStatusManager(object):
    """This class contains methods to manage the status of the host
    registered in database for supervision or monitoring purpose.
    """
    # (Docstring moved to the top of the class body so it actually
    # becomes __doc__; it previously sat after the attribute below.)

    keep_record = 10  # days

    def __init__(self, ip_address):
        super(HostStatusManager, self).__init__()
        self._ip = ip_address
        engine, connection = _core.get_engine()
        self._engine = engine
        self._connection = connection
        self._host_table = None
        self._host_table_name = None
        # Create the per-host table lazily on first use.
        if not self.host_status_table_exists():
            self._host_table = self._create_table()

    def get_status(self):
        """Gets the latest status of the host whether online or offline.
        """
        columns = self._host_table.columns
        # Fixed: the original "Cols: ".format(columns) was missing its
        # placeholder and always printed the bare literal.
        print("Cols: {0}".format(columns))
        select_field = getattr(columns, "status")
        # Latest row wins: order by id descending and take one.
        query = db.select(
            [select_field]
        ).order_by(
            db.desc(
                getattr(columns, "id")
            )
        ).limit(1)
        _log.debug(query)
        ResultProxy = self._connection.execute(query)
        ResultSet = ResultProxy.fetchall()
        if ResultSet:
            return ResultSet[0][0]
        _log.warning("No existing status found from {0}.".format(
            self._host_table
        )
        )

    def set_status(self, data):
        """Insert a new status row; *data* maps column names to values."""
        query = db.insert(self._host_table).values(**data)
        results = self._connection.execute(query)
If I directly call set_status it works fine, but get_status throws an error saying:
pymysql.err.InternalError: (1412, 'Table definition has changed,
please retry transaction')
You shouldn't be using an lru cache to store connections, but rather use the engine's built in connection pool. Then, every time you need to talk to the database, ask for a connection from the engine, and close the connection when you're done with it. The engine will by default have a pool of size 5.
from sqlalchemy import create_engine
def get_engine(db="homelan"):
    """Return a SQLAlchemy engine for *db*.

    No caching here: the engine carries its own connection pool (default
    size 5), so callers borrow connections per operation instead.
    """
    # '@' separates credentials from host ('#' was a paste artifact).
    qs = 'mysql+pymysql://user:pwd@localhost/{db}'.format(db=db)
    engine = create_engine(qs)
    return engine
class HostStatusManager(object):
    """This class contains methods to manage the status of the host
    registered in database for supervision or monitoring purpose.
    """
    # (Docstring moved above the attribute so it becomes __doc__.)

    keep_record = 10  # days

    def __init__(self, ip_address):
        super(HostStatusManager, self).__init__()
        self._ip = ip_address
        engine, connection = _core.get_engine()
        self._engine = engine
        self._host_table = None
        self._host_table_name = None
        if not self.host_status_table_exists():
            self._host_table = self._create_table()

    def get_status(self):
        """Gets the latest status of the host whether online or offline.
        """
        columns = self._host_table.columns
        # Fixed missing placeholder (was: "Cols: ".format(columns)).
        print("Cols: {0}".format(columns))
        select_field = getattr(columns, "status")
        query = db.select(
            [select_field]
        ).order_by(
            db.desc(
                getattr(columns, "id")
            )
        ).limit(1)
        _log.debug(query)
        # Borrow a pooled connection and always return it.
        connection = self._engine.connect()
        try:
            ResultProxy = connection.execute(query)
            ResultSet = ResultProxy.fetchall()
            if ResultSet:
                return ResultSet[0][0]
            _log.warning("No existing status found from {0}.".format(
                self._host_table
            )
            )
        finally:
            connection.close()

    def set_status(self, data):
        """Insert a status row using a short-lived pooled connection."""
        query = db.insert(self._host_table).values(**data)
        connection = self._engine.connect()
        try:
            results = connection.execute(query)
        finally:
            connection.close()
I wrote custom handler that puts log messages into MySQL database. I want to use this handler only if I was connected properly using mysql-connector python library. Otherwise I want to omit this handler.
class LogDBHandler(logging.Handler):
"""Customized logging handler that puts logs to the database."""
def __init__(self):
    """Open the DB link, grab a cursor, and ensure the log table exists."""
    super().__init__()
    self.table_name = 'log'
    self.counter = 0
    self.chunk = []
    self.sql_conn, self.sql_cursor = self.connect_to_db()
    # The table may already exist, in which case MySQL raises
    # ProgrammingError — that is fine, just keep going.
    try:
        self.create_table()
    except mysql.connector.errors.ProgrammingError:
        pass
@staticmethod
def connect_to_db():
    """Connect to MySQL database to perform logging.

    Credentials come from the DARWIN_DB_* environment variables.
    Returns a (connection, cursor) pair.

    (Restored the '@staticmethod' decorator — it was mangled to
    '#staticmethod' in the paste, which turned it into a comment and
    made the method receive the instance as its first argument.)
    """
    credentials = {
        "user": os.environ.get("DARWIN_DB_USER"),
        "password": os.environ.get("DARWIN_DB_PASSWORD"),
        "host": os.environ.get("DARWIN_DB_HOST", "127.0.0.1"),
        "port": os.environ.get("DARWIN_DB_PORT", "3306"),
        "database": os.environ.get("DARWIN_DB_NAME"),
    }
    db = mysql.connector.connect(**credentials)
    cursor = db.cursor()
    return db, cursor
...
This is logging configuration file, where I store all loggers, handlers etc.
[loggers]
keys=root
[handlers]
keys=root, db
[formatters]
keys=formatter
[logger_root]
level=DEBUG
handlers=root, db
[handler_root]
class=FileHandler
level=DEBUG
formatter=formatter
args=('darwin.log', 'w')
[handler_db]
class=libs.logger.LogDBHandler
level=DEBUG
formatter=formatter
args=()
[formatter_formatter]
format=%(asctime)s - %(name)-12s - %(levelname)-8s - %(message)s
Everything works fine if the database credentials are valid. If they are wrong, a mysql.connector.errors.ProgrammingError exception is raised. Instead of shutting down the entire program, I would like to omit this custom handler if self.connect_to_db() raises an exception. Any ideas how to achieve such a thing?
Thanks in advance ;)
Reedit with a working mock:
class LogDBHandler(logging.Handler):
    """Minimal mock of the DB log handler.

    If connecting fails for any reason, sql_conn stays None so the main
    script can detect the dead handler and remove it from the logger.
    """

    def __init__(self):
        super().__init__()
        self.sql_conn = None
        try:
            self.sql_conn, self.sql_cursor = self.connect_to_db()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are not swallowed; any DB failure simply
            # leaves sql_conn as None.
            return None

    def emit(self, msg):
        # Mock: discard records.
        pass


# Expose the class where fileConfig's handler section expects it.
logging.handlers.LogDBHandler = LogDBHandler
Main:
import logging.config
from logdbhandler import LogDBHandler

logging.config.fileConfig( 'logconf.yaml', )
logger = logging.getLogger()
# Iterate over a copy: removeHandler() mutates logger.handlers, and
# removing entries while iterating the live list skips elements.
for handler in list(logger.handlers):
    if isinstance(handler, LogDBHandler) and not handler.sql_conn:
        logger.removeHandler(handler)
print(logger.handlers)
Solution that worked for me. I'm aware it's not elegant, but it works.
class LogDBHandler(logging.Handler):
    """Customized logging handler that puts logs to the database."""

    def __init__(self):
        super().__init__()
        self.table_name = 'log'
        self.counter = 0        # records buffered since last insert
        self.chunk = []         # buffered rows awaiting bulk insert
        self.sql_conn = self.connect_to_db()
        if self.sql_conn:
            self.sql_cursor = self.sql_conn.cursor()
            # create log table if doesn't exist
            self.create_log_table()
        else:
            print("DB Connection error")

    def emit(self, record):
        """Called on each log attempt; buffers rows and flushes to the
        DB every 1000 records."""
        if self.sql_conn:
            timestamp = time.strftime("%Y-%m-%d %H:%M:%S",
                                      time.localtime(record.created))
            self.chunk.append((record.name, record.levelname,
                               str(record.getMessage()), timestamp))
            self.counter += 1
            # Insert chunk of data into DB
            if self.counter % 1000 == 0:
                self.insert_chunk()

    def flush(self):
        """
        Handler destructor, close all db connection and insert final
        records into db.

        NOTE(review): logging may call flush() more than once (e.g. via
        close()/shutdown); after the first call the cursor/connection
        are closed — confirm no emit() can fire afterwards.
        """
        if self.sql_conn:
            # Insert last chunk of data
            self.insert_chunk()
            # Close connection with DB
            self.sql_cursor.close()
            self.sql_conn.close()

    @staticmethod
    def connect_to_db():
        """Connect to MySQL database to perform logging.

        Returns the connection, or None on bad credentials so the
        caller can disable the handler instead of crashing.

        (Restored '@staticmethod' from the mangled '#staticmethod'.)
        """
        credentials = {
            "user": os.environ.get("DARWIN_DB_USER"),
            "password": os.environ.get("DARWIN_DB_PASSWORD"),
            "host": os.environ.get("DARWIN_DB_HOST", "127.0.0.1"),
            "port": os.environ.get("DARWIN_DB_PORT", "3306"),
            "database": os.environ.get("DARWIN_DB_NAME"),
        }
        try:
            connection = mysql.connector.connect(**credentials)
            return connection
        except mysql.connector.errors.ProgrammingError:
            return None
May be someone will benefit from the one I have eventually created for myself:
# -*- coding: utf-8 -*-
"""
Copied and modified from https://github.com/onemoretime/mySQLHandler/
"""
import MySQLdb
import logging
import time
class mySQLHandler(logging.Handler):
    """
    Logging handler for MySQL db.
    """

    # SQL templates; {log_table} is filled per instance.
    check_sql = """SHOW TABLES LIKE '{log_table}';"""
    create_sql = """CREATE TABLE IF NOT EXISTS {log_table}(
Created text,
Name text,
LogLevel int,
LogLevelName text,
Message text,
Args text,
Module text,
FuncName text,
LineNo int,
Exception text,
Process int,
Thread text,
ThreadName text
)"""
    # NOTE(review): record values are interpolated with str.format and
    # double-quoted; only single quotes in msg are escaped below, so a
    # double quote in any field can still break the statement (and this
    # is injectable by design) — acceptable only for trusted log input.
    insert_sql = """INSERT INTO {log_table}(
Created,
Name,
LogLevel,
LogLevelName,
Message,
Args,
Module,
FuncName,
LineNo,
Exception,
Process,
Thread,
ThreadName
)
VALUES (
"{dbtime}",
"{name}",
{levelno},
"{levelname}",
"{msg}",
"{args}",
"{module}",
"{funcName}",
{lineno},
"{exc_text}",
{process},
"{thread}",
"{threadName}"
);
"""

    def __init__(self, **kwargs):
        """
        Customized logging handler that puts logs to MySQL db.

        kwargs must supply: host, port, dbuser, dbpassword, dbname,
        log_table.
        """
        logging.Handler.__init__(self)
        self.host = kwargs['host']
        self.port = kwargs['port']
        self.dbuser = kwargs['dbuser']
        self.dbpassword = kwargs['dbpassword']
        self.dbname = kwargs['dbname']
        self.log_table = kwargs['log_table']
        # Both become None when the connection attempt fails (see below).
        self.sql_conn, self.sql_cursor = self.connect_to_db()

    def connect_to_db(self):
        """
        Connect to MySQL database to perform logging.
        Create log table if does not exist.
        """
        try:
            conn=MySQLdb.connect(host=self.host,port=self.port,user=self.dbuser,passwd=self.dbpassword,db=self.dbname)
            cur = conn.cursor()
            # Check for the table first; CREATE ... IF NOT EXISTS would
            # also be safe, but SHOW TABLES avoids the DDL when possible.
            cur.execute(mySQLHandler.check_sql.format(log_table = self.log_table))
            conn.commit()
            table_exist = cur.fetchone()
            if not table_exist:
                cur.execute(mySQLHandler.create_sql.format(log_table = self.log_table))
                conn.commit()
            return conn, cur
        except Exception: # ignoring connection and table creation exceptions as this handler meant to be used with application db
            return None, None

    def flush(self):
        """
        Override to implement custom flushing behaviour for MySQLdb connection.
        """
        # NOTE(review): flush() can be called repeatedly by logging
        # shutdown; sql_conn is not reset to None after closing —
        # confirm a second flush cannot run on a closed connection.
        if self.sql_conn:
            self.sql_cursor.close()
            self.sql_conn.close()

    def formatDBTime(self, record):
        """
        Time formatter.
        """
        # Stores a wall-clock timestamp derived from record.created.
        record.dbtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(record.created))

    def emit(self, record):
        """
        Emit a record to MySQL db.
        Format the record and send it to the specified database.
        """
        # Silently does nothing when the connection failed at startup.
        if self.sql_conn:
            try:
                self.format(record)
                self.formatDBTime(record)
                # Flatten any traceback to a single escaped line.
                record.exc_text = logging._defaultFormatter.formatException(record.exc_info).replace('"', "'").replace('\n','').replace('\r','') if record.exc_info else ""
                if isinstance(record.msg, str): record.msg = record.msg.replace("'", "''")
                sql_stmt = mySQLHandler.insert_sql.format(**record.__dict__, log_table = self.log_table)
                self.sql_cursor.execute(sql_stmt)
                self.sql_conn.commit()
            except Exception:
                # Roll back the failed insert and delegate to logging's
                # standard error handling (prints to stderr by default).
                self.sql_conn.rollback()
                self.handleError(record)
logging.conf looks like this:
[handler_mySQLHandler]
#class=logging.handlers.mySQLHandler
class=mySQLHandler.mySQLHandler
kwargs={"host":"hostname", "port":3306, "dbuser":"dubber", "dbpassword": "password", "dbname": "dbname", "log_table":"syslog"}
I have a python AWS lambda function that takes JSON records, checks them to see if they have required keys, and then inserts into a MySQL db (AWS RDS Aurora). The function gets invoked whenever a new record comes into the stream def handler.
At the moment, Lambda is reporting some errors, but when I look at cloudwatch logs I don't see any errors, which leads me to believe that maybe I'm not handling or catching the exception. Can anyone tell me where the issue might be?
from __future__ import print_function
import base64
import json
import pymysql
# Database configuration.
# NOTE(review): credentials are hard-coded placeholders — load them from
# environment variables or AWS Secrets Manager before deploying.
RDS_HOST = 'host'
DB_USER = 'dummy_user'
DB_PASSWORD = 'password1234'
DB_NAME = 'crazy_name'
DB_TABLE = 'wow_table'
class MYSQL(object):
    '''
    This a wrapper Class for PyMySQL
    '''

    CONNECTION_TIMEOUT = 30  # seconds

    def __init__(self, host, user, password, database, table):
        self.host = host
        self.user = user
        self.password = password
        self.database = database
        self.table = table
        self.connection = self.connect()

    def connect(self):
        '''
        Connects to MySQL instance and returns the connection.

        Re-raises the original exception after logging it: the previous
        version returned None here, which later surfaced as an opaque
        AttributeError inside execute() — errors Lambda reported but
        that never showed up usefully in CloudWatch.
        '''
        try:
            connection = pymysql.connect(
                host=self.host,
                user=self.user,
                password=self.password,
                db=self.database,
                connect_timeout=self.CONNECTION_TIMEOUT
            )
            return connection
        except Exception as ex:
            print(ex)
            print("ERROR: Unexpected error: Could not connect to AuroraDB instance")
            raise

    def execute(self, account_id, external_ref_id, timestamp):
        '''
        Executes command given a MySQL connection
        '''
        # Values are parameterized; only db/table names are concatenated,
        # and those come from module constants, not user input.
        with self.connection.cursor() as cursor:
            sql = ('INSERT INTO ' +
                   self.database +
                   '.' +
                   self.table +
                   '(`account_id`, `external_reference_id`, `registration`, `c_name`, `c_id`, `create_date`)' +
                   ' VALUES (%s, %s, DATE_FORMAT(STR_TO_DATE(%s,"%%Y-%%M-%%d %%H:%%i:%%s"),"%%Y-%%m-%%d %%H:%%i:%%s"), %s, %s, current_timestamp())' +
                   ' ON DUPLICATE KEY UPDATE create_date = VALUES(create_date)')
            cursor.execute(sql, (
                account_id,
                external_ref_id,
                timestamp,
                'bingo',
                300)
            )
        self.connection.commit()

    def close_connection(self):
        '''
        Closes connection to MySQL
        '''
        self.connection.close()
def get_data_from_kinesis_object(obj):
    '''
    Retrieves data from kinesis event
    '''
    kinesis_payload = obj['kinesis']
    return kinesis_payload['data']
def decode_data(data):
    '''
    Decodes record via base64
    '''
    decoded = base64.b64decode(data)
    return decoded
def split_records_into_record(records):
    '''
    Splits a record of records into an array of records

    NOTE(review): assumes a str input; decode_data() returns bytes on
    Python 3, whose .split('\n') would raise — confirm the intended
    runtime / add a decode step upstream.
    '''
    separated = records.split('\n')
    return separated
def parse_record(record):
    '''
    parses record into JSON

    Returns None for an empty/falsy record.
    '''
    if not record:
        return None
    return json.loads(record)
def is_record_valid(record):
    '''
    Check for keys in event
    returns True if they all exist
    and False if they dont all exist
    '''
    required_keys = (
        'eventName',
        'sourceType',
        'AccountId',
        'Timestamp',
        'ExternalReferenceId',
    )
    return all(key in record for key in required_keys)
def handler(event, context):
    """
    This function inserts data into Aurora RDS instance.

    Fixes over the previous version:
    - try/finally so the DB connection is closed even when a record
      fails to decode/parse (it used to leak and abort the batch with
      nothing useful in the logs);
    - guards against parse_record() returning None for empty fragments
      (e.g. a trailing newline), which made `key in record` raise
      TypeError inside is_record_valid().
    """
    mysql = MYSQL(RDS_HOST, DB_USER, DB_PASSWORD, DB_NAME, DB_TABLE)
    try:
        for obj in event['Records']:
            records = decode_data(get_data_from_kinesis_object(obj))
            split_records = split_records_into_record(records)
            for record in split_records:
                parsed_record = parse_record(record)
                # A falsy parsed_record ({} or None) can never be valid.
                if parsed_record and is_record_valid(parsed_record):
                    mysql.execute(
                        parsed_record['AccountId'],
                        parsed_record['ExternalReferenceId'],
                        str(parsed_record['Timestamp'])
                    )
    finally:
        mysql.close_connection()
I'm trying to develop a logging handler for PostgreSQL. I've used this gist as a template and changed that to suit my needs as
# -*- coding: utf-8 -*-
import psycopg2
import logging
import time
## Logging handler for PostgreSQL
#
#
class psqlHandler(logging.Handler):
    """Logging handler that writes log records to a PostgreSQL table.

    On construction it connects and creates the `log` table if missing;
    emit() inserts one row per record via a parameterized statement.
    """

    # DDL run once at start-up; IF NOT EXISTS keeps it idempotent.
    initial_sql = """CREATE TABLE IF NOT EXISTS log(
Created text,
Name text,
LogLevel int,
LogLevelName text,
Message text,
Args text,
Module text,
FuncName text,
LineNo int,
Exception text,
Process int,
Thread text,
ThreadName text
)"""

    # Parameterized insert fed from the LogRecord's __dict__ (safe
    # against injection, unlike string formatting).
    insertion_sql = """INSERT INTO log(
Created,
Name,
LogLevel,
LogLevelName,
Message,
Module,
FuncName,
LineNo,
Exception,
Process,
Thread,
ThreadName) VALUES (
%(created)s,
%(name)s,
%(levelno)s,
%(levelname)s,
%(msg)s,
%(module)s,
%(funcName)s,
%(lineno)s,
%(exc_text)s,
%(process)s,
%(thread)s,
%(threadName)s
);"""

    def connect(self):
        # Open (or reopen) the connection; True on success, False on
        # failure. Result is stored in the name-mangled self.__connect.
        try:
            self.__connect = psycopg2.connect(
                database=self.__database,
                host = self.__host,
                user = self.__user,
                password = self.__password,
                sslmode="disable")
            return True
        except:
            # NOTE(review): bare except hides the real error; callers
            # only see the boolean.
            return False

    def __init__(self, params):
        # params must be a dict with database/host/user/password keys.
        if not params:
            raise Exception ("No database where to log ☻")
        self.__database = params['database']
        self.__host = params['host']
        self.__user = params['user']
        self.__password = params['password']
        self.__connect = None
        if not self.connect():
            raise Exception ("Database connection error, no logging ☻")
        logging.Handler.__init__(self)
        # Ensure the log table exists before the first emit().
        self.__connect.cursor().execute(psqlHandler.initial_sql)
        self.__connect.commit()
        self.__connect.cursor().close()

    def emit(self, record):
        # Use default formatting:
        self.format(record)
        # exc_text must exist for the insert's %(exc_text)s placeholder.
        # NOTE(review): logging._defaultFormatter is a private API.
        if record.exc_info:
            record.exc_text = logging._defaultFormatter.formatException(record.exc_info)
        else:
            record.exc_text = ""
        # Insert log record:
        try:
            cur = self.__connect.cursor()
        except:
            # Connection presumably dropped; reconnect once and retry.
            self.connect()
            cur = self.__connect.cursor()
        cur.execute(psqlHandler.insertion_sql, record.__dict__)
        self.__connect.commit()
        # NOTE(review): this closes a brand-new cursor, not `cur` —
        # confirm whether `cur.close()` was intended.
        self.__connect.cursor().close()
if __name__ == "__main__":
    # Smoke test: log one record to a local test database.
    myh = psqlHandler({'host':"localhost", 'user':"test",
                       'password':"testpw", 'database':"test"})
    l = logging.getLogger("TEST")
    l.setLevel(logging.DEBUG)
    l.addHandler(myh)
    # range() replaces Python-2-only xrange(); surrounding code in this
    # file already uses Python 3 features (e.g. bare super()).
    for i in range(1):
        l.info("test%i"%i)
What I would like to know is if this logger is correct (apparently works) and if it would work in a multiprocessing environment.
Thanks.