I have 2 python functions that handle an event in a lambda function that are essentially the same thing. When checking the logs in AWS I get the following error:
{
"errorMessage": "local variable 'post_connection' referenced before assignment",
"errorType": "UnboundLocalError",
"stackTrace": [
" File \"/var/task/etl_python/handler.py\", line 11, in handle\n EtlCall.run_bc(event)\n",
" File \"/var/task/etl_python/elt_call.py\", line 153, in run_bc\n if post_connection:\n"
]
}
My code looks like this:
def run_bo(event):
s3_resource = boto3.resource('s3')
idv_endpoint = os.getenv('DB_ENDPOINT')
idv_database = os.getenv("DB_NAME")
filename = 'staging/aml_bo'
bucket = os.getenv('BILLING_ETL')
if 'resources' in event and "psql_billing" in event['resources'][0]:
try:
config = VaultService()
s3_resource = boto3.resource('s3')
idv_endpoint = os.getenv('DB_ENDPOINT')
idv_database = os.getenv("DB_NAME")
filename = 'staging/billing_bo'
bucket = os.getenv('BILLING_ETL')
idv_username = config.get_postgres_username()
idv_password = config.get_postgres_password()
post_connection = psycopg2.connect(user = idv_username
, password = idv_password
, host = idv_endpoint
, port = "5432"
, database = idv_database)
cursor = post_connection.cursor()
bo_qry = "SELECT uuid\
,first_name, middle_initial, last_name, date(date_of_birth)
mailing_address, public.bo"
#Might need not need the next two lines but this should work.
query = """COPY ({}) TO STDIN WITH (FORMAT csv, DELIMITER '|', QUOTE '"', HEADER TRUE)""".format(bo_qry)
file = StringIO()
cursor.copy_expert(query, file)
s3_resource.Object(bucket, f'{filename}.csv').put(Body=file.getvalue())
cursor.close()
except(Exception, psycopg2.Error) as error:
print("Error connecting to postgres instance", error)
finally:
if post_connection:
cursor.close()
post_connection.close()
#return "SUCCESS"
else:
# Unknown notification
#raise Exception(f'Unexpected event notification: {event}')
print("Cannot make a solid connection to psql instance. Please check code configuration")
def run_bc(event):
if 'resources' in event and "psql_billing" in event['resources'][0]:
try:
config = VaultService()
s3_resource = boto3.resource('s3')
idv_endpoint = os.getenv('DB_ENDPOINT')
idv_database = os.getenv("DB_NAME")
filename = 'staging/billing_bc'
bucket = os.getenv('BILLING_ETL')
idv_username = config.get_postgres_username()
idv_password = config.get_postgres_password()
post_connection = psycopg2.connect(user = idv_username
, password = idv_password
, host = idv_endpoint
, port = "5432"
, database = idv_database)
cursor = post_connection.cursor()
bc_qry = "select id, uuid, document_type, image_id,
document_id\
from public.bc"
#Might need not need the next two lines but this should work.
query = """COPY ({}) TO STDIN WITH (FORMAT csv, DELIMITER '|', QUOTE '"', HEADER TRUE)""".format(bc_flowdown_qry)
file = StringIO()
cursor.copy_expert(query, file)
s3_resource.Object(bucket, f'{filename}.csv').put(Body=file.getvalue())
cursor.close()
except(Exception, psycopg2.Error) as error:
print("Error connecting to postgres instance", error)
finally:
if post_connection:
cursor.close()
post_connection.close()
#return "SUCCESS"
else:
# Unknown notification
#raise Exception(f'Unexpected event notification: {event}')
print("Cannot make a solid connection to psql instance. Please check code configuration")
I don't understand how my connection is unbound if I am closing the connection and the connection after each function and then reopening it for the next. I close it at the end when the data is dumped to my file and then create a new connection in the next function.
Related
I am trying to insert a zip file to blob storage of cassandra db.
type workflowid str and savepath blob while creating a table column
try:
bin_data = open('4e9b-9c08-e3cafafd871b-20211129034556.zip', 'rb').read()
print(type(bin_data))
#hex_data = codecs.encode(bin_data, "hex_codec")
#print(type(hex_data))
auth_provider = PlainTextAuthProvider(username = username , password = password)
cluster = Cluster(contact_points = [ip], port=1234, auth_provider = auth_provider)
session = cluster.connect("keyspace")
session.row_factory = dict_factory
resultpath = bin_data
workflowid = "d6fe8ea4-fffd"
query = "insert into tablename(resultpath, workflowid) VALUES({resultpath}, '{workflowid}');".format(resultpath, workflowid)
rslt = session.execute(query, timeout=None)
df = pd.DataFrame(rslt)
print(df)
except Exception as E:
print("Error: ", str(E))
It looks like some of the data in the BLOB isn't being properly escaped. Try using a prepared statement, instead:
query = "insert into tablename(resultpath, workflowid) VALUES(?,?);"
pStatement = session.prepare(query)
rslt = session.execute(pStatement, [resultpath, workflowid])
I am using pycharm and sqlalchemy to connect to the database
The error shown is as follows
"Unable to determine database type from python tuple type"
db.py file
import sqlalchemy
user_name = ''
password = ''
server = ''
db_name =
DATABASE_URL = f"mssql+pymssql://{user_name}:{password}#{server}/{db_name}"
engine = sqlalchemy.create_engine(DATABASE_URL)
rom .route import account
from ...shared.db import engine
#account.post("/login")
async def login(email: str, password: str):
try:
mode: str = "LOGIN",
userid: int = 0,
loginip: str = " ",
loginbrowser: str = ""
connection = engine.raw_connection()
cursor = connection.cursor()
cursor.callproc('SP_Login', (mode, email, password, loginip, loginbrowser, userid))
# result = []
for row in cursor:
print(row)
break
return row
connection.commit()
# print(result)
except Exception as e:
print(e)
I'm trying to update my Heroku DB from a Python script I have on my computer. I set up my app on Heroku with NodeJS (because I just like Javascript for that sort of thing), and I'm not sure I can add in a Python script to manage everything. I was able to fill out the DB once, with the script, and it had no hangups. When I try to update it, I get the following statement in my console:
Traceback (most recent call last):
File "/home/alan/dev/python/smog_usage_stats/scripts/DBManager.py", line 17, in <module>
CONN = pg2.connect(
File "/home/alan/dev/python/smog_usage_stats/venv/lib/python3.8/site-packages/psycopg2/__init__.py", line 127, in connect
conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
psycopg2.OperationalError: FATAL: role "alan" does not exist
and this is my script:
#DBManager.py
import os
import zipfile
import psycopg2 as pg2
from os.path import join, dirname
from dotenv import load_dotenv
# -------------------------------
# Connection variables
# -------------------------------
dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)
# -------------------------------
# Connection to database
# -------------------------------
# Server connection
CONN = pg2.connect(
database = os.environ.get('PG_DATABASE'),
user = os.environ.get('PG_USER'),
password = os.environ.get('PG_PASSWORD'),
host = os.environ.get('PG_HOST'),
port = os.environ.get('PG_PORT')
)
# Local connection
# CONN = pg2.connect(
# database = os.environ.get('LOCAL_DATABASE'),
# user = os.environ.get('LOCAL_USER'),
# password = os.environ.get('LOCAL_PASSWORD'),
# host = os.environ.get('LOCAL_HOST'),
# port = os.environ.get('LOCAL_PORT')
# )
print("Connected to POSTGRES!")
global CUR
CUR = CONN.cursor()
# -------------------------------
# Database manager class
# -------------------------------
class DB_Manager:
def __init__(self):
self.table_name = "smogon_usage_stats"
try:
self.__FILE = os.path.join(
os.getcwd(),
"data/statsmaster.csv"
)
except:
print('you haven\'t downloaded any stats')
# -------------------------------
# Create the tables for the database
# -------------------------------
def construct_tables(self):
master_file = open(self.__FILE)
columns = master_file.readline().strip().split(",")
sql_cmd = "DROP TABLE IF EXISTS " + self.table_name + ";\n"
sql_cmd += "CREATE TABLE " + self.table_name + " (\n"
sql_cmd += (
"id_ SERIAL PRIMARY KEY,\n"
+ columns[0] + " INTEGER,\n"
+ columns[1] + " VARCHAR(50),\n"
+ columns[2] + " FLOAT,\n"
+ columns[3] + " INTEGER,\n"
+ columns[4] + " FLOAT,\n"
+ columns[5] + " INTEGER,\n"
+ columns[6] + " FLOAT,\n"
+ columns[7] + " INTEGER,\n"
+ columns[8] + " VARCHAR(10),\n"
+ columns[9] + " VARCHAR(50));"
)
CUR.execute(sql_cmd)
CONN.commit()
# -------------------------------
# Copy data from CSV files created in smogon_pull.py into database
# -------------------------------.
def fill_tables(self):
master_file = open(self.__FILE, "r")
columns = tuple(master_file.readline().strip().split(","))
CUR.copy_from(
master_file,
self.table_name,
columns=columns,
sep=","
)
CONN.commit()
# -------------------------------
# Disconnect from database.
# -------------------------------
def close_db(self):
CUR.close()
print("Cursor closed.")
CONN.close()
print("Connection to server closed.")
if __name__ == "__main__":
manager = DB_Manager()
print("connected")
manager.construct_tables()
print("table made")
manager.fill_tables()
print("filled")
as I said, everything worked fine, but now I'm getting this unexpected error, and not sure how to trace it back. The name "alan" is not in any of my credentials, which is confusing me.
I'm not running it via CLI, but through my text editor (in this case VS code).
So the reason this didn't work, is that I was pointing to the wrong directory for my .env file. dotenv_path = join(dirname(__file__), '.env') needs to "walk" up one more level to find my .env. Changed it to dotenv_path = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', '.env')) and it worked. Just in case someone else has a similar issue, that might be something to check!
Might be unrelated, but double check your ports if using multiple instances: I also got psycopg2.OperationalError: FATAL: role "myUser" does not exist when I wanted to log in to one PostgreSQL database running on (default) port 5432 with credentials which I had set up in another instance running on port 5433...
I am new to python and thought I would practice what I have been learning to complete a little task. Essentially I am inserting a cognimatic cameras data into a database from a .csv file that I pulled from the web. sadly I have had to omit all the connection details as it can only be accessed from my works computer which means the script cannot be run.
To the problem!
I have a for loop that iterates through the cameras in the system, running this script:
#!/usr/bin/python
import pymssql
import urllib2
import sys
import getpass
import csv
import os
attempts = 0 #connection attempt counter
#check db connection with tsql -H cabernet.ad.uow.edu.au -p 1433 -U ADUOW\\mbeavis -P mb1987 -D library_gate_counts
server = "*****" #sever address
#myUser = 'ADUOW\\' + raw_input("User: ")# User and password for server. Will this be needed when the script runs on the server? # Ask David
#passw = getpass.getpass("Password: ")
while attempts < 3: # attempt to connect 3 times
try: #try connection
conn = pymssql.connect(server = server, user = '****', password = '****', database = "***", port='1433',timeout = 15, login_timeout = 15)
break
except pymssql.Error as e: #if connection fails print error information
attempts += 1
print type(e)
print e.args
camCursor = conn.cursor() #creates a cursor on the database
camCursor.execute("SELECT * FROM dbo.CAMERAS") #Selects the camera names and connection details
for rows in camCursor:
print rows
Everything is fine and the loop runs as it should, however when I actually try and do anything with the data the loop runs once and ends, this is the full script:
#!/usr/bin/python
import pymssql
import urllib2
import sys
import getpass
import csv
import os
attempts = 0 #connection attempt counter
#check db connection with tsql -H cabernet.ad.uow.edu.au -p 1433 -U ADUOW\\mbeavis -P mb1987 -D library_gate_counts
server = "*****" #sever address
#myUser = 'ADUOW\\' + raw_input("User: ")# User and password for server. Will this be needed when the script runs on the server? # Ask David
#passw = getpass.getpass("Password: ")
while attempts < 3: # attempt to connect 3 times
try: #try connection
conn = pymssql.connect(server = server, user = '****', password = '****', database = "***", port='1433',timeout = 15, login_timeout = 15)
break
except pymssql.Error as e: #if connection fails print error information
attempts += 1
print type(e)
print e.args
camCursor = conn.cursor() #creates a cursor on the database
camCursor.execute("SELECT * FROM dbo.CAMERAS") #Selects the camera names and connection details
for rows in camCursor:
print rows
cameraName = str(rows[0]) #converts UNICODE camera name to string
connectionDetails = str(rows[1]) #converts UNICODE connection details to string
try: #try connection
#connect to webpage, this will be changed to loop through the entire range of cameras, which will
#have their names and connection details stored in a seperate database table
prefix = "***"
suffix = "**suffix"
response = urllib2.urlopen(prefix + connectionDetails + suffix, timeout = 5)
content = response.read() #read the data for the csv page into content
f = open( "/tmp/test.csv", 'w' ) #open a file for writing (test phase only)
f.write( content ) #write the data stored in content to file
f.close() #close file
print content #prints out content
with open( "/tmp/test.csv", 'rb' ) as csvFile: #opens the .csv file previously created
reader = csv.DictReader(csvFile) #reader object of DictReader, allows for the first row to be the dictionary keys for the following rows
for row in reader: #loop through each row
start = row['Interval start']
end = row['Interval stop']
camName = row['Counter name']
pplIn = int(row['Pedestrians coming in'])
pplOut = int(row['Pedestrians going out'])
insertCursor = conn.cursor()
insert = "INSERT INTO dbo.COUNTS VALUES (%s, %s, %d, %d)"
insertCursor.execute(insert, (camName, start, pplIn, pplOut))
conn.commit()
except urllib2.URLError as e: #catch URL errors
print type(e)
print e.args
except urllib2.HTTPError as e: #catch HTTP erros
print type(e)
print e.code
I have been scratching my head as I cannot see why there is a problem, but maybe I just need some fresh eyes on it. Any help would be great cheers!
Have you tried to do something like
queryResult = camCursor.execute("SELECT * FROM dbo.CAMERAS")
for rows in queryResult:
...
I guess this might solve the problem, which is probably the fact that you're trying to iterate over a cursor instead of the results.
You might find this way interesting as well:
camCursor.execute("SELECT * FROM dbo.CAMERAS")
for rows in camCursor.fetchall():
...
Source: https://docs.python.org/2/library/sqlite3.html
I am using mysqldb/python to push some data into a mysql db.
The script parses a bunch of XML files for the data.
The MySQL server seems to quit and give me a '#2002 - The server is not responding (or the local MySQL server's socket is not correctly configured)' error midway through the transactions - in a different place every time I run it (so I am assuming its not a specific piece of data that is making it fall over...)
It works perfectly until it reaches ~12 or 13 file and it gives me this error:
Error 2003: Can't connect to MySQL server on 'localhost' (10055)
Traceback (most recent call last):
File "sigFileParser.py", line 113, in <module>
doParser(sigfile_filename)
File "sigFileParser.py", line 106, in
doParser
doFormatsPush(packedFormats)
File "sigFileParser.py", line 27, in
doFormatsPush
sys.exit (1)
NameError: global name 'sys' is not defined
Once the error occurs I can not get into MySQL console or via PHOPmyadmin
If I leave if for a while, I can get back into MySQL
MySQL tables:
CREATE TABLE IF NOT EXISTS patterns
(Version int(3),
DateCreated DATETIME,
SigID int(4),
SigSpecificity CHAR(10),
ByteSeqReference CHAR(12),
MinFragLength int(4),
Position int(4),
SubSeqMaxOffset int(4),
SubSeqMinOffset int(4),
Pattern TEXT)
and
CREATE TABLE IF NOT EXISTS formats
(Version int(3),
DateCreated DATETIME,
FormatID int(4),
FormatName TEXT,
PUID TEXT,
FormatVersion TEXT,
FormatMIMEType TEXT,
InternalSignatureID int(4),
Extension TEXT,
HasPriorityOverFileFormatID int(4))
Py code
from lxml import etree
import re, os, MySQLdb
def doPatternPush(packedPatterns):
try:
db = MySQLdb.connect (host = "localhost", user = "root", passwd = "", db = "sigfiles")
c = db.cursor()
c.execute('''INSERT INTO sigfiles.patterns
(Version,DateCreated,SigID,SigSpecificity,ByteSeqReference,MinFragLength,Position,SubSeqMaxOffset,SubSeqMinOffset,Pattern)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''', packedPatterns)
db.commit()
except MySQLdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
sys.exit (1)
return (db)
def doFormatsPush(packedFormats):
try:
db = MySQLdb.connect (host = "localhost", user = "root", passwd = "", db = "sigfiles")
c = db.cursor()
c.execute('''INSERT INTO sigfiles.formats
(Version,DateCreated,FormatID,FormatName,PUID,FormatVersion,FormatMIMEType,InternalSignatureID,Extension,HasPriorityOverFileFormatID)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''', packedFormats)
db.commit()
except MySQLdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
sys.exit (1)
return(db)
def doParser(sigfile_filename):
tree = etree.parse(sigfile_filename)
root = tree.getroot()
attributes = root.attrib
if 'DateCreated' in root.attrib:
DateCreated = (attributes["DateCreated"])
if 'Version' in root.attrib:
Version = (attributes["Version"])
##--------- get internal sig details ------------------
for a in range (len(root[0])): #loops for sig ID
attributes = root[0][a].attrib
SigID=(attributes["ID"])
SigSpecificity = (attributes["Specificity"])
for b in range (len(root[0][a])): # loops for sequence pattern inside each sig
attributes = root[0][a][b].attrib
if 'Reference' in root[0][a][b].attrib:
ByteSeqReference = (attributes["Reference"])
else:
ByteSeqReference = "NULL"
attributes = root[0][a][b][0].attrib
if 'MinFragLength' in root[0][a][b][0].attrib:
MinFragLength=(attributes["MinFragLength"])
else:
MinFragLength=''
if 'Position' in root[0][a].attrib:
Position=(attributes["Position"])
else:
Position=''
if 'SubSeqMaxOffset' in root[0][a][b][0].attrib:
SubSeqMaxOffset=(attributes["SubSeqMaxOffset"])
else:
SubSeqMaxOffsee = ''
if 'SubSeqMinOffset' in root[0][a][b][0].attrib:
SubSeqMinOffset=(attributes["SubSeqMinOffset"])
else:
SubSeqMinOffset = ''
Pattern = root[0][a][b][0][0].text
packedPatterns = [Version,DateCreated,SigID,SigSpecificity,ByteSeqReference,MinFragLength,Position,SubSeqMaxOffset,SubSeqMinOffset,Pattern]
doPatternPush(packedPatterns)
##-------- get format ID details-------------
for a in range (len(root[1])):
attributes = root[1][a].attrib
if 'ID' in root[1][a].attrib:
FormatID = (attributes['ID'])
else:
FormatID = "NULL"
if 'Name' in root[1][a].attrib:
FormatName = (attributes['Name'])
else:
FormatName = "NULL"
if 'PUID' in root[1][a].attrib:
PUID = (attributes['PUID'])
else:
PUID = "NULL"
if 'Version' in root[1][a].attrib:
FormatVersion = (attributes['Version'])
else:
FormatVersion = "NULL"
if 'MIMEType' in root[1][a].attrib:
FormatMIMEType = (attributes['MIMEType'])
else:
FormatMIMEType = "NULL"
InternalSignatureID,Extension,HasPriorityOverFileFormatID = ('', 'NULL', '')
for b in range (len(root[1][a])): #extracts the tags for each format ID
tagType = root[1][a][b].tag
tagText = root[1][a][b].text
tagType = re.sub('{http://www.nationalarchives.gov.uk/pronom/SignatureFile}', '', tagType)
if tagType == 'InternalSignatureID':
InternalSignatureID = tagText
elif tagType == 'Extension':
Extension = tagText
HasPriorityOverFileFormatID = ''
else:
HasPriorityOverFileFormatID = tagText
Extension = 'NULL'
packedFormats = [Version,DateCreated,FormatID,FormatName,PUID,FormatVersion,FormatMIMEType,InternalSignatureID,Extension,HasPriorityOverFileFormatID]
doFormatsPush(packedFormats)
if __name__ == "__main__":
path = "C:\Users\NDHA\Desktop\droid sigs all"
for (path, dirs, files) in os.walk(path):
for file in files:
sigfile_filename = str(path)+"\\"+str(file)
doParser(sigfile_filename)
print sigfile_filename
db.close()
All the XML comes from here: http://www.nationalarchives.gov.uk/aboutapps/pronom/droid-signature-files.htm
The error you get tells you exactly what's wrong
NameError: global name 'sys' is not defined
You don't import sys in your python file.
As for the db connection, if your socket is not placed in /tmp/mysql.sock, you can specify where to look for it when you try to connect to the db using the unix_socket parameter.
Try:
db = MySQLdb.connect (unix_socket = 'path_to_sock', host = "localhost",
user = "root", passwd = "", db = "sigfiles")
Where you replace 'path_to_sock' with the actual path to the mysql sock.
Other stuff you should check in case that isn't the issue:
Check to make sure the username/password combination is correct
Try stopping and re-starting the mysqld service
Check the error log files for more specific errors
This is your first error:
Error 2003: Can't connect to MySQL server on 'localhost' (10055)
It seems you disconnect from MySQL at some point. Check your code and see if you're explicitly or implicitly getting disconnected from the server and also check if your MySQL server is still listening to connections... maybe you're killing the server from outside your app... who knows? :)