Does my lambda function go inside my main python script?

I don't know how to write a Lambda. Here is my main_script.py, which executes two stored procedures: it inserts records every day, then finds the difference between yesterday's and today's records and writes it to a table.
import logging
import pymysql as pm
import os
import json

class className:
    env = None
    config = None

    def __init__(self, env_filename):
        self.env = env_filename
        self.config = self.get_config()

    def get_config(self):
        with open(self.env) as file_in:
            return json.load(file_in)

    def DB_connection(self):
        config = self.get_config()
        username = config["exceptions"]["database-secrets"]["aws_secret_username"]
        password = config["exceptions"]["database-secrets"]["aws_secret_password"]
        host = config["exceptions"]["database-secrets"]["aws_secret_host"]
        port = config["exceptions"]["database-secrets"]["aws_secret_port"]
        database = config["exceptions"]["database-secrets"]["aws_secret_db"]
        return pm.connect(
            user=username,
            password=password,
            host=host,
            port=port,
            database=database
        )

    def run_all(self):
        def test_function(self):
            test_function_INSERT_QUERY = "CALL sp_test_insert();"
            test_function_EXCEPTIONS_QUERY = "CALL sp_test_exceptions();"
            test = self.config["exceptions"]["functions"]["test_function"]
            if test:
                with self.DB_connection() as cnxn:
                    with cnxn.cursor() as cur:
                        try:
                            cur.execute(test_function_INSERT_QUERY)
                            print("test_function_INSERT_QUERY insertion query ran successfully, {} records updated.".format(cur.rowcount))
                            cur.execute(test_function_EXCEPTIONS_QUERY)
                            print("test_function_EXCEPTIONS_QUERY exceptions query ran successfully, {} exceptions updated.".format(cur.rowcount))
                        except pm.Error as e:
                            print(f"Error: {e}")
                        except Exception as e:
                            logging.exception(e)
                        else:
                            cnxn.commit()

        test_function(self)

def main():
    cwd = os.getcwd()
    vfc = os.path.join(cwd, "_config.json")  # avoids the unescaped backslash in cwd + "\_config"
    ve = className(vfc)
    ve.run_all()

if __name__ == "__main__":
    main()
Would I write my lambda_handler function inside my script above or have it as a separate script?
def lambda_handler(event, context):
    # some code

I would treat lambda_handler(event, context) as the equivalent of main(), with the exception that you do not need the if __name__ ... clause, because you never run a Lambda function from the command line.
You will also need the boto3 library, which abstracts away AWS services and their functions. Have a look at the tutorial to get started.
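For your script, a minimal sketch of that restructuring (the /var/task path is where Lambda unpacks your deployment package; the return payload is illustrative, not prescribed):
import json

def lambda_handler(event, context):
    # Lambda unpacks your deployment package under /var/task,
    # so load the bundled config from there instead of os.getcwd()
    ve = className("/var/task/_config.json")
    ve.run_all()
    return {"statusCode": 200, "body": json.dumps("run_all completed")}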
As the first order of business, I would move the DB credentials out of the file system and into a secure datastore. You can of course configure Lambda environment variables, but Systems Manager Parameter Store is more secure and super easy to call from code, e.g.:
import boto3

ssm = boto3.client('ssm', region_name='us-east-1')

def lambda_handler(event, context):
    password = ssm.get_parameters(Names=['/pathto/password'], WithDecryption=True)['Parameters'][0]['Value']
    return {"password": password}
There is a more advanced option, Secrets Manager, which for a small fee will even rotate passwords for you (it is fully integrated with the Relational Database Service).
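A minimal sketch of reading from Secrets Manager (assuming an RDS-style secret stored as a JSON string of connection fields; the secret name prod/mydb/credentials is just a placeholder):
import json
import boto3

secrets = boto3.client('secretsmanager', region_name='us-east-1')

def get_db_credentials():
    response = secrets.get_secret_value(SecretId='prod/mydb/credentials')
    # RDS-managed secrets store the connection fields as a JSON string
    return json.loads(response['SecretString'])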

Related

How to mock a PostgreSQL database with a pytest fixture

I have a file with two functions that I must test that look like this:
def create_conn(config):
    conn = psycopg2.connect(dbname=config['dbname'], ...)
    return conn

def use_conn():
    conn = create_conn(CONSTANT_CONFIG)
    with conn.cursor() as cursor:
        cursor.execute("some sql query")
    conn.close()
I've been able to create pytest fixtures for testing functions that use DynamoDB and S3 against a mock database using moto, which essentially overrides any call to boto3, like below:
# in conftest.py
@pytest.fixture()
def s3_client(aws_credentials):
    with moto.mock_s3():
        client = boto3.client('s3')
        yield client

# in test file
def test_func(s3_client):
    func()  # a function that uses boto3
But I can't find any examples that do something similar for PostgreSQL databases. Is there any way to essentially override psycopg2.connect() to return a connection to a mock database? I think pytest-postgresql might help, but I couldn't find any code examples.
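One possible approach, sketched under the assumption that the two functions live in a module called mymodule (a name not from this thread), is to monkeypatch create_conn so use_conn() never touches a real server:
from unittest import mock

import pytest

@pytest.fixture()
def fake_pg_conn(monkeypatch):
    conn = mock.MagicMock()
    # MagicMock supports the context-manager protocol, so
    # "with conn.cursor() as cursor:" works out of the box
    monkeypatch.setattr("mymodule.create_conn", lambda config: conn)
    return conn

def test_use_conn(fake_pg_conn):
    import mymodule
    mymodule.use_conn()
    fake_pg_conn.close.assert_called_once()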

Why do I open the same database with SQLAlchemy but get different sessions, and how can I fix it?

I am writing some tests with pytest; I want to test creating a user and sending an email with a POST method.
After some debugging, I know the issue is that I open two in-memory databases, even though both come from the same SessionLocal().
So how can I fix this? I tried db.flush(), but it doesn't work.
This is the POST method code:
@router.post("/", response_model=schemas.User)
def create_user(
    *,
    db: Session = Depends(deps.get_db),  # get_db yields SessionLocal()
    user_in: schemas.UserCreate,
    current_user: models.User = Depends(deps.get_current_active_superuser),
) -> Any:
    """
    Create new user.
    """
    user = crud.user.get_by_email(db, email=user_in.email)
    if user:
        raise HTTPException(
            status_code=400,
            detail="The user with this username already exists in the system.",
        )
    user = crud.user.create(db, obj_in=user_in)
    print("====post====")
    print(db.query(models.User).count())
    print(db)
    if settings.EMAILS_ENABLED and user_in.email:
        send_new_account_email(
            email_to=user_in.email, username=user_in.email, password=user_in.password
        )
    return user
and the test code is:
def test_create_user_new_email(
    client: TestClient, superuser_token_headers: dict, db: Session  # db is SessionLocal()
) -> None:
    username = random_email()
    password = random_lower_string()
    data = {"email": username, "password": password}
    r = client.post(
        f"{settings.API_V1_STR}/users/", headers=superuser_token_headers, json=data,
    )
    assert 200 <= r.status_code < 300
    created_user = r.json()
    print("====test====")
    print(db.query(User).count())
    print(db)
    user = crud.user.get_by_email(db, email=username)
    assert user
    assert user.email == created_user["email"]
and the test result is:
> assert user
E assert None
====post====
320
<sqlalchemy.orm.session.Session object at 0x7f0a9f660910>
====test====
319
<sqlalchemy.orm.session.Session object at 0x7f0aa09c4d60>
Your code does not provide enough information to fully diagnose the problem; the key issues are probably in the parts that are hidden and only described by your comments.
It also seems like you are confusing SQLAlchemy sessions and databases. If you are not familiar with these concepts, I highly recommend you have a look at the SQLAlchemy documentation.
Looking at your code structure, it seems like you are using FastAPI.
If you want to test SQLAlchemy with pytest, I recommend using a pytest fixture with SQL transactions.
Here is my suggestion for how to implement such a test. I'll suppose that you want to run the tests on your actual database rather than create a new database especially for the tests. This implementation is heavily based on this GitHub gist (the author made a "feel free to use" statement, so I suppose he is OK with me copying his code here):
# test.py
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from fastapi.testclient import TestClient

from myapp.models import BaseModel
from myapp.main import app  # import your fastapi app
from myapp.database import get_db  # import the dependency

client = TestClient(app)

# scope="session" means the engine lasts for the whole test session
@pytest.fixture(scope="session")
def engine():
    return create_engine("postgresql://localhost/test_database")

# at the end of the test session, drop the created metadata using a fixture with yield
@pytest.fixture(scope="session")
def tables(engine):
    BaseModel.metadata.create_all(engine)
    yield
    BaseModel.metadata.drop_all(engine)

# here scope="function" (the default), so the database is cleaned each time a test finishes
@pytest.fixture
def dbsession(engine, tables):
    """Returns an sqlalchemy session, and after the test tears down everything properly."""
    connection = engine.connect()
    # begin the nested transaction
    transaction = connection.begin()
    # use the connection with the already started transaction
    session = Session(bind=connection)
    yield session
    session.close()
    # roll back the broader transaction
    transaction.rollback()
    # put back the connection to the connection pool
    connection.close()
## end of the gist.github code

@pytest.fixture
def db_fastapi(dbsession):
    def override_get_db():
        db = dbsession
        try:
            yield db
        finally:
            db.close()
    client.app.dependency_overrides[get_db] = override_get_db
    yield dbsession

# Now you can run your test
def test_create_user_new_email(db_fastapi):
    username = random_email()
    # ...

Can I improve the response time of this login service application by making parts of it asynchronous?

I have just written user login and logout, but I'm trying to figure out the most correct way of doing this. From the documentation, there seem to be ways to make some of the code asynchronous; do I really need to do this? I've included the hashing functions (which I got from Stack Overflow), so this is complete and can be built upon for very simple applications.
import os
import sqlite3
import hashlib
import binascii

from tornado.ioloop import IOLoop
from tornado.web import Application, RequestHandler
from tornado.options import define, options

define('port', default=80, help='port to listen on')

settings = dict(
    template_path=os.path.join(os.path.dirname(__file__), "templates"),
    static_path=os.path.join(os.path.dirname(__file__), "static"),
    debug=True,
    cookie_secret="changethis",
    login_url="/login",
    # xsrf_cookies=True,
)

def hash_password(password):
    """Hash a password for storing."""
    salt = hashlib.sha256(os.urandom(60)).hexdigest().encode('ascii')
    pwdhash = hashlib.pbkdf2_hmac('sha512', password.encode('utf-8'),
                                  salt, 100000)
    pwdhash = binascii.hexlify(pwdhash)
    return (salt + pwdhash).decode('ascii')

def verify_password(stored_password, provided_password):
    """Verify a stored password against one provided by user"""
    salt = stored_password[:64]
    stored_password = stored_password[64:]
    pwdhash = hashlib.pbkdf2_hmac('sha512',
                                  provided_password.encode('utf-8'),
                                  salt.encode('ascii'), 100000)
    pwdhash = binascii.hexlify(pwdhash).decode('ascii')
    return pwdhash == stored_password

try:
    db = sqlite3.connect('file:aaa.db?mode=rw', uri=True)
except sqlite3.OperationalError:
    db = sqlite3.connect("aaa.db")
    db.execute("CREATE TABLE Users (id INTEGER PRIMARY KEY, username TEXT NOT NULL UNIQUE, password TEXT NOT NULL);")

class BaseHandler(RequestHandler):
    def get_current_user(self):
        return self.get_secure_cookie("session")

class IndexHandler(BaseHandler):
    def get(self):
        if not self.current_user:
            self.write("not logged in")
            return
        count = db.execute("SELECT COUNT(*) FROM Users;").fetchone()
        self.write('{} users so far!'.format(count[0]))

class LoginHandler(BaseHandler):
    def get(self):
        if self.current_user:
            self.write("already logged in")
            return
        self.render("login.html")

    def post(self):
        if self.current_user:
            self.write("already logged in")
            return
        name = self.get_body_argument("username")
        query = db.execute("SELECT COUNT(*) FROM Users WHERE username = ?;", (name,)).fetchone()
        if query[0] == 0:
            self.write("user does not exist")
        else:
            hashed_password = db.execute("SELECT (password) FROM Users WHERE username = ?;", (name,)).fetchone()[0]
            if verify_password(hashed_password, self.get_body_argument("password")):
                self.set_secure_cookie("session", name)
                self.write("cookie set, logged in")
            else:
                self.write("wrong password")

class SignupHandler(BaseHandler):
    def get(self):
        if self.current_user:
            self.write("already logged in")
            return
        self.render("signup.html")

    def post(self):
        if self.current_user:
            self.write("already logged in")
            return
        name = self.get_body_argument("username")
        password = self.get_body_argument("password")
        try:
            with db:
                db.execute("INSERT INTO Users(username,password) VALUES (?,?);", (name, hash_password(password)))
        except sqlite3.IntegrityError:
            self.write("user exists")
            return
        self.write("user added")

class LogoutHandler(BaseHandler):
    def get(self):
        self.clear_cookie("session")
        self.write("logged out")

def main():
    routes = (
        (r'/', IndexHandler),
        (r'/login', LoginHandler),
        (r'/logout', LogoutHandler),
        (r'/signup', SignupHandler),
    )
    app = Application(routes, **settings)
    app.listen(options.port)
    IOLoop.current().start()

if __name__ == "__main__":
    main()
Short answer: No.
Async code doesn't make things "faster". Async code is just regular sync code with some extra capabilities to pause/resume operations. There's no speed gain. The purpose of async code is not speed, it's to achieve concurrency without the overhead of threads.
See these two functions in the following code:
def func1():
    data = get_data_from_database()
    return data

async def func2():
    data = await get_data_from_database()
    return data
func1 is synchronous and func2 is asynchronous. Both functions will have the same speed because they both have to wait for the database to return the data.
So, you can make your code async, but it won't result in any speed gain, because the database will return the data at its regular speed, and only after that will your code be able to perform further actions.
And don't use SQLite with Tornado. It runs in the same Python process as your Tornado code, and since it reads and writes data to/from disk, it will result in slow, blocking code that blocks the whole Tornado server and leads to poor performance. See below for an explanation of "blocking code".
Now, yes, you can make it asynchronous by running it in a separate thread, as sketched below, but then why not just use a standalone database like PostgreSQL or MySQL in the first place?
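A rough sketch of that thread-based approach using Tornado's run_in_executor (the handler and helper below are illustrative, not part of the original code):
from concurrent.futures import ThreadPoolExecutor
import sqlite3

from tornado.ioloop import IOLoop

executor = ThreadPoolExecutor(max_workers=4)

def count_users():
    # open the connection inside the worker thread: sqlite3 connections
    # cannot be shared across threads by default
    with sqlite3.connect("aaa.db") as conn:
        return conn.execute("SELECT COUNT(*) FROM Users;").fetchone()

class AsyncIndexHandler(BaseHandler):
    async def get(self):
        # the blocking query runs in the thread pool, off the event loop
        count = await IOLoop.current().run_in_executor(executor, count_users)
        self.write('{} users so far!'.format(count[0]))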
Blocking Code
Code that stops the program from moving forward or doing anything else is called blocking code.
Blocking code can be any of the following types:
Network bound operations. For example, making an HTTP request or a database request is a network bound operation, and it results in blocking code because the code can't move forward until it gets the response.
Disk bound operations. For example, reading a file from disk results in blocking code because if the disk is busy or slow, your code can't move forward until it gets the data from the disk.
CPU bound operations. For example, doing really heavy calculations that take up significant CPU time results in blocking code because the code can't move forward until it gets the result of the calculation from the CPU.
Asynchronous code is useful for network bound operations. For disk and CPU bound operations, synchronous code is better.

My Lambda Python function is returning null even after successful execution

I am a beginner with AWS services and Python. I used the code below in Lambda to connect to RDS, and I invoke it through API Gateway.
After successful execution, the code below returns null.
#!/usr/bin/python
import sys
import logging
import pymysql
import json

rds_host = "host"
name = "name"
password = "password"
db_name = "DB"
port = 3306

def save_events(event):
    """
    This function fetches content from mysql RDS instance
    """
    result = []
    conn = pymysql.connect(rds_host, user=name, passwd=password,
                           db=db_name, connect_timeout=30)
    with conn.cursor() as cur:
        cur.execute("SELECT * FROM exercise WHERE bid = '1'")
        for row in cur:
            result.append(list(row))
        print("Data from RDS...")
        print(result)
        cur.close()
    print(json.dumps({'bodyParts': result}))

def lambda_handler(event, context):
    save_events(event)
As pointed out in a comment by @John Gordon, you need to return something from your lambda_handler function.
It should be something like:
def lambda_handler(event, context):
    save_events(event)
    return {
        "statusCode": 200,
        "result": "Here is my result"
    }
Additionally, I don't see any return statement from save_events either.
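Putting the two together, a sketch of the fix (assuming an API Gateway proxy integration, which expects the response body as a JSON string):
import json

def save_events(event):
    result = []
    # ... populate result from RDS exactly as in the original function ...
    return result

def lambda_handler(event, context):
    result = save_events(event)
    return {
        "statusCode": 200,
        "body": json.dumps({"bodyParts": result})
    }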

boto3 check if Athena database exists

I'm making a script that creates a database in AWS Athena and then creates tables for that database. Today the DB creation was taking ages, so the tables being created referred to a DB that didn't exist yet. Is there a way to check whether a DB has already been created in Athena using boto3?
This is the part that creates the DB:
client = boto3.client('athena')

client.start_query_execution(
    QueryString='create database {}'.format('db_name'),
    ResultConfiguration=config
)
As pointed out in the other answer, the Athena waiter is still not implemented. I use this lightweight Athena client to run queries; it returns the S3 path of the result once the query completes.
# -*- coding: utf-8 -*-
import logging
import os
from time import sleep

import boto3
import pandas as pd
from backports.tempfile import TemporaryDirectory

logger = logging.getLogger(__name__)

class AthenaQueryFailed(Exception):
    pass

class Athena(object):
    S3_TEMP_BUCKET = "please-replace-with-your-bucket"

    def __init__(self, bucket=S3_TEMP_BUCKET):
        self.bucket = bucket
        self.client = boto3.Session().client("athena")

    def execute_query_in_athena(self, query, output_s3_directory, database="csv_dumps"):
        """ Useful when client executes a query in Athena and wants the result in the given `s3_directory`
        :param query: Query to be executed in Athena
        :param output_s3_directory: s3 path in which client wants results to be stored
        :return: s3 path
        """
        response = self.client.start_query_execution(
            QueryString=query,
            QueryExecutionContext={"Database": database},
            ResultConfiguration={"OutputLocation": output_s3_directory},
        )
        query_execution_id = response["QueryExecutionId"]
        filename = "{filename}.csv".format(filename=response["QueryExecutionId"])
        s3_result_path = os.path.join(output_s3_directory, filename)
        logger.info(
            "Query query_execution_id <<{query_execution_id}>>, result_s3path <<{s3path}>>".format(
                query_execution_id=query_execution_id, s3path=s3_result_path
            )
        )
        self.wait_for_query_to_complete(query_execution_id)
        return s3_result_path

    def wait_for_query_to_complete(self, query_execution_id):
        is_query_running = True
        backoff_time = 10
        while is_query_running:
            response = self.__get_query_status_response(query_execution_id)
            status = response["QueryExecution"]["Status"][
                "State"
            ]  # possible responses: QUEUED | RUNNING | SUCCEEDED | FAILED | CANCELLED
            if status == "SUCCEEDED":
                is_query_running = False
            elif status in ["CANCELLED", "FAILED"]:
                raise AthenaQueryFailed(status)
            elif status in ["QUEUED", "RUNNING"]:
                logger.info("Backing off for {} seconds.".format(backoff_time))
                sleep(backoff_time)
            else:
                raise AthenaQueryFailed(status)

    def __get_query_status_response(self, query_execution_id):
        response = self.client.get_query_execution(QueryExecutionId=query_execution_id)
        return response
The waiter functions for Athena are not implemented yet: Athena Waiter.
See Support AWS Athena waiter feature for a possible workaround until it is implemented in Boto3. This is how it is implemented in the AWS CLI:
while True:
    stats = self.athena.get_query_execution(execution_id)
    status = stats['QueryExecution']['Status']['State']
    if status in ['SUCCEEDED', 'FAILED', 'CANCELLED']:
        break
    time.sleep(0.2)
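To answer the original question more directly, a hedged sketch (not from these answers): Athena databases live in the Glue Data Catalog, so you can check for one by name with the Glue client:
import boto3
from botocore.exceptions import ClientError

def athena_database_exists(database_name):
    glue = boto3.client('glue')
    try:
        glue.get_database(Name=database_name)
        return True
    except ClientError as e:
        # Glue raises EntityNotFoundException when the database is absent
        if e.response['Error']['Code'] == 'EntityNotFoundException':
            return False
        raise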
