Mocking the S3 download_file operation for gzipped files - Python

I am trying to mock some S3 operations and after banging my head against the stubber object, I tried doing something as follows:
def mock_make_api_call(self, operation_name, kwarg):
    if operation_name == "ListObjectsV2":
        return {
            "KeyCount": 1,
            "Contents": [
                {"Key": "sensor_1", "LastModified": "2021-11-30T12:58:14+00:00"}
            ],
        }
    elif operation_name == "GetObjectTagging":
        return {"TagSet": []}
    elif operation_name == "HeadObject":
        return {
            "ContentLength": 10,
            "ContentType": "gzip",
            "ResponseMetadata": {
                "Bucket": "1",
            },
        }
    elif operation_name == "GetObject":
        content = get_object_response()
        return {
            "ContentLength": len(content),
            "ContentType": "xml",
            "ContentEncoding": "gzip",
            "Body": content,
            "ResponseMetadata": {
                "Bucket": "1",
            },
        }
It is the S3 download_file operation which is giving me a headache. As far as I can tell, it generates the HeadObject and GetObject calls.
My content generation method is as follows:
def get_object_response():
    content = b"<some-valid-xml>"
    buf = BytesIO()
    compressed = gzip.GzipFile(fileobj=buf, mode="wb")
    compressed.write(content)
    compressed.close()
    return buf.getvalue()
The way it gets used is:
with NamedTemporaryFile() as tmp:
    s3_client.download_file(Bucket=..., Key=..., Filename=tmp.name)
However, my test fails with:
self = <s3transfer.utils.StreamReaderProgress object at 0x116a77820>
args = (262144,), kwargs = {}

    def read(self, *args, **kwargs):
>       value = self._stream.read(*args, **kwargs)
E       AttributeError: 'bytes' object has no attribute 'read'
I simply cannot figure out how to encode the response so that the generated content can be saved.
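For what it's worth, the traceback points at the fix: download_file reads the Body with .read(), so the mocked GetObject response needs a file-like object rather than raw bytes. A minimal sketch, assuming the mock is patching botocore.client.BaseClient._make_api_call as above, is to wrap the gzipped payload in botocore's StreamingBody:

from io import BytesIO

from botocore.response import StreamingBody


def mock_get_object_response():
    # Reuse the gzipped payload from get_object_response() above.
    content = get_object_response()
    return {
        "ContentLength": len(content),
        "ContentType": "xml",
        "ContentEncoding": "gzip",
        # StreamingBody exposes .read(), which is what
        # s3transfer's StreamReaderProgress calls.
        "Body": StreamingBody(BytesIO(content), len(content)),
        "ResponseMetadata": {
            "Bucket": "1",
        },
    }

The HeadObject mock may also need its ContentLength to match len(content), since s3transfer sizes the transfer from that value.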

Related

FastAPI mocking dependencies

I am writing unit test cases for my FastAPI project and am unable to mock a DynamoDB call.
File_1
This file has all the methods to perform DynamoDB actions using boto3 calls.
class DynamoDBRepository:
    # Insert Item - inserts a value
    # Get Item - returns a value
File_2
This file has an "AppConfig" class which will be used as a dependency in a later file:
from file_1 import DynamoDBRepository


class AppConfig:
    def __init__(self) -> None:
        """Constructor class to instantiate dynamodb"""
        self._job_table = "Dynamo_DB_Table"
        self._region = "Table_region"
        # Object for the DynamoDB repository class from file_1.
        self._dynamodb_repository = DynamoDBRepository(table=self._job_table, region=self._region)
File_3:
This file has the FastAPI route decorator:
from file_2 import AppConfig


@router.get(
    "/object/{object_id}"
)
def get_request(
    object_id: str,
    objects: AppConfig = Depends(AppConfig),
) -> ObjectBody:
    try:
        object_detail = objects._dynamodb_repository.get_item({"object_id": object_id})
        return object_detail["Item"]
    except Exception:
        # (error handling truncated in the original snippet)
        raise
I am trying to mock the get_item method in my test file:
File_4
This is my test file:
client = TestClient(fast_api_app)


class MockAppConfig:
    def __init__(self) -> None:
        """Constructor class to instantiate dynamodb and lambda"""
        self._job_table = "Dynamo_DB_Table"
        self._region = "Table_region"
        self._dynamodb_repository = DynamoDBRepository(table=self._job_table, region=self._region)
def test_get_request():
    fast_api_app.dependency_overrides[AppConfig] = MockAppConfig
    MockAppConfig()._dynamodb_repository.get_item = {
        "id": "1234",
        "title": "Foo",
        "description": "Hello",
    }
    response = client.get("/objects/1234")
    assert response.status_code == 200
    assert response.json() == {
        "id": "foo",
        "title": "Foo",
        "description": "Hi",
    }
The mocking of get_item is not working: the test still queries the real DynamoDB table and fails the credential check.
I tried the monkeypatch and fastapi_dep fixtures, and also plain patching, but somehow the get_item mocking isn't applied.
Will mocking the get_item method work? Note that in your test the dict is assigned to get_item on a throwaway MockAppConfig() instance, not on the repository the route actually constructs, and a plain dict is not callable anyway. Swapping in a mock repository class should work:
class MockDynamoDBRepository():
    def __init__(self, *args, **kwargs):
        pass

    def get_item(self, *args, **kwargs):
        return {
            "Item": {
                "id": "foo",
                "title": "Foo",
                "description": "Hi",
            }
        }
class MockAppConfig:
    def __init__(self) -> None:
        """Constructor class to instantiate dynamodb and lambda"""
        self._job_table = "Dynamo_DB_Table"
        self._region = "Table_region"
        self._dynamodb_repository = MockDynamoDBRepository(table=self._job_table, region=self._region)
def test_get_request():
    fast_api_app.dependency_overrides[AppConfig] = MockAppConfig
    response = client.get("/objects/1234")
    assert response.status_code == 200
    assert response.json() == {
        "id": "foo",
        "title": "Foo",
        "description": "Hi",
    }
Building on @svfat's answer, here is how you can do the test with fastapi_dep - pick either of the test approaches, the with clause or the indirect parameter:
class MockDynamoDBRepository():
    def __init__(self, *args, **kwargs):
        pass

    def get_item(self, *args, **kwargs):
        return {
            "Item": {
                "id": "foo",
                "title": "Foo",
                "description": "Hi",
            }
        }


class MockAppConfig:
    def __init__(self) -> None:
        """Constructor class to instantiate dynamodb and lambda"""
        self._job_table = "Mock Dynamo_DB_Table"
        self._region = "Mock Table_region"
        self._dynamodb_repository = MockDynamoDBRepository(table=self._job_table,
                                                           region=self._region)
def test_get_request_deps(fastapi_dep):
    with fastapi_dep(fast_api_app).override(
        {
            AppConfig: MockAppConfig,
        }
    ):
        response = client.get("/objects/1234")
        assert response.status_code == 200
        assert response.json() == {
            "id": "foo",
            "title": "Foo",
            "description": "Hi",
        }
@pytest.mark.parametrize(
    "fastapi_dep",
    [
        (
            fast_api_app,
            {AppConfig: MockAppConfig},
        )
    ],
    indirect=True,
)
def test_get_request_deps_indirect(fastapi_dep):
    response = client.get("/objects/1234")
    assert response.status_code == 200
    assert response.json() == {
        "id": "foo",
        "title": "Foo",
        "description": "Hi",
    }
If you don't want to create all the extra classes, you can use the pure mock approach like so:
from mock.mock import MagicMock


def test_get_request_deps_mock(fastapi_dep):
    my_mock = MagicMock()
    my_mock._dynamodb_repository.get_item.return_value = {
        "Item": {
            "id": "foo",
            "title": "Foo",
            "description": "Hi",
        }
    }
    with fastapi_dep(file_3.app).override(
        {
            AppConfig: lambda: my_mock,
        }
    ):
        response = client.get("/objects/1234")
        assert response.status_code == 200
        assert response.json() == {
            "id": "foo",
            "title": "Foo",
            "description": "Hi",
        }
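One caveat worth noting (an addition, not part of the original answers): overrides installed via fast_api_app.dependency_overrides persist across tests unless they are cleared. A small autouse pytest fixture keeps them from leaking:

import pytest


@pytest.fixture(autouse=True)
def clear_dependency_overrides():
    # Run the test first, then drop whatever overrides it installed
    # so they cannot affect later tests.
    yield
    fast_api_app.dependency_overrides = {}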

How to check AWS lambda handler in unit test python

I need to write a unit test for an AWS Lambda handler. I want to give some JSON files to the handler method and get some output, but I've run into trouble mocking the S3 "event".
My lambda handler looks like this:
def handler(event, context):
    print(f'Event: {event}')
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(event["bucket"])
    word_correction = correction.WordCorrection()
    for obj in bucket.objects.all():
        key = obj.key
        body = obj.get()['Body'].read()
        data = get_data_from_file(body)
        if key.endswith('.json'):
            try:
                word_correction.create_duplicated_words_file(data)
            except Exception as e:
                print(e)
                return {
                    "success": False,
                    "response": f"Failed to read file - {e}"
                }
            try:
                corrected_word_list = word_correction.spell_words(json.loads(body))
            except Exception as e:
                print(e)
                return {
                    "success": False,
                    "response": f"Failed to correct words - {e}"
                }
        else:
            return {
                "success": False,
                "response": "Invalid file type. File must have .json extension."
            }
    return {
        "success": True,
        "response": corrected_word_list
    }
Here I pass some JSON data to my word_correction module.
From this point, my unit test:
S3_BUCKET_NAME = 'dev-ayazv-lambda-spell-correction'
DEFAULT_REGION = 'us-east-1'
S3_TEST_FILE_KEY = 'pdfs/manual_test/page-data-page-9.json'
S3_TEST_FILE_CONTENT = {"Blocks": [{"BlockType": "WORD", "Confidence": 93.18, "Text": "Test"}]}


@mock_s3
class TestLambdaFunction(unittest.TestCase):
    def setUp(self):
        self.s3 = boto3.resource('s3', region_name=DEFAULT_REGION)
        self.s3_bucket = self.s3.create_bucket(Bucket=S3_BUCKET_NAME)
        self.s3_bucket.put_object(Key=S3_TEST_FILE_KEY,
                                  Body=json.dumps(S3_TEST_FILE_CONTENT))

    def test_get_data_from_file(self):
        from functions.spell_correction.src.index import get_data_from_file
        json_encode_data = json.dumps(S3_TEST_FILE_CONTENT, indent=2).encode('utf-8')
        file_content = get_data_from_file(json_encode_data)
        self.assertEqual(file_content, S3_TEST_FILE_CONTENT)

    def test_handler(self):
        from functions.spell_correction.src.index import handler
        event = {
            'bucket': {
                'name': S3_BUCKET_NAME
            },
            'object': {
                'key': S3_TEST_FILE_KEY
            }
        }
        result = handler(event, {})
        self.assertEqual(result, {"success": True, "response": []})
I was trying to mock the S3 bucket, the test file key, and the file content.
But I face the problem that my test fails at this point:
bucket = {'name': 'dev-ayazv-lambda-spell-correction'}

    def validate_bucket_name(params, **kwargs):
        if 'Bucket' not in params:
            return
        bucket = params['Bucket']
>       if not VALID_BUCKET.search(bucket) and not VALID_S3_ARN.search(bucket):
E       TypeError: expected string or bytes-like object
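The traceback explains the failure: the test event carries {'name': S3_BUCKET_NAME} under 'bucket', and the handler passes that whole dict to s3.Bucket(), so botocore's bucket-name validation receives a dict instead of a string. A sketch of the fix, assuming the event shape used in the test:

import boto3


def handler(event, context):
    s3 = boto3.resource('s3')
    # The test event is {'bucket': {'name': ...}}, so pass the name
    # string, not the enclosing dict, to s3.Bucket().
    bucket = s3.Bucket(event["bucket"]["name"])
    ...  # rest of the handler unchanged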

How to align a table in a Google document using the Google Docs API and Python

I've got a solution for adding a table without borders, suggested by Tanaike, but I'm still facing indexing issues.
I want to insert data into the document in the following order (function insert_data(file_id)):
1. Insert an image in the document (index = 1)
2. Insert text in the document (index = 2)
3. Insert a table having invisible borders (index = 3)
4. Insert text in the document (index = 4)
5. Insert another table having invisible borders (index = 5)
6. Insert a new line (index = 6)
7. Insert an image in the document (index = 7)
The code I'm trying is:
import io
from gdoctableapppy import gdoctableapp

SERVICE_FILENAME = 'C:/Users/XYZ/Testpython/service_account.json'  # set path to service account filename

from googleapiclient.discovery import build
from google.oauth2 import service_account
from googleapiclient.http import MediaIoBaseDownload, MediaFileUpload

credentials = service_account.Credentials.from_service_account_file(SERVICE_FILENAME,
                                                                    scopes=['https://www.googleapis.com/auth/drive',
                                                                            'https://www.googleapis.com/auth/documents']
                                                                    )
docs = build('docs', 'v1', credentials=credentials)
drive = build('drive', 'v3', credentials=credentials)


def create_file(file_name):
    file_metadata = {
        "title": file_name,
        "body": {}
    }
    file = docs.documents().create(body=file_metadata).execute()
    print('File ID: %s' % file.get('documentId'))
    file_id = file.get('documentId')
    try:
        permission = {
            "role": "writer",
            "type": "user",
            'emailAddress': 'xyz@gmail.com'
        }
        result = drive.permissions().create(fileId=file_id, body=permission).execute()
        print(result)
        return file_id
    except Exception as e:
        print('An error occurred:', e)
        return None
def insert_data(file_id):
    requests = []
    values = [['Name of the Client/Organization', 'XYZ'], ['Industry', 'Software']]
    requests.append(insert_table_data(file_id, values, index=3))
    values2 = [['Country', 'India'], ['State', 'UP']]
    requests.append(insert_table_data(file_id, values2, index=5))
    requests.append(insert_image(index=1))
    requests.append(insert_text(2, '\ntext\n'))
    requests.append(insert_text(4, '\nDemo text\n'))
    requests.append(insert_text(6, '\n'))
    requests.append(insert_image(index=7))
    result = docs.documents().batchUpdate(documentId=file_id, body={'requests': requests}).execute()
def insert_image(index):
    image_data = {
        'insertInlineImage': {
            'location': {
                'index': index
            },
            'uri':
                'https://www.oberlo.com/media/1603970279-pexels-photo-3.jpg?fit=max&fm=jpg&w=1824',
            'objectSize': {
                'height': {
                    'magnitude': 350,
                    'unit': 'PT'
                },
                'width': {
                    'magnitude': 350,
                    'unit': 'PT'
                }
            }
        }
    }
    return image_data
def insert_text(index, text):
    text_data = {
        "insertText": {
            "text": text,
            "location": {
                "index": index
            }
        }
    }
    return text_data
def insert_table_data(file_id, values, index):
    documentId = file_id
    resource = {
        "oauth2": credentials,
        "documentId": documentId,
        "rows": len(values),
        "columns": len(values[0]),
        # "append": True,
        "createIndex": index,
        "values": values,
    }
    gdoctableapp.CreateTable(resource)
    resource = {
        "oauth2": credentials,
        "documentId": documentId,
    }
    res = gdoctableapp.GetTables(resource)
    obj = {"color": {"color": {}}, "dashStyle": "SOLID", "width": {"magnitude": 0, "unit": "PT"}}
    data = {
        "updateTableCellStyle": {
            "tableCellStyle": {
                "borderBottom": obj,
                "borderTop": obj,
                "borderLeft": obj,
                "borderRight": obj,
            },
            "tableStartLocation": {
                "index": res['tables'][-1]['tablePosition']['startIndex']
            },
            "fields": "borderBottom,borderTop,borderLeft,borderRight"
        }
    }
    # docs.documents().batchUpdate(documentId=documentId, body={'requests': requests}).execute()
    return data
def download_as_docx(file_id):
    results = drive.files().get(fileId=file_id, fields="id, name, mimeType, createdTime").execute()
    docMimeType = results['mimeType']
    mimeTypeMatchup = {
        "application/vnd.google-apps.document": {
            "exportType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "docExt": "docx"
        }
    }
    exportMimeType = mimeTypeMatchup[docMimeType]['exportType']
    # docExt = mimeTypeMatchup[docMimeType]['docExt']
    docName = results['name']
    request = drive.files().export_media(fileId=file_id,
                                         mimeType=exportMimeType)  # Export formats: https://developers.google.com/drive/api/v3/ref-export-formats
    # fh = io.FileIO(docName + "." + docExt, mode='w')
    fh = io.FileIO(docName, mode='w')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
def download_as_pdf(file_id, file_name):
    request = drive.files().export_media(fileId=file_id,
                                         mimeType='application/pdf')
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
    fh.seek(0)
    filename = file_name.split('.docx')[0] + '.pdf'
    with open(filename, 'wb') as fx:
        fx.write(fh.getvalue())
def delete_gdrive_file(file_id):
    """Deletes a file on Google Drive
    :param file_id: ID of Google Drive file
    """
    response = drive.files().delete(fileId=file_id).execute()
    print(response)


if __name__ == '__main__':
    file_name = 'Data.docx'
    file_id = create_file(file_name)
    insert_data(file_id)
    download_as_docx(file_id)
    download_as_pdf(file_id, file_name)
    delete_gdrive_file(file_id)
Error:
returned "Invalid requests[0].insertTable: Index 4 must be less than the end index of the referenced segment, 2.". Details: "Invalid requests[0].insertTable: Index 4 must be less than the end index of the referenced segment, 2.">
I guess the end index of the table goes to 67, but even if I try to insert new data at index 68, it either appends to the last cell of the table or sometimes throws an indexing error.
How should I make the whole data-insertion flow dynamic in the Google Doc?
Modification points:
The gdoctableapp library creates a table in a single call. Because of this, when you run the flow from your question, the indexes shift as each table is inserted; I thought that this is the reason for your issue.
In this case, how about the following modification?
Modified script:
Please modify insert_table_data as follows.
def insert_table_data(file_id, values, index):
    documentId = file_id
    resource = {
        "oauth2": credentials,
        "documentId": documentId,
        "rows": len(values),
        "columns": len(values[0]),
        # "append": True,
        "createIndex": index,
        "values": values,
    }
    gdoctableapp.CreateTable(resource)
And also, please modify insert_data as follows.
def insert_data(file_id):
    # Insert texts and images.
    index = 1
    requests = []
    requests.append(insert_image(index))
    index += 1
    text1 = '\ntext\n'
    requests.append(insert_text(index, text1))
    index += len(text1)
    table1 = index
    text2 = '\nDemo text\n'
    requests.append(insert_text(index, text2))
    index += len(text2)
    table2 = index
    text3 = '\n'
    requests.append(insert_text(index, text3))
    index += len(text3)
    requests.append(insert_image(index))
    docs.documents().batchUpdate(documentId=file_id, body={'requests': requests}).execute()

    # Create tables.
    values2 = [['Country', 'India'], ['State', 'UP']]
    insert_table_data(file_id, values2, table2)
    values1 = [['Name of the Client/Organization', 'XYZ'], ['Industry', 'Software']]
    insert_table_data(file_id, values1, table1)

    # Remove borders of tables.
    resource = {"oauth2": credentials, "documentId": file_id}
    res = gdoctableapp.GetTables(resource)
    obj = {"color": {"color": {}}, "dashStyle": "SOLID", "width": {"magnitude": 0, "unit": "PT"}}
    reqs = []
    for e in res['tables']:
        data = {
            "updateTableCellStyle": {
                "tableCellStyle": {
                    "borderBottom": obj,
                    "borderTop": obj,
                    "borderLeft": obj,
                    "borderRight": obj,
                },
                "tableStartLocation": {
                    "index": e['tablePosition']['startIndex']
                },
                "fields": "borderBottom,borderTop,borderLeft,borderRight"
            }
        }
        reqs.append(data)
    docs.documents().batchUpdate(documentId=file_id, body={'requests': reqs}).execute()
In this modification, the texts and images are inserted first, and the tables are created afterwards, from the higher index down (table2 before table1) so that creating one table does not shift the insertion point of the other. By this, the indexes for the tables can be correctly retrieved.
Note:
This modified script is tailored to your question. If your actual situation differs from it, the script might not be directly usable, so please be careful about this.

Flask Restful: Converting CSV file to JSON Object

I'm reading a file from the frontend and converting it to JSON consisting of a UUID and a list of JSON objects:
class File(Resource):
    def post(self):
        if 'file' not in request.files:
            return {"message": "No File in the request!"}, 400
        file = request.files['file']
        if file.filename == '':
            return {'message': "No File selected!"}, 400
        if file:
            filename = secure_filename(file.filename)
            # file_contents = file.read()
            print(file)
            converted_json = File.csv_to_json(file)
            new_request = {
                'uuid': str(uuid.uuid4()),
                'devices': converted_json
            }
            print(new_request)
            return new_request, 201
    @classmethod
    def csv_to_json(cls, data):
        df = pd.read_csv(data, header=None)
        df.columns = ['name', 'username', 'password', 'domain']
        print(df)
        df = df.to_json(orient='records')
        print(df)
        return df
Output is double encoded:
{
    "uuid": "1a09ad79-dc78-4759-9aa0-f1dda9c08dc4",
    "details": "[{\"name\":\"joe\",\"username\":\"admin\",\"password\":\"admin\",\"domain\":\"abc.xyz.com\"}]"
}
How can I get a JSON object output like this (with a newline after each ','):
{
    "uuid": "1a09ad79-dc78-4759-9aa0-f1dda9c08dc4",
    "details": [
        {"name": "joe",
         "username": "admin",
         "password": "admin",
         "domain": "abc.xyz.com"}
    ]
}
Never mind!
Using 'json.loads(df)' after 'df = df.to_json(orient='records')' did the trick!
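For completeness, a sketch of the corrected method: to_json(orient='records') returns a JSON string, so parsing it back with json.loads gives Flask-RESTful a real list of dicts that it serializes only once:

import json

import pandas as pd
from flask_restful import Resource


class File(Resource):  # as in the question
    @classmethod
    def csv_to_json(cls, data):
        df = pd.read_csv(data, header=None)
        df.columns = ['name', 'username', 'password', 'domain']
        # to_json() produces a JSON string; json.loads turns it back
        # into Python objects, avoiding the double encoding.
        return json.loads(df.to_json(orient='records'))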

Writing a dictionary to JSON using Python

I'm new to Python programming, so do bear with me if I make any mistakes anywhere.
I'm trying to write a JSON file using 2 dictionaries and dump the output to the file using the following code on Windows:
import json
import sys
import string
from time import strftime

scan_results = open("scan_results.txt", "r")
saved = sys.stdout
f = file('report.json', 'wb')
sys.stdout = f

for line in scan_results:
    if ".jpg" in line:
        lst = []
        result = line.split('\\')
        result_split = result[5].split(' ')
        filename = result_split[0]
        raw_status = result_split[3]
        if "OK" in raw_status:
            status = "Okay"
            status_code = "0"
            dict = {'FileName': filename, 'DateTime': strftime("%Y-%m-%d %H:%M:%S"), 'statusCode': status_code, 'Description': status}
            dict2 = {filename: dict}
            print json.dumps(dict2)

sys.stdout = saved
f.close()
print "JSON report written"
The problem is, the output that I have is
{
    "car-30537.jpg": {
        "statusCode": "0",
        "DateTime": "2012-02-07 09:52:26",
        "Description": "Okay",
        "FileName": "car-30537.jpg"
    }
}{
    "car-30538.jpg": {
        "statusCode": "0",
        "DateTime": "2012-02-07 09:52:26",
        "Description": "Okay",
        "FileName": "car-30538.jpg"
    }
}
whereas the output that I want is
{
    "car-30537.jpg": {
        "statusCode": "0",
        "DateTime": "2012-02-07 09:52:26",
        "Description": "Okay",
        "FileName": "car-30537.jpg"
    },
    "car-30538.jpg": {
        "statusCode": "0",
        "DateTime": "2012-02-07 09:52:26",
        "Description": "Okay",
        "FileName": "car-30538.jpg"
    }
}
Is there any way to correct this problem? Thanks in advance.
You are building and printing a separate dict on every iteration, while you only need one main dict that collects them all:
import json
import sys
import string
from time import strftime

scan_results = open("scan_results.txt", "r")
saved = sys.stdout
f = file('report.json', 'wb')
sys.stdout = f

dict2 = {}  # Create one output dict
for line in scan_results:
    if ".jpg" in line:
        lst = []
        result = line.split('\\')
        result_split = result[5].split(' ')
        filename = result_split[0]
        raw_status = result_split[3]
        if "OK" in raw_status:
            status = "Okay"
            status_code = "0"
            dict2[filename] = {'FileName': filename, 'DateTime': strftime("%Y-%m-%d %H:%M:%S"), 'statusCode': status_code, 'Description': status}  # Add to that dict.

print json.dumps(dict2)  # Print it out at the end.
sys.stdout = saved
f.close()
print "JSON report written"
I added comments to modified lines.
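As a side note, the sys.stdout redirection isn't needed at all; json.dump can write the single dict straight to the report file. A sketch of the same logic in modern Python 3 (the original is Python 2):

import json
from time import strftime

report = {}
with open("scan_results.txt") as scan_results:
    for line in scan_results:
        if ".jpg" not in line:
            continue
        result_split = line.split('\\')[5].split(' ')
        filename, raw_status = result_split[0], result_split[3]
        if "OK" in raw_status:
            report[filename] = {
                'FileName': filename,
                'DateTime': strftime("%Y-%m-%d %H:%M:%S"),
                'statusCode': "0",
                'Description': "Okay",
            }

# json.dump writes directly to the file handle; no stdout redirection.
with open('report.json', 'w') as f:
    json.dump(report, f, indent=4)

print("JSON report written")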
