I'm trying to download a folder from my S3 bucket. I want to use s3.client because I have used the client method further in the code, but I'm not able to access the bucket using the client method. When I use s3Client.Bucket(bucketName) I get an error saying it has no attribute Bucket. When I use s3Client.get_object(Bucket=bucketName, Key= ?), it first said Key is required; what should the key be, is it the folder I want to download? Please let me know what I am doing wrong here. Thank you.
awsParams = {
    "bucket_name": "asgard-icr-model",
    "region_name": "ap-south-1"
}

def get_s3_client():
    s3Client = boto3.client('s3')
    return s3Client

def download_from_s3(srcDir, dstDir):
    try:
        bucketName = awsParams['bucket_name']  # s3 bucket name
        s3Client = get_s3_client()
        bucket = s3Client.Bucket(bucketName)  # I get an error saying - client has no attribute Bucket.
        bucket = s3Client.get_object(Bucket=bucketName, Key= ?)  # If I use this line instead of the previous one, what should my key be here?
So if I make this change instead, what should I use in place of list_objects_v2() with s3.resource, since the resource has no attribute with that name?
def get_s3_object():
    s3Obj = boto3.resource("s3", region_name=awsParams['region_name'])
    return s3Obj

def download_from_s3(srcDir, dstDir):
    try:
        bucketName = awsParams['bucket_name']  # s3 bucket name
        # s3Client = get_s3_client()
        # bucket = s3Client.Bucket(bucketName)
        # bucket = s3Client.get_object(Bucket=bucketName, Key=)
        s3Obj = get_s3_object()
        bucket = s3Obj.Bucket(bucketName)
        keys = []
        dirs = []
        next_token = ''
        base_kwargs = {
            'Bucket': bucket,
            'srcDir': srcDir,
        }
        while next_token is not None:
            kwargs = base_kwargs.copy()
            if next_token != '':
                kwargs.update({'ContinuationToken': next_token})
            results = s3Client.list_objects_v2(**kwargs)
            contents = results.get('Contents')
            for i in contents:
                k = i.get('Key')
                if k[-1] != '/':
                    keys.append(k)
                else:
                    dirs.append(k)
            next_token = results.get('NextContinuationToken')
        for d in dirs:
            dest_pathname = os.path.join(local, d)
            if not os.path.exists(os.path.dirname(dest_pathname)):
                os.makedirs(os.path.dirname(dest_pathname))
        for k in keys:
            dest_pathname = os.path.join(local, k)
            if not os.path.exists(os.path.dirname(dest_pathname)):
                os.makedirs(os.path.dirname(dest_pathname))
            s3Client.download_file(bucket, k, dest_pathname)
    except Exception as e:
        raise
When using a client, you can obtain a list of objects with:
s3_client = boto3.client('s3')
results = s3_client.list_objects_v2(Bucket=...)
for object in results['Contents']:
    print(object['Key'])
When using a resource, you can use:
s3_resource = boto3.resource('s3')
bucket = s3_resource.Bucket('Bucketname')
for object in bucket.objects.all():
    print(object.key)
You should be using a resource, not a client:
    s3Resource = boto3.resource('s3')
    return s3Resource
and then:
    bucket = s3Resource.Bucket(bucketName)
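Putting those pieces together, here is a minimal sketch of the whole "download a folder" flow with a resource; the prefix and local directory are placeholders, and this is adapted from the snippets above rather than being the asker's exact code:

import os
import boto3

def download_prefix(bucket_name, src_prefix, dst_dir):
    # Sketch only: download every object under src_prefix into dst_dir.
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    for obj in bucket.objects.filter(Prefix=src_prefix):
        if obj.key.endswith('/'):
            continue  # skip zero-byte "folder" placeholder keys
        dest_path = os.path.join(dst_dir, obj.key)
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        bucket.download_file(obj.key, dest_path)

# Hypothetical usage:
# download_prefix('asgard-icr-model', 'some/folder/', '/tmp/download')

There is no "download a folder" API in S3; listing by prefix and downloading each key is the usual approach.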
I want to copy a sub-subfolder in an S3 bucket into a different bucket using Python (boto3).
However, the process is painfully slow.
If I copy the folder "by hand" straight on S3 from the browser, the process takes 72 seconds (for a folder with around 140 objects, total size roughly 1.0 GB).
However, if I try to copy it with boto3, it takes 9 times longer (653 seconds).
This is the code that I am using, re-adapted from the boto3 documentation and various answers here in SO:
import boto3
s3 = boto3.resource('s3')
# define source bucket
src_bucket_name = 'bucket_1'
prefix = 'folder_1/'
client = boto3.client('s3')
src_bucket = s3.Bucket(src_bucket_name)
# define destination bucket
dest_bucket_name = 'bucket_2'
dest_bucket = s3.Bucket(dest_bucket_name)
folder = "folder_1/subfolder_1"
response_sub = client.list_objects_v2(Bucket=src_bucket_name, Prefix = folder)
# list files to be copied (select only images, but in this folder there are only images anyway)
files_src = [prefix['Key'] for prefix in response_sub['Contents'] if prefix['Key'].split('.')[-1].lower() in ['jpg','jpeg','png','tiff'] ]
# list of file names after copy
dest_prefix = 'folder_1/subfolder_1/'
files_dest = [dest_prefix+i for i in files_src]
for src, dest in zip(files_src, files_dest):
    copy_source = {
        'Bucket': src_bucket_name,
        'Key': src
    }
    dest_bucket.copy(copy_source, dest)
Note that up to the last for loop, the code takes only a couple of seconds to run.
Any idea how to speed this up? Am I doing something stupid, or should I use some other way of copying files or entire folders?
Thanks to @Suyog Shimpi (who pointed to a similar SO post), I was able to significantly speed up the copying process.
Here is the code, slightly readapted from the other post:
import os
import boto3
import botocore
import boto3.s3.transfer as s3transfer
import tqdm
s3 = boto3.resource('s3')
# define source bucket
src_bucket_name = 'bucket_1'
prefix = 'folder_1/'
client = boto3.client('s3')
src_bucket = s3.Bucket(src_bucket_name)
# define destination bucket
dest_bucket_name = 'bucket_2'
dest_bucket = s3.Bucket(dest_bucket_name)
folder = "folder_1/subfolder_1"
response_sub = client.list_objects_v2(Bucket=src_bucket_name, Prefix = folder)
# list files to be copied (select only images, but in this folder there are only images anyway)
files_src = [prefix['Key'] for prefix in response_sub['Contents'] if prefix['Key'].split('.')[-1].lower() in ['jpg','jpeg','png','tiff'] ]
# list of file names after copy
dest_prefix = 'folder_1/subfolder_1/'
files_dest = [dest_prefix+i for i in files_src]
botocore_config = botocore.config.Config(max_pool_connections=20)
s3client = boto3.client('s3', config=botocore_config)
transfer_config = s3transfer.TransferConfig(
    use_threads=True,
    max_concurrency=20,
)
# note that timing the process is optional
# total_size of the files can be obtained with boto3, or on the browser
%time
progress = tqdm.tqdm(
    desc='upload',
    total=total_size, unit='B', unit_scale=1,
    position=0,
    bar_format='{desc:<10}{percentage:3.0f}%|{bar:10}{r_bar}')
s3t = s3transfer.create_transfer_manager(s3client, transfer_config)
for src, dest in zip(files_src, files_dest):
    copy_source = {
        'Bucket': src_bucket_name,
        'Key': src
    }
    s3t.copy(copy_source=copy_source,
             bucket=dest_bucket_name,
             key=dest,
             subscribers=[s3transfer.ProgressCallbackInvoker(progress.update)],
             )
# close transfer job
s3t.shutdown()
progress.close();
Thanks Fraccalo for your solution, it helped me a lot!
I adjusted it a little so that we can copy more than 1000 files:
import boto3
import botocore
import boto3.s3.transfer as s3transfer
import tqdm
s3 = boto3.resource('s3')
# define source bucket
src_bucket_name = 'bucket_1'
prefix = 'folder_1/'
client = boto3.client('s3')
src_bucket = s3.Bucket(src_bucket_name)
# define destination bucket
dest_bucket_name = 'bucket_2'
dest_bucket = s3.Bucket(dest_bucket_name)
folder = "folder_1/subfolder_1"
files_src = []
bucket_size = 0
# use paginator to read more than 1000 files
paginator = client.get_paginator('list_objects_v2')
operation_parameters = {'Bucket': src_bucket_name,
                        'Prefix': folder}
page_iterator = paginator.paginate(**operation_parameters)
for page in page_iterator:
    if page.get('Contents', None):
        files_src.extend([prefix['Key'] for prefix in page['Contents']])
        bucket_size += sum(obj['Size'] for obj in page['Contents'])
# list of file names after copy
dest_prefix = 'folder_1/subfolder_1/'
files_dest = [dest_prefix+i for i in files_src]
botocore_config = botocore.config.Config(max_pool_connections=20)
s3client = boto3.client('s3', config=botocore_config)
transfer_config = s3transfer.TransferConfig(
    use_threads=True,
    max_concurrency=20,
)
progress = tqdm.tqdm(
    desc='upload',
    total=bucket_size, unit='B', unit_scale=1,
    position=0,
    bar_format='{desc:<10}{percentage:3.0f}%|{bar:10}{r_bar}')
s3t = s3transfer.create_transfer_manager(s3client, transfer_config)
for src, dest in zip(files_src, files_dest):
    copy_source = {
        'Bucket': src_bucket_name,
        'Key': src
    }
    s3t.copy(copy_source=copy_source,
             bucket=dest_bucket_name,
             key=dest,
             subscribers=[s3transfer.ProgressCallbackInvoker(progress.update)],
             )
# close transfer job
s3t.shutdown()
progress.close();
I have a hosted zone in Route 53 and would like to store the contents of the hosted zone object in S3, but I am getting an error. I think Body is the correct parameter, but maybe this happens because the object is in JSON format?
import boto3
import json
def allwork():
    client = boto3.client('route53')
    hostzone = client.list_hosted_zones()
    bucket_name = "testlambda"
    file_name = "r53data.txt"
    lambda_path = "/tmp/" + file_name
    s3_path = "10102018/" + file_name
    s3 = boto3.resource("s3")
    s3.Bucket(bucket_name).put_object(Key=s3_path, Body=hostzone)

allwork()
Here is the error:
module initialization error: Parameter validation failed:
Invalid type for parameter Body, value: {u'HostedZones':
[{u'ResourceRecordSetCount': 7, u'CallerReference': '814E3.........
tkausl answered the question in the comments:
Looks like it returns a dict, so you need to json encode it manually before passing it to put_object
update:
import boto3
import json
def allwork():
    client = boto3.client('route53')
    hostzone = client.list_hosted_zones()
    bucket_name = "testlambda"
    file_name = "r53data.txt"
    lambda_path = "/tmp/" + file_name
    s3_path = "10102018/" + file_name
    hostzone2 = json.dumps(hostzone, ensure_ascii=False)
    s3 = boto3.resource("s3")
    s3.Bucket(bucket_name).put_object(Key=s3_path, Body=hostzone2)

allwork()
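One small caveat (a general json.dumps point, not something specific to Route 53): if the response ever contains values the json module cannot serialize, such as datetime objects, passing default=str is an easy workaround:

hostzone2 = json.dumps(hostzone, ensure_ascii=False, default=str)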
For a project, I need to download some items in my S3 bucket. I have already seen similar posts about this topic; however, I hardcoded my access key ID and secret access key inside the program and am still unable to download them. Python keeps returning the error:
"botocore.exceptions.NoCredentialsError: Unable to locate credentials"
Despite providing my credentials, I am still unable to download. My code is provided below. Can anyone help me correct this?
import boto3
import os
"""import sys
import csv
import pandas as pd
import numpy as np
import tensorflow as tf"""
import nibabel as nib
from boto3.session import Session
aws_access_key_id = '********************'
aws_secret_access_key = '****************************************'
bucket1 = 'adnimcic'
mcic = [[], [], []]
mcicc = [[], [], []]
bucket2 = 'adnimcinc'
mcinc = [[], [], []]
bucket3 = 'adniresults'
results = []
s3_client = boto3.client('s3')
#connecting to S3
session = Session(aws_access_key_id, aws_secret_access_key)
s3 = session.resource('s3')
bucket1obj = s3.Bucket(bucket1)
#bucket2obj = s3.Bucket(bucket2)
#'MCIc_Segmented/ADNI_002_S_0729_MR_MP-RAGE_REPEAT_br_raw_20070225105857428_72_S27091_I41585_be_be_pve_2.nii.gz_extracted'
def concatenate(name):
    name = name.split('.')
    name.pop()
    name = name[0] + '.' + name[1]
    name = name.split('/')
    name = name[1]
    return name

def download(bucketname, key):
    path = '/Volumes/LaCie Mac/' + concatenate(key.key)
    s3_client.download_file(bucketname, key.key, path)

for key in bucket1obj.objects.all():
    if 'pve_0' and 'extracted' in key.key:
        mcic[0].append(key)
        download(bucket1, key)

for key in bucket1obj.objects.all():
    if 'pve_1' and 'extracted' in key.key:
        mcic[1].append(key)
        download(bucket1, key)

for key in bucket1obj.objects.all():
    if 'pve_2' and 'extracted' in key.key:
        mcic[2].append(key)
        download(bucket1, key)
Looking at your code, you have two different S3 objects, s3_client and s3:
s3_client = boto3.client('s3')
...
session = Session(aws_access_key_id, aws_secret_access_key)
s3 = session.resource('s3')
...
def download(bucketname, key):
    path = '/Volumes/LaCie Mac/' + concatenate(key.key)
    s3_client.download_file(bucketname, key.key, path)
You use your aws_access_key_id and aws_secret_access_key on the session object, which you then use for the s3 resource as expected, but you don't use the session for the s3_client.
If I understand your problem correctly, you should be able to resolve this issue by creating the client from your session, like:
session = Session(aws_access_key_id, aws_secret_access_key)
s3 = session.resource('s3')
s3_client = session.client('s3')
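Alternatively, as a minimal sketch using the same hard-coded variables from the question, the client can be built directly from the credentials instead of going through a Session:

import boto3

# aws_access_key_id / aws_secret_access_key are the hard-coded values defined earlier
s3_client = boto3.client(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

Either way, the important part is that the client and the resource end up using the same credentials.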
I have a versioned bucket and would like to delete the object (and all of its versions) from the bucket. However, when I try to delete the object from the console, S3 simply adds a delete marker but does not perform a hard delete.
Is it possible to delete all versions of the object (hard delete) with a particular key?:
s3resource = boto3.resource('s3')
bucket = s3resource.Bucket('my_bucket')
obj = bucket.Object('my_object_key')
# I would like to delete all versions for the object like so:
obj.delete_all_versions()
# or delete all versions for all objects like so:
bucket.objects.delete_all_versions()
The other answers delete objects individually. It is more efficient to use the delete_objects boto3 call and batch-process the deletes. The code below collects all versions and delete markers and deletes them in batches of 1,000:
bucket = 'bucket-name'
s3_client = boto3.client('s3')
object_response_paginator = s3_client.get_paginator('list_object_versions')
delete_marker_list = []
version_list = []
for object_response_itr in object_response_paginator.paginate(Bucket=bucket):
    if 'DeleteMarkers' in object_response_itr:
        for delete_marker in object_response_itr['DeleteMarkers']:
            delete_marker_list.append({'Key': delete_marker['Key'], 'VersionId': delete_marker['VersionId']})
    if 'Versions' in object_response_itr:
        for version in object_response_itr['Versions']:
            version_list.append({'Key': version['Key'], 'VersionId': version['VersionId']})

for i in range(0, len(delete_marker_list), 1000):
    response = s3_client.delete_objects(
        Bucket=bucket,
        Delete={
            'Objects': delete_marker_list[i:i+1000],
            'Quiet': True
        }
    )
    print(response)

for i in range(0, len(version_list), 1000):
    response = s3_client.delete_objects(
        Bucket=bucket,
        Delete={
            'Objects': version_list[i:i+1000],
            'Quiet': True
        }
    )
    print(response)
The documentation is helpful here:
When versioning is enabled in an S3 bucket, a simple DeleteObject request cannot permanently delete an object from that bucket. Instead, Amazon S3 inserts a delete marker (which is effectively a new version of the object with its own version ID).
When you try to GET an object whose current version is a delete marker, S3 behaves as if the object has been deleted (even though it has not) and returns a 404 error.
To permanently delete an object from a versioned bucket, use DeleteObject, with the relevant version ID, for each and every version of the object (and that includes the delete markers).
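In code, that boils down to one explicit delete_object call per version; a minimal sketch (the bucket, key and version ID below are placeholders):

import boto3

s3 = boto3.client('s3')
# Passing VersionId permanently removes that specific version;
# the same call with a delete marker's version ID removes the marker itself.
s3.delete_object(Bucket='my_bucket', Key='my_object_key', VersionId='example-version-id')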
I had trouble using the other solutions to this question so here's mine.
import boto3
bucket = "bucket name goes here"
filename = "filename goes here"
client = boto3.client('s3')
paginator = client.get_paginator('list_object_versions')
response_iterator = paginator.paginate(Bucket=bucket)
for response in response_iterator:
    versions = response.get('Versions', [])
    versions.extend(response.get('DeleteMarkers', []))
    for version_id in [x['VersionId'] for x in versions
                       if x['Key'] == filename and x['VersionId'] != 'null']:
        print('Deleting {} version {}'.format(filename, version_id))
        client.delete_object(Bucket=bucket, Key=filename, VersionId=version_id)
This code deals with the cases where
object versioning isn't actually turned on
there are DeleteMarkers
there are no DeleteMarkers
there are more versions of a given file than fit in a single API response
Mahesh Mogal's answer doesn't delete DeleteMarkers. Mangohero1's answer fails if the object is missing a DeleteMarker. Hari's answer repeats 10 times (to work around missing pagination logic).
You can use object_versions.
def delete_all_versions(bucket_name: str, prefix: str):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    if prefix is None:
        bucket.object_versions.delete()
    else:
        bucket.object_versions.filter(Prefix=prefix).delete()
delete_all_versions("my_bucket", None) # empties the entire bucket
delete_all_versions("my_bucket", "my_prefix/") # deletes all objects matching the prefix (can be only one if only one matches)
As a supplement to @jarmod's answer, here is a workaround I developed for "hard deleting" an object, delete markers included:
def get_all_versions(bucket, filename):
    s3 = boto3.client('s3')
    keys = ["Versions", "DeleteMarkers"]
    results = []
    for k in keys:
        response = s3.list_object_versions(Bucket=bucket)[k]
        to_delete = [r["VersionId"] for r in response if r["Key"] == filename]
        results.extend(to_delete)
    return results

bucket = "YOUR BUCKET NAME"
file = "YOUR FILE"

s3 = boto3.client('s3')  # client for the deletes below (the one above is local to the function)
for version in get_all_versions(bucket, file):
    s3.delete_object(Bucket=bucket, Key=file, VersionId=version)
A shorter solution:
import boto3
def delete_versions(bucket, objects=None):  # `objects` is either a list of str or None
    bucket = boto3.resource('s3').Bucket(bucket)
    if objects:  # delete specified objects
        [version.delete() for version in bucket.object_versions.all() if version.object_key in objects]
    else:  # or delete all objects in `bucket`
        [version.delete() for version in bucket.object_versions.all()]
To delete all versions of an object or objects under a prefix:
Pass the object key /folder/filename or the prefix /folder/subfolder/ as the Prefix:
import boto3
s3 = boto3.resource('s3')
bucket = s3.Bucket("my-bucket-name")
bucket.object_versions.filter(Prefix="folder/subfolder/").delete()
This post was super helpful; without it we would have spent a tremendous amount of time cleaning up our S3 folders.
We had a requirement to clean up specific folders only, so I tried the following code and it worked like a charm. Note that I iterate 10 times to work around the 1,000-object limit of list_object_versions; feel free to adjust the limit as you wish.
import boto3
session = boto3.Session(aws_access_key_id='<YOUR ACCESS KEY>',aws_secret_access_key='<YOUR SECRET KEY>')
bucket_name = '<BUCKET NAME>'
object_name = '<KEY NAME>'
s3 = session.client('s3')
for i in range(10):
    versions = s3.list_object_versions(Bucket=bucket_name, Prefix=object_name)
    #print(versions)
    version_list = versions.get('Versions')
    for version in version_list:
        keyName = version.get('Key')
        versionId = version.get('VersionId')
        print(keyName + ':' + versionId)
        s3.delete_object(Bucket=bucket_name, Key=keyName, VersionId=versionId)
    marker_list = versions.get('DeleteMarkers')
    #print(marker_list)
    for marker in marker_list:
        keyName1 = marker.get('Key')
        versionId1 = marker.get('VersionId')
        print(keyName1 + ':' + versionId1)
        s3.delete_object(Bucket=bucket_name, Key=keyName1, VersionId=versionId1)
This script will delete all versions of all objects with a given prefix:
s3 = boto3.resource("s3")
client = boto3.client("s3")
s3_bucket = s3.Bucket(bucket_name)
for obj in s3_bucket.objects.filter(Prefix=""):
    response = client.list_object_versions(Bucket=bucket_name, Prefix=obj.key)
    while "Versions" in response:
        to_delete = [
            {"Key": ver["Key"], "VersionId": ver["VersionId"]}
            for ver in response["Versions"]
        ]
        delete = {"Objects": to_delete}
        client.delete_objects(Bucket=bucket_name, Delete=delete)
        response = client.list_object_versions(Bucket=bucket_name, Prefix=obj.key)
    client.delete_object(Bucket=bucket_name, Key=obj.key)
Easiest way:
import boto3
bucket = boto3.resource("s3").Bucket("mybucket")
bucket.object_versions.all().delete()
You can delete an object with all of its versions using the following code:
session = boto3.Session(aws_access_key_id, aws_secret_access_key)
bucket_name = 'bucket_name'
object_name = 'object_name'
s3 = session.client('s3')
versions = s3.list_object_versions(Bucket=bucket_name, Prefix=object_name)
version_list = versions.get('Versions')
for version in version_list:
    versionId = version.get('VersionId')
    s3.delete_object(Bucket=bucket_name, Key=object_name, VersionId=versionId)
The rest of the answers all miss something: using the Prefix parameter, deleting delete markers, or handling errors.
import itertools
import boto3

# bucket_name and key are assumed to be defined by the caller
s3 = boto3.client('s3')
response = s3.list_object_versions(Bucket=bucket_name, Prefix=key)
objects_to_delete = []
# Note that we do not use pagination because we assume the file has fewer versions than the page size (something like 300).
# Note that we also traverse delete markers.
for obj in itertools.chain(response.get("Versions", []), response.get("DeleteMarkers", [])):
    # NOTE: AWS has no API to list versions for a single object, only by prefix,
    # so other objects sharing the same prefix (e.g. "blaze/a.txt" and "bla.json" when asking for "bla") will also be listed.
    # So we need to be careful here.
    if obj["Key"] != key:
        break
    objects_to_delete.append({"Key": obj["Key"], "VersionId": obj["VersionId"]})

if len(objects_to_delete) == 0:
    raise FileNotFoundError(f'File {key} not found at bucket {bucket_name}')

deletion_response = s3.delete_objects(Bucket=bucket_name, Delete={"Objects": objects_to_delete, "Quiet": False})
errors = deletion_response.get("Errors", [])
if len(errors) > 0:
    raise Exception(f'Failed deleting file {key} from bucket {bucket_name}. Result: {deletion_response}')
I'm trying to upload an image into an S3 bucket using boto. After the image has successfully uploaded, I want to perform a certain operation using the file URL of the image in the S3 bucket. The problem is that sometimes the image doesn't upload fast enough, and I end up with a server error when I perform the operation that depends on the file URL of the image.
This is my source code. I'm using Python Flask.
def search_test(consumer_id):
    consumer = session.query(Consumer).filter_by(consumer_id=consumer_id).one()
    products = session.query(Product).all()
    product_dictionary = {'Products': [p.serialize for p in products]}
    if request.method == 'POST':
        p_product_image_url = request.files['product_upload_url']
        s3 = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        bucket = s3.get_bucket(AWS_BUCKET_NAME)
        k = Key(bucket)
        if p_product_image_url and allowed_file(p_product_image_url.filename):
            # Read the contents of the file
            file_content = p_product_image_url.read()
            # Use Boto to upload the file to S3
            k.set_metadata('Content-Type', mimetypes.guess_type(p_product_image_url.filename))
            k.key = secure_filename(p_product_image_url.filename)
            k.set_contents_from_string(file_content)
            print('consumer search upload successful')
            new_upload = Uploads(picture_upload_url=k.key.replace(' ', '+'), consumer=consumer)
            session.add(new_upload)
            session.commit()
            new_result = jsonify(Result=perform_actual_search(amazon_s3_base_url + k.key.replace(' ', '+'),
                                                              product_dictionary))
            return new_result
    else:
        return render_template('upload_demo.html', consumer_id=consumer_id)
The jsonify method needs a valid image URL to perform the operation. It works sometimes and sometimes it doesn't; I suspect the reason is that the image has not finished uploading by the time that line of code executes.
The perform_actual_search method is as follows:
def get_image_search_results(image_url):
    global description
    url = ('http://style.vsapi01.com/api-search/by-url/?apikey=%s&url=%s' % (just_visual_api_key, image_url))
    h = httplib2.Http()
    response, content = h.request(url, 'GET')  # alternatively write content=h.request((url,'GET')[1]) /// number 2 in our array
    result = json.loads(content)
    result_dictionary = []
    for i in range(0, 10):
        if result:
            try:
                if result['errorMessage']:
                    result_dictionary = []
            except:
                pass
            if result['images'][i]:
                images = result['images'][i]
                jv_img_url = images['imageUrl']
                title = images['title']
                try:
                    if images['description']:
                        description = images['description']
                    else:
                        description = "no description"
                except:
                    pass
                # print("\njv_img_url: %s,\ntitle: %s,\ndescription: %s\n\n" % (
                #     jv_img_url, title, description))
                image_info = {
                    'image_url': jv_img_url,
                    'title': title,
                    'description': description,
                }
                result_dictionary.append(image_info)
    if result_dictionary != []:
        # for i in range(len(result_dictionary)):
        #     print(result_dictionary[i])
        #     print("\n\n")
        return result_dictionary
    else:
        return []
def performSearch(jv_input_dictionary, imagernce_products_dict):
    print jv_input_dictionary
    print imagernce_products_dict
    global common_desc_ratio
    global isReady
    image_search_results = []
    if jv_input_dictionary != []:
        for i in range(len(jv_input_dictionary)):
            print jv_input_dictionary[i]
            for key in jv_input_dictionary[i]:
                if key == 'description':
                    input_description = jv_input_dictionary[i][key]
                    s1w = re.findall('\w+', input_description.lower())
                    s1count = Counter(s1w)
                    print input_description
                    for j in imagernce_products_dict:
                        if j == 'Products':
                            for q in range(len(imagernce_products_dict['Products'])):
                                for key2 in imagernce_products_dict['Products'][q]:
                                    if key2 == 'description':
                                        search_description = imagernce_products_dict['Products'][q]['description']
                                        print search_description
                                        s2w = re.findall('\w+', search_description.lower())
                                        s2count = Counter(s2w)
                                        # Commonality magic
                                        common_desc_ratio = difflib.SequenceMatcher(None, s1w, s2w).ratio()
                                        print('Common ratio is: %.2f' % common_desc_ratio)
                                        if common_desc_ratio > 0.09:
                                            image_search_results.append(imagernce_products_dict['Products'][q])
    if image_search_results:
        print image_search_results
        return image_search_results
    else:
        return {'404': 'No retailers registered with us currently own this product.'}
def perform_actual_search(image_url, imagernce_product_dictionary):
    return performSearch(get_image_search_results(image_url), imagernce_product_dictionary)
Any help solving this would be greatly appreciated.
I would configure S3 to generate notifications on events such as s3:ObjectCreated:*
Notifications can be posted to an SNS topic, an SQS queue, or can directly trigger a Lambda function.
More details about S3 notifications : http://docs.aws.amazon.com/AmazonS3/latest/dev/NotificationHowTo.html
You should rewrite your code to separate the upload part from the image-processing part. The latter can be implemented as a Lambda function in Python.
Working in an asynchronous way is key here; writing blocking code is usually not scalable.
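As an illustration only (not the poster's code), a minimal sketch of a Lambda handler for such an s3:ObjectCreated notification might look roughly like this, where process_image is a hypothetical placeholder for the image-processing step:

import urllib.parse

def lambda_handler(event, context):
    # Each record describes one S3 event (e.g. s3:ObjectCreated:Put).
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        key = urllib.parse.unquote_plus(record['s3']['object']['key'])
        # By the time this event fires, the object is fully uploaded, so it is safe to act on it.
        process_image(bucket, key)  # hypothetical image-processing step

def process_image(bucket, key):
    print('New object available: s3://{}/{}'.format(bucket, key))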
You can compare the bytes written to S3 with the file size. Let's say you use the following method to write to S3:
bytes_written = key.set_contents_from_file(file_binary, rewind=True)
In your case it's set_contents_from_string. Then I would compare bytes_written with p_product_image_url.seek(0, os.SEEK_END); if they match, the whole file has been uploaded to S3.
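As a rough sketch of that check, reusing k, file_content and p_product_image_url from the question (nothing here is verified against the poster's app, and the assumption that set_contents_from_string returns a byte count mirrors the suggestion above):

bytes_written = k.set_contents_from_string(file_content)

# file_content was already read into memory, so its length is the expected size;
# comparing against len() avoids relying on seek() returning the new position.
if bytes_written == len(file_content):
    pass  # the whole file is on S3; safe to build the URL and run the search
else:
    pass  # short or failed write; retry the upload or return an error to the client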