s3 upload from base64 using Lambda - python

I have the following code:
import base64

imgdata = base64.b64decode(new_string)
filename = 'image.jpg'  # I assume you have a way of picking unique filenames
with open(filename, 'wb') as f:
    f.write(imgdata)
It saves the file as a .jpg and I can open it.
How can I upload this to an S3 bucket, or any other service, and return a URL? Security is not an issue.
I tried:
try:
    convertedFileString = fstring.replace('-', '+').replace('_', '/').replace(',', '=')
    imgdata = base64.b64decode(new_string)
    # I assume you have a way of picking unique filenames
    with open(filename, 'wb') as f:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket('ag-grid')
        bucket.put_object(Key=filename, Body=f)
except Exception as e:
    return {
        'statusCode': 500,
        'body': str(e)
    }

So, I'm not sure if I understand your problem.
I use the code below to upload and it's working.
import base64
import uuid

import boto3
from botocore.client import Config

# Filename
new_name = '{}_{}_{}_{}_{}_{}x{}.{}'.format(cid, uid, id_service_order, id_question, uuid.uuid4(), 0, 0,
                                            fileExtension)  # type: str
key = "{}".format(new_name)

# Let's use Amazon S3
s3 = boto3.client("s3",
                  aws_access_key_id=aws_config.aws_access_key_id,
                  aws_secret_access_key=aws_config.aws_secret_access_key,
                  region_name=aws_config.aws_s3_region,
                  config=Config(signature_version='s3v4'))

dec = base64.b64decode(img_base64)
rs = s3.put_object(
    Bucket=aws_config.aws_s3_bucket,
    Key=key,
    ContentType=fileType,
    Body=dec,
    ACL='public-read'
)
print(rs)
print(new_name)
Does that help you?
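Since the original question also asks for a URL, here is a minimal sketch (my addition, not part of the answer above) of returning one after the put_object call; it assumes the object is publicly readable via ACL='public-read' and reuses the aws_config names from the snippet:
# Public-style URL, assuming the object was uploaded with ACL='public-read'
url = "https://{}.s3.{}.amazonaws.com/{}".format(
    aws_config.aws_s3_bucket, aws_config.aws_s3_region, key)
print(url)

# Or generate a time-limited presigned URL instead of relying on ACLs
presigned = s3.generate_presigned_url(
    'get_object',
    Params={'Bucket': aws_config.aws_s3_bucket, 'Key': key},
    ExpiresIn=3600)
print(presigned)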

Related

Modify JSON file from one S3 bucket and send to another S3 bucket through Lambda

Does anyone know how I can modify the following code to get the JSON file from one S3 bucket, modify it, and send it to another S3 bucket in Python?
import json
import uuid

import boto3

s3_client = boto3.client('s3')

def lambda_handler(event, context):
    # First we fetch the bucket name from the event JSON object
    bucket = event['Records'][0]['s3']['bucket']['name']
    # Now we fetch the key of the file uploaded to the S3 bucket from the event JSON object
    json_file_name = event['Records'][0]['s3']['object']['key']
    # Call get_object(), which retrieves the object from Amazon S3 as a dictionary
    json_object = s3_client.get_object(Bucket=bucket, Key=json_file_name)
    # Decode the body returned by get_object(), which gives us a string
    file_reader = json_object['Body'].read().decode("utf-8")
    # Turn the JSON string into a dictionary
    openAWSEC2PricesJson = json.loads(file_reader)
    openReferenceUUIDAWSEC2Prices = open("./assets/referenceUUIDAwsEC2Prices.json", "r")
    openReferenceUUIDAWSEC2PricesJson = json.load(openReferenceUUIDAWSEC2Prices)
    openReferenceUUIDAWSEC2Prices.close()
    for i in openAWSEC2PricesJson:
        for j in openReferenceUUIDAWSEC2PricesJson:
            grouping_code = str(i['region'] + '_' + i['operatingSystem'] + '_' + i['name'])
            if grouping_code == j['groupingCode']:
                id = j['uniqueID']
                i['id'] = id
        if 'id' not in i:
            id_new = uuid.uuid4()
            i['id'] = str(id_new)
            grouping_code_new = str(i['region'] + '_' + i['operatingSystem'] + '_' + i['name'])
            res = {}
            res['groupingCode'] = grouping_code_new
            res['uniqueID'] = str(id_new)
            openReferenceUUIDAWSEC2PricesJson.append(res)
    writeAWSEC2Prices = open("awsEC2Pricebook.json", "w")
    json.dump(openAWSEC2PricesJson, writeAWSEC2Prices)
    writeAWSEC2Prices.close()
    writeReferenceUUIDAWSEC2Prices = open("./assets/referenceUUIDAwsEC2Prices.json", "w")
    json.dump(openReferenceUUIDAWSEC2PricesJson, writeReferenceUUIDAWSEC2Prices)
    writeReferenceUUIDAWSEC2Prices.close()
Currently I get the following error when I test it:
"errorMessage": "[Errno 30] Read-only file system: 'awsEC2Pricebook.json'",
Can you try storing "awsEC2Pricebook.json" at "/tmp/awsEC2Pricebook.json" and see if that resolves it? In Lambda, the filesystem is read-only except for the /tmp directory, which is why the write fails:
writeAWSEC2Prices = open("/tmp/awsEC2Pricebook.json", "w")
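The question also asks to send the modified file on to another bucket; a minimal sketch of that last step (the destination bucket name is a placeholder) would be to write the result to /tmp and upload it from there:
# Write the modified prices to /tmp, then upload to the destination bucket (placeholder name)
destination_bucket = 'my-destination-bucket'
with open("/tmp/awsEC2Pricebook.json", "w") as f:
    json.dump(openAWSEC2PricesJson, f)
s3_client.upload_file("/tmp/awsEC2Pricebook.json", destination_bucket, "awsEC2Pricebook.json")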

How to read all the files from a directory in s3 bucket using Python in cloud functions

I am trying to read all the files of the same format from the S3 bucket. Currently I get this error:
Error: "Could not establish source connection [Errno 2] No such file or directory: '/user_code/s3:/"
Here is my code:
def s3_file_read(self, source):
    bucket_name = 'xxx'
    region = 'xxx'
    object_name = 's3-folder-name/'
    ACCESS_KEY_ID = 'xxx'
    ACCESS_SECRET_KEY = 'xxx'
    s3_client = boto3.client('s3', aws_access_key_id=ACCESS_KEY_ID, aws_secret_access_key=ACCESS_SECRET_KEY, region_name=region)
    file_path = "s3://your-bucket-name/folder-name/"
    prefix = os.path.abspath(file_path)
    file_list = [os.path.join(prefix, f) for f in os.listdir(prefix) if f.endswith('.csv')]
    print('################################## Reading the file #############################')
    file_type = source['fileType'].lower()
    if source['fileType'] == 'csv':
        try:
            obj = s3_client.get_object(Bucket=bucket_name, Key=object_name)
            file_df = pd.read_csv(obj['Body'])
            print("CSV File read success")
        except Exception as e:
            print("Could not read the file {}".format(e))
    else:
        print("File format supported CSV")
I've made some assumptions about what you'd like to do here, but this code will read the keys in a bucket and create a list of the .csv objects only. Then you can loop over that list and test whether a dataframe can be created. If you want to read all those files into one larger dataframe, the end of your function needs to be rewritten (see the sketch after the code).
s3sr = boto3.resource('s3')

# there are other examples of collecting objects, this is just what I use
def get_keys_from_prefix(self, bucket, prefix):
    '''gets list of keys for given bucket and prefix'''
    keys_list = []
    paginator = s3sr.meta.client.get_paginator('list_objects_v2')
    # use Delimiter to limit search to that level of hierarchy
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
        keys = [content['Key'] for content in page.get('Contents')]
        print('keys in page: ', len(keys))
        keys_list.extend(keys)
    return keys_list

def s3_file_read(self, source):
    bucket_name = 'xxx'
    region = 'xxx'
    prefix = 's3-folder-name/'  # if no prefix, pass ''
    ACCESS_KEY_ID = 'xxx'
    ACCESS_SECRET_KEY = 'xxx'
    s3_client = boto3.client('s3', aws_access_key_id=ACCESS_KEY_ID, aws_secret_access_key=ACCESS_SECRET_KEY, region_name=region)
    keys_list = self.get_keys_from_prefix(bucket_name, prefix)
    csv_list = [f for f in keys_list if f.endswith('.csv')]
    for csvfile in csv_list:
        try:
            obj = s3_client.get_object(Bucket=bucket_name, Key=csvfile)
            file_df = pd.read_csv(obj['Body'])
            print("CSV File read success")
        except Exception as e:
            print("Could not read the file {}".format(e))

Creating a tar stream in memory from multiple file byte streams

I'm trying to create a tar stream in memory, add files to it, and then save it to S3. But there is some issue and the files inside the tar have zero size. Can anyone please advise? Code snippet below:
def tar_and_upload(bucket, keys, dest_bucket):
    s3 = boto3.client('s3')
    file_obj = io.BytesIO()
    tar_file_obj = tarfile.open(mode="w:gz", fileobj=file_obj)
    response = {}
    for key in keys:
        obj = s3.get_object(Bucket=bucket, Key=key)
        _bytes = obj["Body"].read()
        _file_name = key.split("/")[-1]
        tar_file_obj.addfile(tarfile.TarInfo(_file_name), _bytes)
    tar_file_obj.close()
    try:
        obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
        s3.put_object(Body=file_obj.getvalue(), Bucket=dest_bucket, Key=obj_name)
    except Exception as e:
        logging.error("Can't save tar to S3", exc_info=True)
        return
Here is the working version for S3 objects:
def tar_and_upload(bucket, keys, dest_bucket):
    s3 = boto3.client('s3')
    file_obj = io.BytesIO()
    tar_file_obj = tarfile.open(mode="w:gz", fileobj=file_obj)
    response = {}
    for key in keys:
        obj = s3.get_object(Bucket=bucket, Key=key)
        _bytes = obj["Body"].read()
        _file_name = key.split("/")[-1]
        info = tarfile.TarInfo(_file_name)
        info.size = obj["ContentLength"]
        info.mtime = s3.head_object(Bucket=bucket, Key=key)['LastModified'].timestamp()
        tar_file_obj.addfile(info, io.BytesIO(_bytes))
    tar_file_obj.close()
    try:
        obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
        s3.put_object(Body=file_obj.getvalue(), Bucket=dest_bucket, Key=obj_name)
    except Exception as e:
        logging.error("Can't save tar to S3", exc_info=True)
        return
Okay, apparently when adding byte streams to a tar, we need to explicitly specify the size. The working snippet above does this for S3 objects by setting info.size from the object's ContentLength; the sample below does the same with local files:
import tarfile
import uuid
import io
import os

def tar_and_upload():
    file_obj = io.BytesIO()
    tar_file_obj = tarfile.open(mode="w:gz", fileobj=file_obj)
    for filename in os.listdir("images"):
        print(filename)
        file_path = os.path.join("images", filename)
        # tar_file_obj.add(file_path)
        with open(file_path, "rb") as f:
            _bytes = f.read()
            tar_info = tarfile.TarInfo(filename)
            tar_info.size = len(_bytes)
            tar_file_obj.addfile(tar_info, io.BytesIO(_bytes))
    tar_file_obj.close()
    try:
        obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
        object_path = os.path.join("temp", obj_name)
        with open(object_path, "wb") as f:
            f.write(file_obj.getvalue())
        print(obj_name)
    except Exception as e:
        print(str(e))

if __name__ == "__main__":
    tar_and_upload()
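As a usage note, when the archive goes to S3 instead of a local file, you can avoid copying the whole buffer with getvalue() by rewinding the BytesIO and streaming it; a minimal sketch (the bucket name is a placeholder):
import boto3

s3 = boto3.client('s3')
file_obj.seek(0)  # rewind the in-memory buffer after closing the tar
s3.upload_fileobj(file_obj, 'my-bucket', obj_name)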

How to upload url to s3 bucket using StringIO and put_object method with boto3

I need to upload URLs to an S3 bucket and am using boto3. I thought I had a solution with this question: How to save S3 object to a file using boto3, but when I go to download the files, I'm still getting errors. The goal is for them to download as audio files, not URLs. My code:
for row in list_reader:
    media_id = row['mediaId']
    external_id = row['externalId']
    with open('10-17_res1.csv', 'a') as results_file:
        file_is_empty = os.stat('10-17_res1.csv').st_size == 0
        results_writer = csv.writer(
            results_file, delimiter=',', quotechar='"'
        )
        if file_is_empty:
            results_writer.writerow(['fileURL', 'key', 'mediaId', 'externalId'])
        key = 'corpora/' + external_id + '/' + external_id + '.flac'
        bucketname = 'my_bucket'
        media_stream = media.get_item(media_id)
        stream_url = media_stream['streams'][0]['streamLocation']
        fake_handle = StringIO(stream_url)
        s3c.put_object(Bucket=bucketname, Key=key, Body=fake_handle.read())
My question is, what do I need to change so that the file is saved in s3 as an audio file, not a URL?
I solved this by using the smart_open module:
import smart_open

with smart_open.open(stream_url, 'rb', buffering=0) as f:
    s3.put_object(Bucket=bucketname, Key=key, Body=f.read())
Note that it won't work without the 'buffering=0' parameter.
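As an alternative sketch (my assumption, not part of the original answer), the same can be done without smart_open by downloading the audio bytes with requests and uploading them directly:
import requests

# Fetch the audio bytes from the stream URL, then upload them as the object body
resp = requests.get(stream_url)
resp.raise_for_status()
s3c.put_object(Bucket=bucketname, Key=key, Body=resp.content)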

Use AWS lambda function to convert S3 file from zip to gzip using boto3 python

I need to convert a .zip file from S3 to a .gzip file using boto3 python in an AWS lambda function. Any suggestions on how to do this?
Here is what I have so far:
import json
import boto3
import zipfile
import gzip

s3 = boto3.resource('s3')

def lambda_handler(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    try:
        s3Obj = s3.Object(bucket_name=bucket, key=key)
        response = s3Obj.get()
        data = response['Body'].read()
        zipToGzip = gzip.open(data, 'wb')
        zipToGzip.write(s3.upload_file(bucket, (s3 + '.gz')))
        zipToGzip.close()
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise e
OK, got it figured out. Thanks for your input Lee.
import json
import boto3
import zipfile
import gzip

print('Loading function')

s3 = boto3.resource('s3')
s3_client = boto3.client('s3')

def lambda_handler(event, context):
    # Get the object from the event and show its content type
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    try:
        s3_client.download_file(bucket, key, '/tmp/file.zip')
        zfile = zipfile.ZipFile('/tmp/file.zip')
        namelist = zfile.namelist()
        if len(namelist) > 1:
            pass
            # alertme()
        for filename in namelist:
            data = zfile.read(filename)
            f = open('/tmp/' + str(filename), 'wb')
            f.write(data)
            f.close()
        zipToGzip = gzip.open('/tmp/data.gz', 'wb')
        zipToGzip.write(data)
        zipToGzip.close()
        s3_client.upload_file('/tmp/data.gz', bucket, key + '.gz')
        s3_client.delete_object(Bucket=bucket, Key=key)
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise e
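A usage note: the loop above extracts every member to /tmp, but only the data of the last member ends up in data.gz. If the zip can contain more than one file, a per-file variant (a sketch; the output key scheme key + '/' + filename + '.gz' is my assumption) could replace the gzip section:
        # Hypothetical per-file variant: gzip and upload each member of the zip
        for filename in namelist:
            data = zfile.read(filename)
            gz_path = '/tmp/' + filename + '.gz'
            with gzip.open(gz_path, 'wb') as gz:
                gz.write(data)  # gzip each extracted member separately
            s3_client.upload_file(gz_path, bucket, key + '/' + filename + '.gz')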
