Python: Passing a variable to another function

I am really new to Python and was hoping someone could help me with this. I have a function (an AWS Lambda function, actually) that I need to pass a variable's value to after the value is created. I'm doing this to create a campaign in Pinpoint when a segment is created.
import os
import time
import boto3
from botocore.exceptions import ClientError
from datetime import datetime,timedelta
AWS_REGION = os.environ['region']
projectId = os.environ['projectId']
importRoleArn = os.environ['importRoleArn']
def lambda_handler(event, context):
    print("Received event: " + str(event))
    for record in event['Records']:
        # Assign some variables to make it easier to work with the data in the
        # event record.
        bucket = record['s3']['bucket']['name']
        key = record['s3']['object']['key']
        folder = os.path.split(key)[0]
        folder_path = os.path.join(bucket, folder)
        full_path = os.path.join(bucket, key)
        s3_url = "s3://" + folder_path
        # print(full_path)
        # Check to see if all file parts have been processed.
        if all_files_processed(bucket, folder, full_path):
            # If you haven't recently run an import job that uses a file stored in
            # the specified S3 bucket, then create a new import job. This prevents
            # the creation of duplicate segments.
            if not (check_import_jobs(bucket, folder, s3_url, full_path)):
                segmentID = create_import_job(s3_url, full_path)
                create_campaign(segmentID)
            else:
                print("Import job found with URL s3://"
                      + os.path.join(bucket, folder) + ". Aborting.")
        else:
            print("Parts haven't finished processing yet.")
# Determine if all of the file parts have been processed.
def all_files_processed(bucket, folder, full_path):
    # Use the "__ofN" part of the file name to determine how many files there
    # should be.
    number_of_parts = int((full_path.split("__of")[1]).split("_processed")[0])
    # Figure out how many keys contain the prefix for the current batch of
    # folders (basically, how many files are in the appropriate "folder").
    client = boto3.client('s3')
    objs = client.list_objects_v2(Bucket=bucket, Prefix=folder)
    file_count = objs['KeyCount']
    ready_for_import = False
    if file_count == number_of_parts:
        ready_for_import = True
    return ready_for_import
# Check Amazon Pinpoint to see if any import jobs have been created by using
# the same S3 folder.
def check_import_jobs(bucket, folder, s3_url, full_path):
    url_list = []
    print(s3_url)
    # Retrieve a list of import jobs for the current project ID.
    client = boto3.client('pinpoint')
    try:
        client_response = client.get_import_jobs(
            ApplicationId=projectId
        )
    except ClientError as e:
        print(e.response['Error']['Message'])
    else:
        segment_response = client_response['ImportJobsResponse']['Item']
        # print(segment_response)
        # Parse responses. Add all S3Url values to a list.
        for item in segment_response:
            # print(item)
            s3_url_existing = full_path
            url_list.append(s3_url_existing)
        # print(url_list)
    # Search for the current S3 URL in the list.
    if s3_url in url_list:
        found = True
    else:
        found = False
    print(found)
    return found
# Create the import job in Amazon Pinpoint.
def create_import_job(s3_url, full_path):
    client = boto3.client('pinpoint')
    segment_name = s3_url.split('/')[4]
    try:
        response = client.create_import_job(
            ApplicationId=projectId,
            ImportJobRequest={
                'DefineSegment': True,
                'Format': 'CSV',
                'RegisterEndpoints': True,
                'RoleArn': importRoleArn,
                'S3Url': s3_url,
                'SegmentName': segment_name
            }
        )
    except ClientError as e:
        print(e.response['Error']['Message'])
    else:
        print("Import job " + response['ImportJobResponse']['Id'] + " "
              + response['ImportJobResponse']['JobStatus'] + ".")
        print("Segment ID: "
              + response['ImportJobResponse']['Definition']['SegmentId'])
        print("Application ID: " + projectId)
        return response['ImportJobResponse']['Definition']['SegmentId']
def create_campaign(segmentID):
    client = boto3.client('pinpoint')
    now = datetime.now()
    dt_string = now.isoformat()
    print(type(segmentID))
    try:
        response = client.create_campaign(
            ApplicationId=projectId,
            WriteCampaignRequest={
                'Description': 'Test SMS Campaign 2',
                'MessageConfiguration': {
                    'EmailMessage': {
                        'Body': 'This is a test 2',
                        'FromAddress': 'xxx#xxx.com',
                        'HtmlBody': '<p>Test 2</p>',
                        'Title': 'This is a test 2'
                    },
                    'SMSMessage': {
                        'Body': 'Thanks for your visit to {{Attributes.Provider_Name}} on {{Attributes.Clinical_Date_of_Service}}',
                        'MessageType': 'PROMOTIONAL',
                        'SenderId': 'XXX'
                    }
                },
                'Schedule': {
                    'Frequency': 'ONCE',
                    'IsLocalTime': True,
                    'StartTime': dt_string,
                    'Timezone': 'UTC'
                },
                'Name': 'Test Email Campaign 6',
                'SegmentId': segmentID
            }
        )
    except ClientError as e:
        print(e.response['Error']['Message'])
    else:
        print('Campaign Created')
The issue comes up in create_campaign, where I want to send the SegmentId. I end up getting the following error:
"Segment specified in SegmentId is not found"
I can print segmentID to the console no problem; getting it passed into the function is the roadblock I'm hitting. Thanks in advance!
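Note: nothing below is confirmed by the code above, but one common cause of that error is that create_import_job only starts the import, so the segment may not exist yet when create_campaign runs immediately afterward. A minimal sketch of waiting for the import job to finish first (reusing projectId and the boto3 Pinpoint client; the helper name is hypothetical) might look like this:
import time
import boto3

def wait_for_segment(application_id, job_id, timeout=300, poll_interval=10):
    """Poll the Pinpoint import job until it completes, then return its segment ID."""
    client = boto3.client('pinpoint')
    waited = 0
    while waited < timeout:
        job = client.get_import_job(ApplicationId=application_id, JobId=job_id)
        status = job['ImportJobResponse']['JobStatus']
        if status == 'COMPLETED':
            return job['ImportJobResponse']['Definition']['SegmentId']
        if status in ('FAILED', 'FAILING'):
            raise RuntimeError('Import job ' + job_id + ' failed')
        time.sleep(poll_interval)
        waited += poll_interval
    raise TimeoutError('Import job ' + job_id + ' did not finish in time')

# Hypothetical usage: have create_import_job also return the import job Id, then
# segmentID = wait_for_segment(projectId, import_job_id)
# create_campaign(segmentID)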

Related

How to fix aws lambda function logs error?

I'm trying to extract AWS Trusted Advisor data through a Lambda function (triggered by an event scheduler) and upload it to S3. However, part of the function throws an error. Below is my code:
##libraries
import boto3
import os
import csv
from csv import DictWriter
import time
import traceback
## bucket_name is set as env variable
bucket_name = "test-ta-reports"
fail_msg = 'Pulling Trusted Advisor data failed'
Filename = "/tmp/checks_list.csv"
obj_name = time.strftime("%Y-%m-%d-%H-%M-%S") + '/' + '.csv'
##upload to s3
def s3_upload(bucket_name, Filename, obj_name):
    if obj_name is None:
        obj_name = os.path.basename(Filename)
    try:
        s3 = boto3.client("s3", region_name="eu-west-1")
        response = s3.upload_file(Filename, bucket_name, obj_name)
        return True
    except:
        print('Data failed to upload to bucket')
        traceback.print_exc()
        return False
def lambda_handler(event, context):
    try:
        support_client = boto3.client('support', region_name='us-east-1')
        ta_checks = support_client.describe_trusted_advisor_checks(language='en')
        checks_list = {ctgs: [] for ctgs in list(set([checks['category'] for checks in ta_checks['checks']]))}
        for checks in ta_checks['checks']:
            print('Getting check:' + checks['name'] + checks['category'])
            try:
                check_summary = support_client.describe_trusted_advisor_check_summaries(
                    checkIds=[checks['id']])['summaries'][0]
                if check_summary['status'] != 'not_available':
                    checks_list[checks['category']].append(
                        [checks['name'], check_summary['status'],
                         str(check_summary['resourcesSummary']['resourcesProcessed']),
                         str(check_summary['resourcesSummary']['resourcesFlagged']),
                         str(check_summary['resourcesSummary']['resourcesSuppressed']),
                         str(check_summary['resourcesSummary']['resourcesIgnored'])
                         ])
                else:
                    print("unable to append checks")
            except:
                print('Failed to get check: ' + checks['name'])
                traceback.print_exc()
    except:
        print('Failed! Debug further.')
        traceback.print_exc()
    ##rewrite dict to csv
    with open('/tmp/checks_list.csv', 'w', newline='') as csvfile:
        csv_writer = DictWriter(csvfile, fieldnames=['status', 'hasFlaggedResources', 'timestamp', 'resourcesSummary', 'categorySpecificSummary', 'checkId'])
        csv_writer.writeheader()
        csv_writer.writerow(check_summary)
    return checks_list
    if s3_upload(bucket_name, Filename, obj_name):
        print("Successfully uploaded")

if __name__ == '__main__':
    lambda_handler(event, context)
The error logs
unable to append checks
I'm new to Python, so I'm unsure how to check for traceback stacks under the else: statement. Is there any way to modify this code to get traceback logs for the append block? Also, have I made any error in the above code? I'm unable to figure it out. Please help.
response = client.describe_trusted_advisor_check_summaries(
    checkIds=[
        'string',
    ]
)
describe_trusted_advisor_check_summaries() returns summarized results for one or more Trusted Advisor checks. Here you are checking whether check_summary['status'] is not equal to not_available, i.e. the alert status of the check is either "ok" (green), "warning" (yellow), or "error" (red), and in that case you append resourcesProcessed, resourcesFlagged, resourcesSuppressed, and resourcesIgnored to checks_list for further processing.
It's printing
unable to append checks
just because the status of the check is not_available. It is not an error log. You just need to decide what to do when the check status is not_available.
See the documentation of describe_trusted_advisor_check_summaries: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/support.html#Support.Client.describe_trusted_advisor_check_summaries
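If it helps, here is a minimal sketch (reusing the names from the question's loop; nothing here is required by the API) of how the else branch could report why a check was skipped instead of printing a generic message. There is no exception in that branch, so traceback.print_exc() has nothing to show there:
if check_summary['status'] != 'not_available':
    checks_list[checks['category']].append([
        checks['name'], check_summary['status'],
        str(check_summary['resourcesSummary']['resourcesProcessed']),
        str(check_summary['resourcesSummary']['resourcesFlagged']),
        str(check_summary['resourcesSummary']['resourcesSuppressed']),
        str(check_summary['resourcesSummary']['resourcesIgnored']),
    ])
else:
    # No exception was raised here, so just log which check had no data and move on.
    print('Skipping check "{}" because its status is {}'.format(
        checks['name'], check_summary['status']))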

Python lambda function returns ErrorType KeyError

I am creating a Lambda function for a certificate expiration alert, and I get this error message when I run a test:
Response
{
  "errorMessage": "'detail-type'",
  "errorType": "KeyError",
  "requestId": "5449b430-e32a-4645-93b0-f204e92ef6e6",
  "stackTrace": [
    "  File \"/var/task/lambda_function.py\", line 25, in lambda_handler\n    if (event ['detail-type'] == \"ACM Certificate Approaching Expiration\"):\n"
  ]
}
Function Logs
START RequestId: 5449b430-e32a-4645-93b0-f204e92ef6e6 Version: $LATEST
[ERROR] KeyError: 'detail-type'
Traceback (most recent call last):
  File "/var/task/lambda_function.py", line 25, in lambda_handler
    if (event ['detail-type'] == "ACM Certificate Approaching Expiration"):
END RequestId: 5449b430-e32a-4645-93b0-f204e92ef6e6
REPORT RequestId: 5449b430-e32a-4645-93b0-f204e92ef6e6 Duration: 1.54 ms Billed Duration: 2 ms Memory Size: 128 MB Max Memory Used: 53 MB Init Duration: 233.48 ms
Request ID
5449b430-e32a-4645-93b0-f204e92ef6e6
Here is the full Lambda function:
import json
import boto3
import os
from datetime import datetime, timedelta, timezone
# -------------------------------------------
# setup global data
# -------------------------------------------
utc = timezone.utc
# make today timezone aware
today = datetime.now().replace(tzinfo=utc)
# set up time window for alert - default to 45 if its missing
if os.environ.get('EXPIRY_DAYS') is None:
    expiry_days = 45
else:
    expiry_days = int(os.environ['EXPIRY_DAYS'])
expiry_window = today + timedelta(days=expiry_days)
def lambda_handler(event, context):
    # if this is coming from the ACM event, its for a single certificate
    if (event['detail-type'] == "ACM Certificate Approaching Expiration"):
        response = handle_single_cert(event, context.invoked_function_arn)
    # otherwise, we need to get all the expiring certs that are expiring from CloudWatch Metrics
    else:
        response = handle_multiple_certs(event, context.invoked_function_arn)
    return {
        'statusCode': 200,
        'body': response
    }
def handle_single_cert(event, context_arn):
    cert_client = boto3.client('acm')
    cert_details = cert_client.describe_certificate(CertificateArn=event['resources'][0])
    result = 'The following certificate is expiring within ' + str(expiry_days) + ' days: ' + cert_details['Certificate']['DomainName']
    # check the expiry window before logging to Security Hub and sending an SNS
    if cert_details['Certificate']['NotAfter'] < expiry_window:
        # This call is the text going into the SNS notification
        result = result + ' (' + cert_details['Certificate']['CertificateArn'] + ') '
        # this call is publishing to SH
        result = result + ' - ' + log_finding_to_sh(event, cert_details, context_arn)
        # if there's an SNS topic, publish a notification to it
        if os.environ.get('SNS_TOPIC_ARN') is None:
            response = result
        else:
            sns_client = boto3.client('sns')
            response = sns_client.publish(TopicArn=os.environ['SNS_TOPIC_ARN'], Message=result, Subject='Certificate Expiration Notification')
    return result
def handle_multiple_certs(event, context_arn):
    cert_client = boto3.client('acm')
    cert_list = json.loads(get_expiring_cert_arns())
    if cert_list is None:
        response = 'No certificates are expiring within ' + str(expiry_days) + ' days.'
    else:
        response = 'The following certificates are expiring within ' + str(expiry_days) + ' days: \n'
        # loop through the cert list and pull out certs that are expiring within the expiry window
        for csl in cert_list:
            cert_arn = json.dumps(csl['Dimensions'][0]['Value']).replace('\"', '')
            cert_details = cert_client.describe_certificate(CertificateArn=cert_arn)
            if cert_details['Certificate']['NotAfter'] < expiry_window:
                current_cert = 'Domain:' + cert_details['Certificate']['DomainName'] + ' (' + cert_details['Certificate']['CertificateArn'] + '), \n'
                print(current_cert)
                # this is publishing to SH
                result = log_finding_to_sh(event, cert_details, context_arn)
                # This is the text going into the SNS notification
                response = response + current_cert
    # if there's an SNS topic, publish a notification to it
    if os.environ.get('SNS_TOPIC_ARN') is not None:
        sns_client = boto3.client('sns')
        response = sns_client.publish(TopicArn=os.environ['SNS_TOPIC_ARN'], Message=response.rstrip(', \n'), Subject='Certificate Expiration Notification')
    return response
def log_finding_to_sh(event, cert_details, context_arn):
    # setup for security hub
    sh_region = get_sh_region(event['region'])
    sh_hub_arn = "arn:aws:securityhub:{0}:{1}:hub/default".format(sh_region, event['account'])
    sh_product_arn = "arn:aws:securityhub:{0}:{1}:product/{1}/default".format(sh_region, event['account'])
    # check if security hub is enabled, and if the hub arn exists
    sh_client = boto3.client('securityhub', region_name=sh_region)
    try:
        sh_enabled = sh_client.describe_hub(HubArn=sh_hub_arn)
    # the previous command throws an error indicating the hub doesn't exist or lambda doesn't have rights to it so we'll stop attempting to use it
    except Exception as error:
        sh_enabled = None
        print('Default Security Hub product doesn\'t exist')
        response = 'Security Hub disabled'
    # This is used to generate the URL to the cert in the Security Hub Findings to link directly to it
    cert_id = right(cert_details['Certificate']['CertificateArn'], 36)
    if sh_enabled:
        # set up a new findings list
        new_findings = []
        # add expiring certificate to the new findings list
        new_findings.append({
            "SchemaVersion": "2018-10-08",
            "Id": cert_id,
            "ProductArn": sh_product_arn,
            "GeneratorId": context_arn,
            "AwsAccountId": event['account'],
            "Types": [
                "Software and Configuration Checks/AWS Config Analysis"
            ],
            "CreatedAt": event['time'],
            "UpdatedAt": event['time'],
            "Severity": {
                "Original": '89.0',
                "Label": 'HIGH'
            },
            "Title": 'Certificate expiration',
            "Description": 'cert expiry',
            'Remediation': {
                'Recommendation': {
                    'Text': 'A new certificate for ' + cert_details['Certificate']['DomainName'] + ' should be imported to replace the existing imported certificate before expiration',
                    'Url': "https://console.aws.amazon.com/acm/home?region=" + event['region'] + "#/?id=" + cert_id
                }
            },
            'Resources': [
                {
                    'Id': event['id'],
                    'Type': 'ACM Certificate',
                    'Partition': 'aws',
                    'Region': event['region']
                }
            ],
            'Compliance': {'Status': 'WARNING'}
        })
        # push any new findings to security hub
        if new_findings:
            try:
                response = sh_client.batch_import_findings(Findings=new_findings)
                if response['FailedCount'] > 0:
                    print("Failed to import {} findings".format(response['FailedCount']))
            except Exception as error:
                print("Error: ", error)
                raise
    return json.dumps(response)
def get_expiring_cert_arns():
    cert_list = []
    # Create CloudWatch client
    cloudwatch = boto3.client('cloudwatch')
    paginator = cloudwatch.get_paginator('list_metrics')
    for response in paginator.paginate(
            MetricName='DaysToExpiry',
            Namespace='AWS/CertificateManager',
            Dimensions=[{'Name': 'CertificateArn'}],):
        cert_list = cert_list + (response['Metrics'])
    # return all certs that are expiring according to CW
    return json.dumps(cert_list)

# function to setup the sh region
def get_sh_region(event_region):
    # security hub findings may need to go to a different region so set that here
    if os.environ.get('SECURITY_HUB_REGION') is None:
        sh_region_local = event_region
    else:
        sh_region_local = os.environ['SECURITY_HUB_REGION']
    return sh_region_local

# quick function to trim off right side of a string
def right(value, count):
    # To get right part of string, use negative first index in slice.
    return value[-count:]
You can first print the event to help you troubleshoot, by checking whether you are actually getting the key you expect ('detail-type'):
import json
print(f"Received: {json.dumps(obj=event, indent=2)}")
If there is a chance that the event will not have 'detail-type', then you can either use try/except to deal with the KeyError, or use event.get('detail-type') to guard against it.
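As a concrete sketch of that guard (keeping the handler structure from the question; this is just one way to write it):
def lambda_handler(event, context):
    # .get() returns None instead of raising KeyError when the key is absent,
    # so non-ACM invocations fall through to the bulk-certificate path.
    if event.get('detail-type') == "ACM Certificate Approaching Expiration":
        response = handle_single_cert(event, context.invoked_function_arn)
    else:
        response = handle_multiple_certs(event, context.invoked_function_arn)
    return {'statusCode': 200, 'body': response}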

Big Query table create and load data via Python

I'm trying to extract events from Mixpanel, process them, and then upload them to a BigQuery table (creating a new table).
I Googled all the available resources, but none of them helped solve the issue.
Below is my code:
# Required modules import
import os
from mixpanel_api import Mixpanel
import collections
import json
from google.cloud import storage, bigquery
# Function to flatten exported file
def flatten(d, parent_key='', sep=''):
    items = []
    for k, v in d.items():
        new_key = parent_key.replace("PROPERTIES","").replace("-","_").replace("[","").replace("]","").replace("/","").replace("\\","").replace("'","") + sep + k.replace(" ","").replace("_","").replace("$","").replace("-","_").replace("[","").replace("]","").replace("/","").replace("\\","").replace("'","") if parent_key else k
        #new_key = parent_key.replace("PROPERTIES","").join(e for e in parent_key if e.isalnum()) + sep + k.join(e for e in k if e.isalnum()) if parent_key else k
        if isinstance(v, collections.MutableMapping):
            items.extend(flatten(v, new_key.upper(), sep=sep).items())
        else:
            items.append((new_key.upper().replace("-","_").replace("[","").replace("]","").replace("/","").replace("\\","").replace("'",""), v))
            #items.append((new_key.upper().join(e for e in new_key if e.isalnum()), v))
            #items.append(("ID","1"))
            #items.append(("PROCESS_DATE",""))
            #items.append(("DATA_DATE",""))
    return dict(items)
# Start of execution point
if __name__ == '__main__':
    # Secret and token to access API
    api_sec = 'aa8af6b5ca5a5ed30e20f3af0acdfb2d'
    api_tok = 'ad5234953e64b908bcd35388875324db'
    # User input for date range and filename
    start_date = str(input('Enter the start date(format: YYYY-MM-DD): '))
    end_date = str(input('Enter the end date(format: YYYY-MM-DD): '))
    file_name = str(input('Enter filename to store output: '))
    file_formatter = str(input('Enter filename to store formatted output: '))
    # Instantiating Mixpanel object
    mpo = Mixpanel(api_sec,
                   api_tok
                   )
    # Exporting events for the specified date range and storing in the filename provided, gunzip'ed file
    mpo.export_events(file_name,
                      {'from_date': start_date,
                       'to_date': end_date
                       },
                      add_gzip_header=False,
                      raw_stream=True
                      )
    # Dict for schema derived from file
    schema_dict = {}
    # Flatten file and write-out to another file
    with open(file_name, 'r') as uf, open(file_formatter, 'a') as ff, open('schema_file', 'a') as sf:
        #schema_list = []
        for line in uf:
            temp = flatten(json.loads(line))
            for k in temp.keys():
                if k not in schema_dict:
                    schema_dict[k] = "STRING"
                    #schema_list.append({"name" : k, "type" : "STRING"})
            #ff.write(json.dumps(temp))
            json.dump(temp, ff, indent=None, sort_keys=True)  # Dumps each dictionary entry as a newline entry, even '{' '}' is on new lines
            ff.write('\n')  # Adds a new line after each object dump to file
        #json.dump(schema_dict, sf, indent = None, sort_keys = True)
        #json.dump(schema_list, sf, indent = None, sort_keys = True)
    # Removing source file
    if os.path.isfile(file_name):
        sfr = os.remove(file_name)
        if sfr == None:
            print 'File ' + file_name + ' removed from local storage'
        else:
            print 'File ' + file_name + ' remove failed from local storage'
    # Uploading file to Google bucket
    client = storage.Client()
    bucket = client.get_bucket('yathin-sample-bucket')
    blob = bucket.blob(file_formatter)
    status = blob.upload_from_filename(file_formatter)
    if status == None:
        print 'File ' + file_formatter + ' upload success. Removing local copy.'
        fr = os.remove(file_formatter)
        if fr == None:
            print 'File ' + file_formatter + ' removed from local storage'
        else:
            print 'File ' + file_formatter + ' remove failed from local storage'
    # Loading file to BigQuery
    client = bigquery.Client()
    dataset_id = 'sample_dataset'
    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = [bigquery.SchemaField(k, v) for k, v in schema_dict.items()]
    #job_config.autodetect = True
    #job_config.create_dsiposition = 'CREATE_IF_NEEDED'
    #job_config.write_disposition = 'WRITE_APPEND'
    job_config.source_format = 'NEWLINE_DELIMITED_JSON'
    uri = 'gs://yathin-sample-bucket/' + file_formatter
    load_job = client.load_table_from_uri(
        uri,
        dataset_ref.table('test_json'),
        job_config=job_config)  # API request
    #assert load_job.job_type == 'load'
    #load_job.result()  # Waits for table load to complete.
This code isn't returning any error, but the table isn't getting created.
Can someone please help by pointing out what is wrong?
It's possible that there is an error, but you're not surfacing it in your script. I am not sure why you commented out load_job.result(), but that is probably necessary to make sure the job completes.
If there still isn't an error, this script can give you a list of your last jobs and the result with any error codes. Just change the max_results kwarg:
client = bigquery.Client()
for job in client.list_jobs(max_results=1, all_users=False):
    jobid = job.job_id
    job = client.get_job(jobid)
    print("------BIG QUERY JOB ERROR REASON", job.errors)
Also, per your question in the comments about how to check whether a table exists:
from google.cloud.exceptions import NotFound
client = bigquery.Client()
try:
    dataset = client.dataset('DatasetName')
    table_ref = dataset.table('TableName')
    client.get_table(table_ref)
except NotFound:
    print('Table Not Found')
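Tying that back to the original script, a minimal sketch of waiting for the load and surfacing any errors might look like this (it assumes the same client, uri, dataset_ref and job_config as in the question):
load_job = client.load_table_from_uri(
    uri, dataset_ref.table('test_json'), job_config=job_config)
try:
    load_job.result()  # blocks until the load job finishes; raises on failure
    print('Loaded {} rows into test_json'.format(load_job.output_rows))
except Exception:
    # load_job.errors holds per-row problems such as schema mismatches
    print('Load failed with errors: {}'.format(load_job.errors))
    raise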

's3.Bucket' object has no attribute 'put': AttributeError

I am trying to download a file from a URL and upload the file to an S3 bucket.
My code is as follows:
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function
import xml.etree.ElementTree as etree
from datetime import datetime as dt
import os
import urllib
import requests
import boto3
from botocore.client import Config
from urllib.parse import urlparse
def lambda_handler(event, context):
    """ Route the incoming request based on type (LaunchRequest, IntentRequest,
    etc.) The JSON body of the request is provided in the event parameter.
    """
    print('event.session.application.applicationId='
          + event['session']['application']['applicationId'])
    # if (event['session']['application']['applicationId'] !=
    #         "amzn1.echo-sdk-ams.app.[unique-value-here]"):
    #     raise ValueError("Invalid Application ID")
    if event['session']['new']:
        on_session_started({'requestId': event['request']['requestId']},
                           event['session'])
    if event['request']['type'] == 'LaunchRequest':
        return on_launch(event['request'], event['session'])
    elif event['request']['type'] == 'IntentRequest':
        return on_intent(event['request'], event['session'])
    elif event['request']['type'] == 'SessionEndedRequest':
        return on_session_ended(event['request'], event['session'])
def on_session_started(session_started_request, session):
    """ Called when the session starts """
    print('on_session_started requestId='
          + session_started_request['requestId'] + ', sessionId='
          + session['sessionId'])

def on_launch(launch_request, session):
    """ Called when the user launches the skill without specifying what they
    want
    """
    print('on_launch requestId=' + launch_request['requestId']
          + ', sessionId=' + session['sessionId'])
    # Dispatch to your skill's launch
    return get_welcome_response()

def on_intent(intent_request, session):
    """ Called when the user specifies an intent for this skill """
    print('on_intent requestId=' + intent_request['requestId']
          + ', sessionId=' + session['sessionId'])
    intent = intent_request['intent']
    intent_name = intent_request['intent']['name']
    # Dispatch to your skill's intent handlers
    if intent_name == 'DownloadFiles':
        return get_file(intent, session)
    elif intent_name == 'AMAZON.HelpIntent':
        return get_welcome_response()
    else:
        raise ValueError('Invalid intent')

def on_session_ended(session_ended_request, session):
    """ Called when the user ends the session. Is not called when the skill returns should_end_session=true """
    print('on_session_ended requestId='
          + session_ended_request['requestId'] + ', sessionId='
          + session['sessionId'])
    # add cleanup logic here
# --------------- Functions that control the skill's behavior ------------------

def get_welcome_response():
    """ If we wanted to initialize the session to have some attributes we could add those here """
    session_attributes = {}
    card_title = 'Welcome'
    speech_output = \
        "Welcome to file download Application. Please ask me to download files by saying, Ask downloader for download"
    # If the user either does not reply to the welcome message or says something
    # that is not understood, they will be prompted again with this text.
    reprompt_text = \
        "Please ask me to download files by saying, Ask downloader for download"
    should_end_session = False
    return build_response(session_attributes,
                          build_speechlet_response(card_title,
                                                   speech_output, reprompt_text,
                                                   should_end_session))
def get_file(intent, session):
    """ Grabs the files from the path that have to be downloaded """
    card_title = intent['name']
    session_attributes = {}
    should_end_session = True
    username = '*'
    password = '*'
    ACCESS_KEY_ID = '*'
    ACCESS_SECRET_KEY = '*+9'
    BUCKET_NAME = 'lambda-file-upload'
    url = 'https://drive.google.com/drive/xyz'
    filename = os.path.basename(urlparse(url).path)
    # urllib.urlretrieve(url, "code.zip")
    r = requests.get(url, auth=(username, password))
    if r.status_code == 200:
        with open("/tmp/" + filename, 'wb') as out:
            for bits in r.iter_content():
                out.write(bits)
    data = open("/tmp/" + filename, 'rb')
    # S3 Connect
    s3 = boto3.resource(
        's3',
        aws_access_key_id=ACCESS_KEY_ID,
        aws_secret_access_key=ACCESS_SECRET_KEY,
        config=Config(signature_version='s3v4')
    )
    # Uploaded File
    s3.Bucket(BUCKET_NAME).put(Key=filename, Body=data, ACL='public-read')
    speech_output = "The file" + filename + "has been downloaded"
    reprompt_text = ""
    return build_response(session_attributes,
                          build_speechlet_response(card_title,
                                                   speech_output, reprompt_text,
                                                   should_end_session))
# --------------- Helpers that build all of the responses ----------------------

def build_speechlet_response(
    title,
    output,
    reprompt_text,
    should_end_session,
):
    return {
        'outputSpeech': {'type': 'PlainText', 'text': output},
        'card': {'type': 'Simple', 'title': 'SessionSpeechlet - '
                 + title, 'content': 'SessionSpeechlet - ' + output},
        'reprompt': {'outputSpeech': {'type': 'PlainText',
                                      'text': reprompt_text}},
        'shouldEndSession': should_end_session,
    }

def build_response(session_attributes, speechlet_response):
    return {'version': '1.0', 'sessionAttributes': session_attributes,
            'response': speechlet_response}
I am getting the following error:
's3.Bucket' object has no attribute 'put': AttributeError
I am trying to create an Alexa skill that downloads a file from a URL, hence the Lambda function. I am new to AWS Lambda and S3 and would really appreciate some help.
As per the official docs here, instead of
s3.Bucket(BUCKET_NAME).put(Key=filename, Body=data, ACL='public-read')
you should use the put_object() function:
s3.Bucket(BUCKET_NAME).put_object(Key=filename, Body=data, ACL='public-read')
The Bucket sub-resource in boto3 doesn't have a put method, which is why you are getting this error. The error is self-explanatory in that matter:
's3.Bucket' object has no attribute 'put'
Use the provided API method put_object instead. Or you can simply replace the line
s3.Bucket(BUCKET_NAME).put(Key=filename, Body=data, ACL='public-read')
with
s3.Bucket(BUCKET_NAME).put_object(Key=filename, Body=data, ACL='public-read')
For more information about the method syntax and parameters, see the Boto3 documentation for S3.
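For completeness, a small sketch of the two equivalent resource-level calls (variable names follow the question's get_file function); in boto3 the put() method lives on the Object resource rather than on Bucket:
import boto3

s3 = boto3.resource('s3')

# Upload through the bucket resource:
s3.Bucket(BUCKET_NAME).put_object(Key=filename, Body=data, ACL='public-read')

# Or address the object directly; Object (unlike Bucket) does have put():
s3.Object(BUCKET_NAME, filename).put(Body=data, ACL='public-read')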
Had a similar issue due to a different reason:
# This will give an error while getting objects:
s3 = boto3.client('s3', region_name='us-east-1')
bucket = 'mybucketname'
# This works while getting objects:
s3 = boto3.resource('s3')
bucket = s3.Bucket('mybucketname')
# Now get objects:
for object in bucket.objects.filter(Prefix="foldername/"):
    key = object.key

How to check if a file has completed uploading into S3 Bucket using Boto in Python?

I'm trying to upload an image to an S3 bucket using boto. After the image has successfully uploaded, I want to perform a certain operation using the file URL of the image in the S3 bucket. The problem is that sometimes the image doesn't upload fast enough, and I end up with a server error when I perform the operation that depends on the image's file URL.
This is my source code. I'm using Python Flask.
def search_test(consumer_id):
    consumer = session.query(Consumer).filter_by(consumer_id=consumer_id).one()
    products = session.query(Product).all()
    product_dictionary = {'Products': [p.serialize for p in products]}
    if request.method == 'POST':
        p_product_image_url = request.files['product_upload_url']
        s3 = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        bucket = s3.get_bucket(AWS_BUCKET_NAME)
        k = Key(bucket)
        if p_product_image_url and allowed_file(p_product_image_url.filename):
            # Read the contents of the file
            file_content = p_product_image_url.read()
            # Use Boto to upload the file to S3
            k.set_metadata('Content-Type', mimetypes.guess_type(p_product_image_url.filename))
            k.key = secure_filename(p_product_image_url.filename)
            k.set_contents_from_string(file_content)
            print('consumer search upload successful')
            new_upload = Uploads(picture_upload_url=k.key.replace(' ', '+'), consumer=consumer)
            session.add(new_upload)
            session.commit()
            new_result = jsonify(Result=perform_actual_search(amazon_s3_base_url + k.key.replace(' ', '+'),
                                                              product_dictionary))
            return new_result
    else:
        return render_template('upload_demo.html', consumer_id=consumer_id)
The jsonify call needs a valid image URL to perform the operation. It works sometimes and sometimes it doesn't; I suspect this is because the image has not finished uploading by the time that line of code executes.
The perform_actual_search method is as follows:
def get_image_search_results(image_url):
    global description
    url = ('http://style.vsapi01.com/api-search/by-url/?apikey=%s&url=%s' % (just_visual_api_key, image_url))
    h = httplib2.Http()
    response, content = h.request(url,
                                  'GET')  # alternatively write content=h.request((url,'GET')[1]) ///Numbr 2 in our array
    result = json.loads(content)
    result_dictionary = []
    for i in range(0, 10):
        if result:
            try:
                if result['errorMessage']:
                    result_dictionary = []
            except:
                pass
            if result['images'][i]:
                images = result['images'][i]
                jv_img_url = images['imageUrl']
                title = images['title']
                try:
                    if images['description']:
                        description = images['description']
                    else:
                        description = "no description"
                except:
                    pass
                # print("\njv_img_url: %s,\ntitle: %s,\ndescription: %s\n\n" % (
                #     jv_img_url, title, description))
                image_info = {
                    'image_url': jv_img_url,
                    'title': title,
                    'description': description,
                }
                result_dictionary.append(image_info)
    if result_dictionary != []:
        # for i in range(len(result_dictionary)):
        #     print(result_dictionary[i])
        #     print("\n\n")
        return result_dictionary
    else:
        return []
def performSearch(jv_input_dictionary, imagernce_products_dict):
    print jv_input_dictionary
    print imagernce_products_dict
    global common_desc_ratio
    global isReady
    image_search_results = []
    if jv_input_dictionary != []:
        for i in range(len(jv_input_dictionary)):
            print jv_input_dictionary[i]
            for key in jv_input_dictionary[i]:
                if key == 'description':
                    input_description = jv_input_dictionary[i][key]
                    s1w = re.findall('\w+', input_description.lower())
                    s1count = Counter(s1w)
                    print input_description
                    for j in imagernce_products_dict:
                        if j == 'Products':
                            for q in range(len(imagernce_products_dict['Products'])):
                                for key2 in imagernce_products_dict['Products'][q]:
                                    if key2 == 'description':
                                        search_description = imagernce_products_dict['Products'][q]['description']
                                        print search_description
                                        s2w = re.findall('\w+', search_description.lower())
                                        s2count = Counter(s2w)
                                        # Commonality magic
                                        common_desc_ratio = difflib.SequenceMatcher(None, s1w, s2w).ratio()
                                        print('Common ratio is: %.2f' % common_desc_ratio)
                                        if common_desc_ratio > 0.09:
                                            image_search_results.append(imagernce_products_dict['Products'][q])
    if image_search_results:
        print image_search_results
        return image_search_results
    else:
        return {'404': 'No retailers registered with us currently own this product.'}

def perform_actual_search(image_url, imagernce_product_dictionary):
    return performSearch(get_image_search_results(image_url), imagernce_product_dictionary)
Any help solving this would be greatly appreciated.
I would configure S3 to generate notifications on events such as s3:ObjectCreated:*.
Notifications can be posted to an SNS topic, an SQS queue, or can directly trigger a Lambda function.
More details about S3 notifications: http://docs.aws.amazon.com/AmazonS3/latest/dev/NotificationHowTo.html
You should rewrite your code to separate the upload part from the image-processing part. The latter can be implemented as a Lambda function in Python.
Working in an asynchronous way is key here; writing blocking code is usually not scalable.
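A minimal sketch of such a Lambda handler is below; the bucket and key fields come from the standard S3 event payload, while the URL format, perform_actual_search and product_dictionary are stand-ins borrowed from the question and would need to be wired in for real:
import urllib.parse

def lambda_handler(event, context):
    # Each record describes an object that has finished uploading
    # (s3:ObjectCreated:*), so the image URL is valid by the time this runs.
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        key = urllib.parse.unquote_plus(record['s3']['object']['key'])
        image_url = 'https://' + bucket + '.s3.amazonaws.com/' + key  # assumed URL format
        # Hypothetical call into the existing search logic:
        # results = perform_actual_search(image_url, product_dictionary)
        print('Object created: ' + image_url)
    return {'statusCode': 200}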
You can compare the bytes written to S3 with the file size. Let's say you use the following method to write to S3:
bytes_written = key.set_contents_from_file(file_binary, rewind=True)
In your case it's set_contents_from_string.
Then I would compare bytes_written with p_product_image_url.seek(0, os.SEEK_END). If they match, the whole file has been uploaded to S3.
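A short sketch of that check, fitted to the names in the question's search_test view. It assumes (labelled below) that set_contents_from_string, like set_contents_from_file, returns the number of bytes written in boto 2, and uses bucket.lookup() as a second opinion:
# file_content, k and bucket come from the question's search_test view.
expected_size = len(file_content)
# Assumption: like set_contents_from_file, this returns the bytes written.
bytes_written = k.set_contents_from_string(file_content)

# Second opinion: a HEAD request via bucket.lookup() reports the stored size.
uploaded = bucket.lookup(k.key)
if bytes_written == expected_size and uploaded is not None and uploaded.size == expected_size:
    result = perform_actual_search(amazon_s3_base_url + k.key.replace(' ', '+'),
                                   product_dictionary)
else:
    result = None  # retry or return an error response instead of searching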
