How to fix AWS Lambda function log errors? - Python

I'm trying to extract AWS Trusted Advisor data through a Lambda function (triggered by an event scheduler) and upload it to S3. However, some part of the function throws an error. Below is my code:
##libraries
import boto3
import os
import csv
from csv import DictWriter
import time
import traceback

## bucket_name is set as env variable
bucket_name = "test-ta-reports"
fail_msg = 'Pulling Trusted Advisor data failed'
Filename = "/tmp/checks_list.csv"
obj_name = time.strftime("%Y-%m-%d-%H-%M-%S") + '/' + '.csv'

##upload to s3
def s3_upload(bucket_name, Filename, obj_name):
    if obj_name is None:
        obj_name = os.path.basename(Filename)
    try:
        s3 = boto3.client("s3", region_name="eu-west-1")
        response = s3.upload_file(Filename, bucket_name, obj_name)
        return True
    except:
        print('Data failed to upload to bucket')
        traceback.print_exc()
        return False

def lambda_handler(event, context):
    try:
        support_client = boto3.client('support', region_name='us-east-1')
        ta_checks = support_client.describe_trusted_advisor_checks(language='en')
        checks_list = {ctgs: [] for ctgs in list(set([checks['category'] for checks in ta_checks['checks']]))}
        for checks in ta_checks['checks']:
            print('Getting check:' + checks['name'] + checks['category'])
            try:
                check_summary = support_client.describe_trusted_advisor_check_summaries(
                    checkIds=[checks['id']])['summaries'][0]
                if check_summary['status'] != 'not_available':
                    checks_list[checks['category']].append(
                        [checks['name'], check_summary['status'],
                         str(check_summary['resourcesSummary']['resourcesProcessed']),
                         str(check_summary['resourcesSummary']['resourcesFlagged']),
                         str(check_summary['resourcesSummary']['resourcesSuppressed']),
                         str(check_summary['resourcesSummary']['resourcesIgnored'])
                         ])
                else:
                    print("unable to append checks")
            except:
                print('Failed to get check: ' + checks['name'])
                traceback.print_exc()
    except:
        print('Failed! Debug further.')
        traceback.print_exc()

    ##rewrite dict to csv
    with open('/tmp/checks_list.csv', 'w', newline='') as csvfile:
        csv_writer = DictWriter(csvfile, fieldnames=['status', 'hasFlaggedResources', 'timestamp', 'resourcesSummary', 'categorySpecificSummary', 'checkId'])
        csv_writer.writeheader()
        csv_writer.writerow(check_summary)
    return checks_list
    if s3_upload(bucket_name, Filename, obj_name):
        print("Successfully uploaded")

if __name__ == '__main__':
    lambda_handler(event, context)
The error logs
unable to append checks
I'm new to Python, so I'm unsure how to capture traceback stacks under the else: statement. Is there any way to modify this code to get traceback logs for the append block? Also, have I made any error in the above code? I'm unable to figure one out. Please help.

response = client.describe_trusted_advisor_check_summaries(
    checkIds=[
        'string',
    ]
)
describe_trusted_advisor_check_summaries() returns summarized results for one or more Trusted Advisor checks. Here you are checking whether check_summary['status'] is not equal to not_available, i.e. the alert status of the check is either "ok" (green), "warning" (yellow), or "error" (red); in that case, you are appending resourcesProcessed, resourcesFlagged, resourcesSuppressed, and resourcesIgnored to checks_list for further processing.
It's printing
unable to append checks
just because the status of the check is not_available. It is not an error log. Just decide how to handle the case where the check status is not_available: what should your code do then?
See the documentation of describe_trusted_advisor_check_summaries. https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/support.html#Support.Client.describe_trusted_advisor_check_summaries
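For example, a minimal sketch (my wording, reusing the names from the question's code) of how the else branch could log the skip more accurately:

    if check_summary['status'] != 'not_available':
        checks_list[checks['category']].append(...)  # as in the question
    else:
        # Not an error: Trusted Advisor has no data for this check yet,
        # so record that explicitly instead of a misleading message.
        print('Skipping check "%s": status is not_available' % checks['name'])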

Related

Python for/while loop

Today I am working on a project where incoming phone calls are transcribed and saved into text files, but I am also kind of new to Python and Python loops.
I want to loop over a SQL Server column and run each row (all of the phone call OIDs) through the Azure Speech to Text service I use. I have been stuck on this problem for a couple of days now, so I thought I might find some help here.
import azure.cognitiveservices.speech as speechsdk
import time
from os import path
from pydub import AudioSegment
import requests
import hashlib
import sys
import os.path
import pyodbc

databaseName = '*'
username = '*'
password = '*'
server = '*'
driver = '*'

try:
    CONNECTION_STRING = 'DRIVER='+driver+';SERVER='+server+';DATABASE='+databaseName+';UID='+username+';PWD='+ password
    conn = pyodbc.connect(CONNECTION_STRING)
    cursor = conn.cursor()
    storedproc = "* = *'"
    cursor.execute(storedproc)
    row = cursor.fetchone()
    while row:
        array = [(int(row[1]))]
        row = cursor.fetchone()

    i = 0
    while i < len(array):
        OID = (array[i])
        i = i + 1
        print(OID)

    string = f"{OID}*"
    encoded = string.encode()
    result = hashlib.sha256(encoded)
    resultHash = (result.hexdigest())

    Telefoongesprek = requests.get(f"*{OID}", headers={f"api-key": f"{resultHash}"})
    with open("Telefoongesprek.mp3", "wb") as f:
        f.write(Telefoongesprek.content)

    src = "Telefoongesprek.mp3"
    dst = "Telefoongesprek.wav"
    sound = AudioSegment.from_file(src)
    sound.export(dst, format="wav")

    def speech_recognize_continuous_from_file():
        speech_config = speechsdk.SpeechConfig(subscription="*", region="*")
        speech_config.speech_recognition_language = "nl-NL"
        audio_config = speechsdk.audio.AudioConfig(filename="Telefoongesprek.wav")
        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

        done = False

        def stop_cb(evt):
            print('CLOSING on {}'.format(evt))
            nonlocal done
            done = True

        all_results = []
        def handle_final_result(evt):
            all_results.append(evt.result.text)

        speech_recognizer.recognized.connect(handle_final_result)
        speech_recognizer.session_started.connect(handle_final_result)
        speech_recognizer.session_stopped.connect(handle_final_result)
        speech_recognizer.canceled.connect(handle_final_result)
        speech_recognizer.session_stopped.connect(stop_cb)
        speech_recognizer.canceled.connect(stop_cb)

        speech_recognizer.start_continuous_recognition()
        while not done:
            time.sleep(.5)
        speech_recognizer.stop_continuous_recognition()

        print(all_results)
        telefoongesprek = str(all_results)
        filename = f"C:\\Users\\Beau\\Contact-verkeer\\contact-verkeer\\telefoon\\STT Transcriptions\\Telefoongesprek#{OID}.txt"
        file = open(filename, "w")
        file.write(telefoongesprek)
        file.close()

    speech_recognize_continuous_from_file()

    cursor.close()
    del cursor
    conn.close()
except Exception as e:
    print("Error: %s" % e)
Everything works separately, but I just don't know where to place the loop and which one I should use (for/while loop). Right here I'm trying to loop over an array, but I don't think this is correct.
Error message: Decoding failed. ffmpeg returned error code: 1
[mp3 # 000001cb8c57e0o0] Failed to read frame size: could not seek to 1073.
which I am pretty sure means that my Azure function can't find an mp3 file, which in turn means that the mp3-to-wav conversion doesn't work.
Thanks in advance!
If I understand your question, you have a database with lots of phone call details. One of the field values in each row is used to create the associated mp3 file. You want to run Azure speech to text on each of the mp3 files referenced in your database.
So you can do it in two ways:
Iterate through all rows in the database and create all the associated files in a folder on the local disk, with the OID as the filename.
Then write another loop to iterate through this folder and send the files for transcription to the Azure Speech to Text service (a sketch of this follows below).
The other technique is to do everything in a single loop, like the way you have shown, which will require some corrections.
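For the first technique, a rough, untested sketch that reuses the question's cursor, hashing, and request logic (the download_dir name is mine, and it calls the filename-taking version of speech_recognize_continuous_from_file defined in the modified code further down):

    import os

    download_dir = "downloads"   # hypothetical folder on the local disk
    os.makedirs(download_dir, exist_ok=True)

    # First loop: save every call as <OID>.mp3.
    row = cursor.fetchone()
    while row:
        OID = int(row[1])
        resultHash = hashlib.sha256(f"{OID}*".encode()).hexdigest()
        resp = requests.get(f"*{OID}", headers={"api-key": resultHash})
        with open(os.path.join(download_dir, f"{OID}.mp3"), "wb") as f:
            f.write(resp.content)
        row = cursor.fetchone()

    # Second loop: send every saved file for transcription.
    for name in os.listdir(download_dir):
        speech_recognize_continuous_from_file(os.path.join(download_dir, name))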
Ok, so now that part is clear, we can go into the speech to text part. Azure allows you to send the compressed format for transcription, which means you actually don't need to convert it into a wav file.
Please have a look at the modified code below with the changes:
# code snippet borrowed from azure samples
def speech_recognize_continuous_from_file(filename):

    class BinaryFileReaderCallback(speechsdk.audio.PullAudioInputStreamCallback):
        def __init__(self, filename: str):
            super().__init__()
            self._file_h = open(filename, "rb")

        def read(self, buffer: memoryview) -> int:
            try:
                size = buffer.nbytes
                frames = self._file_h.read(size)
                buffer[:len(frames)] = frames
                return len(frames)
            except Exception as ex:
                print('Exception in `read`: {}'.format(ex))
                raise

        def close(self) -> None:
            print('closing file')
            try:
                self._file_h.close()
            except Exception as ex:
                print('Exception in `close`: {}'.format(ex))
                raise

    # Creates an audio stream format. For an example we are using MP3 compressed file here
    compressed_format = speechsdk.audio.AudioStreamFormat(compressed_stream_format=speechsdk.AudioStreamContainerFormat.MP3)
    callback = BinaryFileReaderCallback(filename=filename)
    stream = speechsdk.audio.PullAudioInputStream(stream_format=compressed_format, pull_stream_callback=callback)

    speech_config = speechsdk.SpeechConfig(subscription="*", region="*")
    speech_config.speech_recognition_language = "nl-NL"
    audio_config = speechsdk.audio.AudioConfig(stream=stream)

    # Creates a speech recognizer using a file as audio input, also specify the speech language
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config, audio_config)

    done = False

    def stop_cb(evt):
        print('CLOSING on {}'.format(evt))
        nonlocal done
        done = True

    all_results = []
    def handle_final_result(evt):
        all_results.append(evt.result.text)

    speech_recognizer.recognized.connect(handle_final_result)
    speech_recognizer.session_started.connect(handle_final_result)
    speech_recognizer.session_stopped.connect(handle_final_result)
    speech_recognizer.canceled.connect(handle_final_result)
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)
    speech_recognizer.stop_continuous_recognition()

    print(all_results)
    telefoongesprek = str(all_results)
    filename = f"C:\\Users\\Beau\\Contact-verkeer\\contact-verkeer\\telefoon\\STT Transcriptions\\Telefoongesprek#{OID}.txt"
    file = open(filename, "w")
    file.write(telefoongesprek)
    file.close()

try:
    CONNECTION_STRING = 'DRIVER='+driver+';SERVER='+server+';DATABASE='+databaseName+';UID='+username+';PWD='+ password
    conn = pyodbc.connect(CONNECTION_STRING)
    cursor = conn.cursor()
    storedproc = "* = *'"
    cursor.execute(storedproc)
    row = cursor.fetchone()
    # loop through the rows
    while row:
        array = [(int(row[1]))]
        i = 0
        while i < len(array):
            OID = (array[i])
            i = i + 1
            print(OID)

            string = f"{OID}*"
            encoded = string.encode()
            result = hashlib.sha256(encoded)
            resultHash = (result.hexdigest())

            telefoongesprek_response = requests.get(f"*{OID}", headers={f"api-key": f"{resultHash}"})
            # save the file to local disk as mp3
            with open("Telefoongesprek.mp3", "wb") as f:
                f.write(telefoongesprek_response.content)
            # do the speech to text on the mp3 file
            speech_recognize_continuous_from_file(f.name)
        # fetch the next row
        row = cursor.fetchone()
    cursor.close()
    del cursor
    conn.close()
except Exception as e:
    print("Error: %s" % e)
I haven't tested this full code as I don't have the db connections with me. Please feel free to modify it for your use case and let me know if you have any issues.

Python: Passing a variable to another function

I am really new to Python and was hoping someone could help me with this. I have a function (an AWS Lambda function, actually) that I need to pass a variable value to after it is created. I'm doing this to create a campaign in Pinpoint when a segment is created.
import os
import time
import boto3
from botocore.exceptions import ClientError
from datetime import datetime, timedelta

AWS_REGION = os.environ['region']
projectId = os.environ['projectId']
importRoleArn = os.environ['importRoleArn']

def lambda_handler(event, context):
    print("Received event: " + str(event))
    for record in event['Records']:
        # Assign some variables to make it easier to work with the data in the
        # event record
        bucket = record['s3']['bucket']['name']
        key = record['s3']['object']['key']
        folder = os.path.split(key)[0]
        folder_path = os.path.join(bucket, folder)
        full_path = os.path.join(bucket, key)
        s3_url = "s3://" + folder_path
        # print(full_path)
        # Check to see if all file parts have been processed.
        if all_files_processed(bucket, folder, full_path):
            # If you haven't recently run an import job that uses a file stored in
            # the specified S3 bucket, then create a new import job. This prevents
            # the creation of duplicate segments.
            if not (check_import_jobs(bucket, folder, s3_url, full_path)):
                segmentID = create_import_job(s3_url, full_path)
                create_campaign(segmentID)
            else:
                print("Import job found with URL s3://"
                      + os.path.join(bucket, folder) + ". Aborting.")
        else:
            print("Parts haven't finished processing yet.")

# Determine if all of the file parts have been processed.
def all_files_processed(bucket, folder, full_path):
    # Use the "__ofN" part of the file name to determine how many files there
    # should be.
    number_of_parts = int((full_path.split("__of")[1]).split("_processed")[0])
    # Figure out how many keys contain the prefix for the current batch of
    # folders (basically, how many files are in the appropriate "folder").
    client = boto3.client('s3')
    objs = client.list_objects_v2(Bucket=bucket, Prefix=folder)
    file_count = objs['KeyCount']
    ready_for_import = False
    if file_count == number_of_parts:
        ready_for_import = True
    return ready_for_import

# Check Amazon Pinpoint to see if any import jobs have been created by using
# the same S3 folder.
def check_import_jobs(bucket, folder, s3_url, full_path):
    url_list = []
    print(s3_url)
    # Retrieve a list of import jobs for the current project ID.
    client = boto3.client('pinpoint')
    try:
        client_response = client.get_import_jobs(
            ApplicationId=projectId
        )
    except ClientError as e:
        print(e.response['Error']['Message'])
    else:
        segment_response = client_response['ImportJobsResponse']['Item']
        # print(segment_response)
        # Parse responses. Add all S3Url values to a list.
        for item in segment_response:
            # print(item)
            s3_url_existing = full_path
            url_list.append(s3_url_existing)
        # print(url_list)
        # Search for the current S3 URL in the list.
        if s3_url in url_list:
            found = True
        else:
            found = False
        print(found)
        return found

# Create the import job in Amazon Pinpoint.
def create_import_job(s3_url, full_path):
    client = boto3.client('pinpoint')
    segment_name = s3_url.split('/')[4]
    try:
        response = client.create_import_job(
            ApplicationId=projectId,
            ImportJobRequest={
                'DefineSegment': True,
                'Format': 'CSV',
                'RegisterEndpoints': True,
                'RoleArn': importRoleArn,
                'S3Url': s3_url,
                'SegmentName': segment_name
            }
        )
    except ClientError as e:
        print(e.response['Error']['Message'])
    else:
        print("Import job " + response['ImportJobResponse']['Id'] + " "
              + response['ImportJobResponse']['JobStatus'] + ".")
        print("Segment ID: "
              + response['ImportJobResponse']['Definition']['SegmentId'])
        print("Application ID: " + projectId)
        return response['ImportJobResponse']['Definition']['SegmentId']

def create_campaign(segmentID):
    client = boto3.client('pinpoint')
    now = datetime.now()
    dt_string = now.isoformat()
    print(type(segmentID))
    try:
        response = client.create_campaign(
            ApplicationId=projectId,
            WriteCampaignRequest={
                'Description': 'Test SMS Campaign 2',
                'MessageConfiguration': {
                    'EmailMessage': {
                        'Body': 'This is a test 2',
                        'FromAddress': 'xxx@xxx.com',
                        'HtmlBody': '<p>Test 2</p>',
                        'Title': 'This is a test 2'
                    },
                    'SMSMessage': {
                        'Body': 'Thanks for your visit to {{Attributes.Provider_Name}} on {{Attributes.Clinical_Date_of_Service}}',
                        'MessageType': 'PROMOTIONAL',
                        'SenderId': 'XXX'
                    }
                },
                'Schedule': {
                    'Frequency': 'ONCE',
                    'IsLocalTime': True,
                    'StartTime': dt_string,
                    'Timezone': 'UTC'
                },
                'Name': 'Test Email Campaign 6',
                'SegmentId': segmentID
            }
        )
    except ClientError as e:
        print(e.response['Error']['Message'])
    else:
        print('Campaign Created')
The issue comes up in create_campaign, which I want to send the SegmentID to. I end up getting the following error...
"Segment specified in SegmentId is not found"
I can print the segmentID to the console no problem; it's just getting it to pass to the function that is the roadblock I'm hitting. Thanks in advance!
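One thing worth checking (an assumption on my part, not something confirmed in this thread): create_import_job only starts the import, so the segment behind the returned SegmentId may not exist yet when create_campaign runs immediately afterwards. A minimal sketch that polls the job with boto3's get_import_job before creating the campaign; create_import_job would also need to return the job's Id (response['ImportJobResponse']['Id']):

    def wait_for_import_job(client, job_id, timeout=120):
        # Poll Pinpoint until the import job finishes or we give up.
        waited = 0
        while waited < timeout:
            job = client.get_import_job(ApplicationId=projectId, JobId=job_id)
            status = job['ImportJobResponse']['JobStatus']
            if status == 'COMPLETED':
                return True
            if status in ('FAILED', 'FAILING'):
                return False
            time.sleep(5)
            waited += 5
        return False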

Python - Problem using loops to get data day by day

I'm trying to make a Google API downloader that will work around the API's limits.
Unfortunately, what I have does not work. Without a loop I have to change the dates by hand, which makes downloading historical data tedious; for new data there is no problem, since the script fetches data daily.
Below is the code where I try to download data from a specific range, day by day.
The problem occurs in this part of the code:
def get_top_keywords(service, profile_id):
    for day_number in range(total_days):
        return service.data().ga().get(
            ids='ga:' + profile_id,
            start_date='1daysAgo',
            end_date=(start_date + dt.timedelta(days=day_number)).date(),
            metrics='ga:sessions, ga:newUsers, ga:users, ga:organicSearches, ga:pageviews, ga:bounceRate',
            dimensions='ga:date, ga:source, ga:medium',
            max_results='10000').execute()
Below is the whole code:
from __future__ import print_function

import argparse
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

import csv
from googleapiclient.errors import HttpError
from googleapiclient import sample_tools
from oauth2client.client import AccessTokenRefreshError
import datetime as dt

start_date = dt.datetime(2019, 01, 1)
end_date = dt.datetime(2019, 01, 5)
total_days = (end_date - start_date).days + 1

def main(argv):
    # Authenticate and construct service.
    service, flags = sample_tools.init(
        argv, 'analytics', 'v3', __doc__, __file__,
        scope='https://www.googleapis.com/auth/analytics.readonly')
    # Try to make a request to the API. Print the results or handle errors.
    try:
        first_profile_id = '11111111111'  # Hard Code View Profile ID Here
        if not first_profile_id:
            print('Could not find a valid profile for this user.')
        else:
            results = get_top_keywords(service, first_profile_id)
            print_results(results)
    except TypeError as error:
        # Handle errors in constructing a query.
        print(('There was an error in constructing your query : %s' % error))
    except HttpError as error:
        # Handle API errors.
        print(('Arg, there was an API error : %s : %s' %
               (error.resp.status, error._get_reason())))
    except AccessTokenRefreshError:
        # Handle Auth errors.
        print('The credentials have been revoked or expired, please re-run '
              'the application to re-authorize')

def get_first_profile_id(service):
    accounts = service.management().accounts().list().execute()
    if accounts.get('items'):
        firstAccountId = accounts.get('items')[0].get('id')
        webproperties = service.management().webproperties().list(
            accountId=firstAccountId).execute()
        if webproperties.get('items'):
            firstWebpropertyId = webproperties.get('items')[0].get('id')
            profiles = service.management().profiles().list(
                accountId=firstAccountId,
                webPropertyId=firstWebpropertyId).execute()
            if profiles.get('items'):
                return profiles.get('items')[0].get('id')
    return None

def get_top_keywords(service, profile_id):
    for day_number in range(total_days):
        return service.data().ga().get(
            ids='ga:' + profile_id,
            start_date='1daysAgo',
            end_date=(start_date + dt.timedelta(days=day_number)).date(),
            metrics='ga:sessions, ga:newUsers, ga:users, ga:organicSearches, ga:pageviews, ga:bounceRate',
            dimensions='ga:date, ga:source, ga:medium',
            max_results='10000').execute()

def print_results(results):
    print()
    print('Profile Name: %s' % results.get('profileInfo').get('profileName'))
    print()
    # Open a file.
    filepath = '/temp/'  # change this to your actual file path
    filename = 'temp.csv'  # change this to your actual file name
    f = open(filepath + filename, 'wt')
    # Wrap file with a csv.writer
    writer = csv.writer(f, lineterminator='\n')
    # Write header.
    header = [h['name'][3:] for h in results.get('columnHeaders')]  # this takes the column headers and gets rid of ga: prefix
    writer.writerow(header)
    print(''.join('%30s' % h for h in header))
    # Write data table.
    if results.get('rows', []):
        for row in results.get('rows'):
            writer.writerow(row)
            print(''.join('%30s' % r for r in row))
        print('\n')
        print('Success Data Written to CSV File')
        print('filepath = ' + filepath)
        print('filename = ' + filename)
    else:
        print('No Rows Found')
    # Close the file.
    f.close()

if __name__ == '__main__':
    main(sys.argv)
What should I improve so that the date automatically advances to the end of the range in the loop? Why doesn't the loop work? I'm just starting with Python, but I'm trying.
Yes, I added a loop; I just did something wrong, because when I test the loop by itself it works, but when I apply it to the code connecting to the API, it doesn't work anymore:
for day_number in range(total_days):
    return service.data().ga().get(
        ids='ga:' + profile_id,
        start_date='1daysAgo',
        end_date=(start_date + dt.timedelta(days=day_number)).date(),
        metrics='ga:sessions, ga:newUsers, ga:users, ga:organicSearches, ga:pageviews, ga:bounceRate',
        dimensions='ga:date, ga:source, ga:medium',
        max_results='10000').execute()
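The immediate problem is that return exits get_top_keywords on the first pass through the loop, so only one request is ever made. A minimal, untested sketch of one way to restructure it: collect each day's response in a list (note that the GA v3 API expects start_date and end_date as 'YYYY-MM-DD' strings, so the date is formatted here rather than passed as a date object; print_results would then need to iterate over the returned list):

    def get_top_keywords(service, profile_id):
        daily_results = []
        for day_number in range(total_days):
            day = (start_date + dt.timedelta(days=day_number)).strftime('%Y-%m-%d')
            # Query a single day per iteration instead of returning immediately.
            daily_results.append(service.data().ga().get(
                ids='ga:' + profile_id,
                start_date=day,
                end_date=day,
                metrics='ga:sessions, ga:newUsers, ga:users, ga:organicSearches, ga:pageviews, ga:bounceRate',
                dimensions='ga:date, ga:source, ga:medium',
                max_results='10000').execute())
        return daily_results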

How to check if a file has completed uploading into S3 Bucket using Boto in Python?

I'm trying to upload an image into an S3 bucket using boto. After the image has successfully uploaded, I want to perform a certain operation using the file URL of the image in the S3 bucket. The problem is that sometimes the image doesn't upload fast enough and I end up with a server error when I want to perform the operation dependent on the file URL of the image.
This is my source code. I'm using Python Flask.
def search_test(consumer_id):
    consumer = session.query(Consumer).filter_by(consumer_id=consumer_id).one()
    products = session.query(Product).all()
    product_dictionary = {'Products': [p.serialize for p in products]}
    if request.method == 'POST':
        p_product_image_url = request.files['product_upload_url']
        s3 = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        bucket = s3.get_bucket(AWS_BUCKET_NAME)
        k = Key(bucket)
        if p_product_image_url and allowed_file(p_product_image_url.filename):
            # Read the contents of the file
            file_content = p_product_image_url.read()
            # Use Boto to upload the file to S3
            k.set_metadata('Content-Type', mimetypes.guess_type(p_product_image_url.filename))
            k.key = secure_filename(p_product_image_url.filename)
            k.set_contents_from_string(file_content)
            print('consumer search upload successful')
            new_upload = Uploads(picture_upload_url=k.key.replace(' ', '+'), consumer=consumer)
            session.add(new_upload)
            session.commit()
            new_result = jsonify(Result=perform_actual_search(amazon_s3_base_url + k.key.replace(' ', '+'),
                                                              product_dictionary))
            return new_result
    else:
        return render_template('upload_demo.html', consumer_id=consumer_id)
The jsonify method needs a valid image URL to perform the operation. It works sometimes, and sometimes it doesn't; I suspect the reason is that the image has not finished uploading by the time that line of code executes.
The perform_actual_search method is as follows:
def get_image_search_results(image_url):
    global description
    url = ('http://style.vsapi01.com/api-search/by-url/?apikey=%s&url=%s' % (just_visual_api_key, image_url))
    h = httplib2.Http()
    response, content = h.request(url,
                                  'GET')  # alternatively write content=h.request((url,'GET')[1]) ///Numbr 2 in our array
    result = json.loads(content)
    result_dictionary = []
    for i in range(0, 10):
        if result:
            try:
                if result['errorMessage']:
                    result_dictionary = []
            except:
                pass
            if result['images'][i]:
                images = result['images'][i]
                jv_img_url = images['imageUrl']
                title = images['title']
                try:
                    if images['description']:
                        description = images['description']
                    else:
                        description = "no description"
                except:
                    pass
                # print("\njv_img_url: %s,\ntitle: %s,\ndescription: %s\n\n" % (
                #     jv_img_url, title, description))
                image_info = {
                    'image_url': jv_img_url,
                    'title': title,
                    'description': description,
                }
                result_dictionary.append(image_info)
    if result_dictionary != []:
        # for i in range(len(result_dictionary)):
        #     print (result_dictionary[i])
        #     print("\n\n")
        return result_dictionary
    else:
        return []

def performSearch(jv_input_dictionary, imagernce_products_dict):
    print jv_input_dictionary
    print imagernce_products_dict
    global common_desc_ratio
    global isReady
    image_search_results = []
    if jv_input_dictionary != []:
        for i in range(len(jv_input_dictionary)):
            print jv_input_dictionary[i]
            for key in jv_input_dictionary[i]:
                if key == 'description':
                    input_description = jv_input_dictionary[i][key]
                    s1w = re.findall('\w+', input_description.lower())
                    s1count = Counter(s1w)
                    print input_description
                    for j in imagernce_products_dict:
                        if j == 'Products':
                            for q in range(len(imagernce_products_dict['Products'])):
                                for key2 in imagernce_products_dict['Products'][q]:
                                    if key2 == 'description':
                                        search_description = imagernce_products_dict['Products'][q]['description']
                                        print search_description
                                        s2w = re.findall('\w+', search_description.lower())
                                        s2count = Counter(s2w)
                                        # Commonality magic
                                        common_desc_ratio = difflib.SequenceMatcher(None, s1w, s2w).ratio()
                                        print('Common ratio is: %.2f' % common_desc_ratio)
                                        if common_desc_ratio > 0.09:
                                            image_search_results.append(imagernce_products_dict['Products'][q])
    if image_search_results:
        print image_search_results
        return image_search_results
    else:
        return {'404': 'No retailers registered with us currently own this product.'}

def perform_actual_search(image_url, imagernce_product_dictionary):
    return performSearch(get_image_search_results(image_url), imagernce_product_dictionary)
Any help solving this would be greatly appreciated.
I would configure S3 to generate notifications on events such as s3:ObjectCreated:*.
Notifications can be posted to an SNS topic, an SQS queue, or they can directly trigger a Lambda function.
More details about S3 notifications: http://docs.aws.amazon.com/AmazonS3/latest/dev/NotificationHowTo.html
You should rewrite your code to separate the upload part from the image processing part. The latter can be implemented as a Lambda function in Python.
Working in an asynchronous way is key here; writing blocking code is usually not scalable.
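For illustration, a minimal sketch (mine, not the answerer's) of the processing side as a Lambda function; the event layout is the standard S3 notification format, and perform_actual_search / product_dictionary are the names from the question:

    # Hypothetical Lambda triggered by s3:ObjectCreated:* events.
    def lambda_handler(event, context):
        for record in event['Records']:
            bucket = record['s3']['bucket']['name']
            key = record['s3']['object']['key']
            # The notification only fires once the object fully exists,
            # so its URL is safe to use at this point.
            image_url = 'https://%s.s3.amazonaws.com/%s' % (bucket, key)
            perform_actual_search(image_url, product_dictionary)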
You can compare the bytes written to S3 with the file size. Let's say you use the following method to write to S3:
bytes_written = key.set_contents_from_file(file_binary, rewind=True)
(in your case it's set_contents_from_string). Then compare bytes_written with the size obtained from p_product_image_url.seek(0, os.SEEK_END); if they match, the whole file has been uploaded to S3.
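Adapted to the question's handler, a rough sketch of that comparison (untested; it assumes set_contents_from_string, like set_contents_from_file, returns the number of bytes written, and if it returns None you could check k.size instead):

    import os

    file_content = p_product_image_url.read()
    bytes_written = k.set_contents_from_string(file_content)
    # Size of the uploaded stream, via seek-to-end plus tell().
    p_product_image_url.seek(0, os.SEEK_END)
    expected_size = p_product_image_url.tell()
    if bytes_written == expected_size:
        # The whole file is on S3; its URL is now safe to use.
        print('upload complete')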

Grab Bing Wallpaper with python3?

I want to write a python script that grabs the bing.com wallpaper and saves it.
The URLs of these wallpapers look like:
http://www.bing.com/az/hprichbg/rb/EuropeESA_DE-DE7849418832_1920x1080.jpg
http://www.bing.com/az/hprichbg/rb/CanisLupus_DE-DE11366975292_1920x1080.jpg
http://www.bing.com/az/hprichbg/rb/HouseBoats_DE-DE8695714746_1920x1080.jpg
Is there a way to find the image URL of today's wallpaper automatically?
Based on a few of the useful answers in this related SO question, here's a simple Python script to fetch the Bing photo of the day:
import requests
import json

BING_URI_BASE = "http://www.bing.com"
BING_WALLPAPER_PATH = "/HPImageArchive.aspx?format=js&idx=0&n=1&mkt=en-US"

# open the Bing HPImageArchive URI and ask for a JSON response
resp = requests.get(BING_URI_BASE + BING_WALLPAPER_PATH)

if resp.status_code == 200:
    json_response = json.loads(resp.content)
    wallpaper_path = json_response['images'][0]['url']
    filename = wallpaper_path.split('/')[-1]
    wallpaper_uri = BING_URI_BASE + wallpaper_path

    # open the actual wallpaper uri, and write the response as an image on the filesystem
    response = requests.get(wallpaper_uri)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            f.write(response.content)
    else:
        raise ValueError("[ERROR] non-200 response from Bing server for '{}'".format(wallpaper_uri))
else:
    raise ValueError("[ERROR] non-200 response from Bing server for '{}'".format(BING_URI_BASE + BING_WALLPAPER_PATH))
This will write a file such as TurtleTears_EN-US7942276596_1920x1080.jpg to the directory where the script is executed. Of course, you can tweak a whole bunch of things here, but it gets the job done reasonably easily.
Grab it and save it in a folder by using this code:
import datetime
from urllib.request import urlopen, urlretrieve
from xml.dom import minidom
import os
import sys

def join_path(*args):
    # Takes a list of values or multiple values and returns a valid path.
    if isinstance(args[0], list):
        path_list = args[0]
    else:
        path_list = args
    val = [str(v).strip(' ') for v in path_list]
    return os.path.normpath('/'.join(val))

dir_path = os.path.dirname(os.path.realpath(__file__))
save_dir = join_path(dir_path, 'images')

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

def set_wallpaper(pic_path):
    if sys.platform.startswith('win32'):
        cmd = 'REG ADD \"HKCU\Control Panel\Desktop\" /v Wallpaper /t REG_SZ /d \"%s\" /f' % pic_path
        os.system(cmd)
        os.system('rundll32.exe user32.dll, UpdatePerUserSystemParameters')
        print('Wallpaper is set.')
    elif sys.platform.startswith('linux'):
        os.system(''.join(['gsettings set org.gnome.desktop.background picture-uri file://', pic_path]))
        print('Wallpaper is set.')
    else:
        print('OS not supported.')
        return
    return

def download_old_wallpapers(minus_days=False):
    """Uses download_wallpaper(set_wallpaper=False) to download the last 20 wallpapers.
    If minus_days is given an integer a specific day in the past will be downloaded.
    """
    if minus_days:
        download_wallpaper(idx=minus_days, use_wallpaper=False)
        return
    for i in range(0, 20):  # max 20
        download_wallpaper(idx=i, use_wallpaper=False)

def download_wallpaper(idx=0, use_wallpaper=True):
    # Getting the XML File
    try:
        usock = urlopen(''.join(['http://www.bing.com/HPImageArchive.aspx?format=xml&idx=',
                                 str(idx), '&n=1&mkt=ru-RU']))  # ru-RU, because they always have 1920x1200 resolution
    except Exception as e:
        print('Error while downloading #', idx, e)
        return
    try:
        xmldoc = minidom.parse(usock)
    # This is raised when there is trouble finding the image url.
    except Exception as e:
        print('Error while processing XML index #', idx, e)
        return
    # Parsing the XML File
    for element in xmldoc.getElementsByTagName('url'):
        url = 'http://www.bing.com' + element.firstChild.nodeValue
        # Get Current Date as fileName for the downloaded Picture
        now = datetime.datetime.now()
        date = now - datetime.timedelta(days=int(idx))
        pic_path = join_path(save_dir, ''.join([date.strftime('bing_wp_%d-%m-%Y'), '.jpg']))
        if os.path.isfile(pic_path):
            print('Image of', date.strftime('%d-%m-%Y'), 'already downloaded.')
            if use_wallpaper:
                set_wallpaper(pic_path)
            return
        print('Downloading: ', date.strftime('%d-%m-%Y'), 'index #', idx)
        # Download and Save the Picture
        # Get a higher resolution by replacing the file name
        urlretrieve(url.replace('_1366x768', '_1920x1200'), pic_path)
        # Set Wallpaper if wanted by user
        if use_wallpaper:
            set_wallpaper(pic_path)

if __name__ == "__main__":
    download_wallpaper()
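Finally, if you already have a list of wallpaper URLs, the snippet below fetches them in one loop (note it uses the Python 2 urllib API; in Python 3 the equivalent is urllib.request.urlretrieve):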
for number, url in enumerate(list_of_urls):
    urllib.urlretrieve(url, 'Image {}.jpg'.format(number + 1))
