S3 show buckets last modified - python

I'm trying to list the last modified file in each S3 bucket for a report, but the report is showing the first modified file (i.e. when the first file was uploaded, not the last).
I'm using this:
top_level_folders[folder]['modified'] = obj.last_modified
and adding to the report here:
report.add_row([folder[1]['name'], folder[1]['objects'],
                str(round(folder[1]['size'], 2)), status, folder[1]['modified']])
I've tried adding =obj.last_modified, reverse=True but I keep getting invalid syntax errors.
This is what the report looks like:

I'm not sure exactly what you're doing when you write to the report, but the code below will return a list of dictionaries with the name of each bucket and the time its last-modified file was modified. E.g.,
[
    {
        'Folder': 'bucket_1',
        'Last Modified': '2021-11-30 13:10:32+00:00'
    },
    {
        'Folder': 'bucket_2',
        'Last Modified': '2021-09-27 17:18:27+00:00'
    }
]
import datetime

import boto3

s3_client = boto3.client('s3',
                         aws_access_key_id="AKXXXXXXXXXXXXXXXXXX",
                         aws_secret_access_key="YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY",
                         region_name="eu-west-2"
                         )

def find_last_modified_file_in_bucket(bucket_name: str) -> datetime.datetime:
    last_modified = []
    for bucket_object in s3_client.list_objects(Bucket=bucket_name)["Contents"]:
        last_modified.append(bucket_object["LastModified"])
    return max(last_modified)

def fetch_last_modified() -> list[dict]:
    last_modified_file_by_bucket: list[dict] = []
    for bucket_name in list(map(lambda bucket: bucket["Name"], s3_client.list_buckets()["Buckets"])):
        latest_time_of_last_modified_file = find_last_modified_file_in_bucket(bucket_name)
        last_modified_file_by_bucket.append(
            {
                "Folder": bucket_name,
                "Last Modified": str(latest_time_of_last_modified_file)
            }
        )
    return last_modified_file_by_bucket
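Note that list_objects returns at most 1,000 keys per call, so a very large bucket could report a stale timestamp. A minimal sketch of the same lookup with a paginator (reusing the s3_client defined above):

def find_last_modified_with_paginator(bucket_name: str):
    # Paginate so buckets with more than 1,000 objects are scanned in full
    paginator = s3_client.get_paginator("list_objects_v2")
    latest = None
    for page in paginator.paginate(Bucket=bucket_name):
        for bucket_object in page.get("Contents", []):
            if latest is None or bucket_object["LastModified"] > latest:
                latest = bucket_object["LastModified"]
    return latest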
Without the source code or knowledge of the type of folder, I can't say with certainty how you would use the above code to update the report, but it will likely come down to iterating over the list returned by fetch_last_modified(). E.g.,
def update_report(report: Report, folder_with_last_modified: list):
    for folder in folder_with_last_modified:
        report.add_row(folder['Folder'], folder['Last Modified'])

folder_with_last_modified = fetch_last_modified()
update_report(report, folder_with_last_modified)


s3 policy update through lambda/boto3 - list index out of range error

I have a requirement to update an S3 bucket policy (get the current resource ARNs and append a new Resource ARN). Here is the snippet of code:
import os
import boto3
import pprint
import json
import sys

s3 = boto3.resource('s3')
buckets = ["test090909"]
for s3b in buckets:
    print("processing bucket" + s3b)
    bucket = s3.Bucket(s3b)
    policy = bucket.Policy()
    p = json.loads(policy.policy)
    print(p)  # Good until here
    stmt = p["Statement"][1]
    print(stmt)
The output of p is as below; all good until that point. But to get the Resource section I thought it should be stmt = p["Statement"][1], since this is a dict and the list index is 1, yet I get IndexError: list index out of range. If I do stmt = p["Statement"][0] it returns everything. I believe I am doing something wrong with the string/JSON items.
{
    u'Version': u'2012-10-17',
    u'Id': u'Policy1544682557303',
    u'Statement': [
        {
            u'Action': u's3:DeleteBucket',
            u'Principal': {
                u'Service': u'config.amazonaws.com'
            },
            u'Resource': [
                u'arn:aws:s3:::test090909',
                u'arn:aws:s3:::test090909/AWSLogs/111111111111/Config/*'
            ],
            u'Effect': u'Allow',
            u'Sid': u'Stmt1544682555302'
        }
    ]
}
it should be "stmt = p["Statement"][1]" as this is dict and list index is 1 but i am getting an error "IndexError: list index out of range" but if i do "stmt = p["Statement"][0]" it returning everything
This is not correct. Given the JSON output, p["Statement"][1] doesn't exist, hence the out-of-range error. p["Statement"] contains only one item, so p["Statement"][0] returns "everything" because it actually contains everything: a single statement dict whose Resource key holds the list of ARNs.
There you go:
>>> print(p["Statement"][0])
{u'Action': u's3:DeleteBucket', u'Principal': {u'Service': u'config.amazonaws.com'}, u'Resource': [u'arn:aws:s3:::test090909', u'arn:aws:s3:::test090909/AWSLogs/111111111111/Config/*'], u'Effect': u'Allow', u'Sid': u'Stmt1544682555302'}
>>> print(p["Statement"][0]["Resource"])
[u'arn:aws:s3:::test090909', u'arn:aws:s3:::test090909/AWSLogs/111111111111/Config/*']
Then if you want to access one of the specific resources:
>>> print(p["Statement"][0]["Resource"][0])
arn:aws:s3:::test090909
>>> print(p["Statement"][0]["Resource"][1])
arn:aws:s3:::test090909/AWSLogs/111111111111/Config/*
Happy coding!
Python indexing begins at 0, so p["Statement"][0] will return the 1st item in that list.
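To go on and meet the original requirement (appending a new Resource ARN), the list can be modified in place and the policy written back with the BucketPolicy resource's put method. A hedged sketch building on the question's code; the new ARN here is made up for illustration:

new_arn = "arn:aws:s3:::test090909/AWSLogs/222222222222/Config/*"  # hypothetical ARN
p["Statement"][0]["Resource"].append(new_arn)
policy.put(Policy=json.dumps(p))  # write the updated policy back to the bucket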

Compare images and save in dynamodb aws

Hi, I want to write a Lambda function that works like this: I have two folders in an S3 bucket. The first holds "owner" pictures and the second holds random pictures. I want to compare every picture with the owner and then save each picture in DynamoDB with the owner name it matched. At the moment I am lost in the face-detection API and am doing something like this:
BUCKET = "ais-django"
KEY = "20180530105812.jpeg"
FEATURES_BLACKLIST = ("Landmarks", "Emotions", "Pose", "Quality", "BoundingBox", "Confidence")

def detect_faces(bucket, key, attributes=['ALL'], region="eu-west-1"):
    rekognition = boto3.client("rekognition", region)
    response = rekognition.detect_faces(
        Image={
            "S3Object": {
                "Bucket": bucket,
                "Name": key,
            }
        },
        Attributes=attributes,
    )
    return response['FaceDetails']

for face in detect_faces(BUCKET, KEY):
    print "Face ({Confidence}%)".format(**face)
    # emotions
    for emotion in face['Emotions']:
        print "  {Type} : {Confidence}%".format(**emotion)
    # quality
    for quality, value in face['Quality'].iteritems():
        print "  {quality} : {value}".format(quality=quality, value=value)
    # facial features
    for feature, data in face.iteritems():
        if feature not in FEATURES_BLACKLIST:
            print "  {feature}({data[Value]}) : {data[Confidence]}%".format(feature=feature, data=data)
You can use the compare_faces operation of the Rekognition client. Here is pseudocode reflecting the operation (note: this code is not tested and is only meant to show the concepts). You can adjust the similarity threshold to your needs.
import boto3

client = boto3.client('rekognition', region_name='eu-west-1')
keyNamesInsideRandomFolder = ['1.jpg', '2.jpg']
for key in keyNamesInsideRandomFolder:
    response = client.detect_faces(
        Image={
            'S3Object': {
                'Bucket': "bucketname",
                'Name': "randomfolder/" + key
            }
        }
    )
    faceDetails = response['FaceDetails']
    hasFace = len(faceDetails) > 0
    if hasFace:
        response = client.compare_faces(
            SimilarityThreshold=90,
            SourceImage={
                'S3Object': {
                    'Bucket': "bucketname",
                    'Name': "ownerfolder/ownerimage.jpg"
                }
            },
            TargetImage={
                'S3Object': {
                    'Bucket': "bucketname",
                    'Name': "randomfolder/" + key
                }
            }
        )
        faceMatches = response['FaceMatches']  # a list of matches
        if faceMatches:
            similarity = faceMatches[0]['Similarity']
            if similarity > 90:
                # write to dynamodb
                pass
Edited: To get a list of objects from a folder with the prefix 'random', use the list_objects operation of the S3 client.
response = client.list_objects(
    Bucket='bucketname',
    Prefix='random'
)
numberofobjects = len(response['Contents'])
keyNamesInsideRandomFolder = []
for x in range(1, numberofobjects):
    keyNamesInsideRandomFolder.append(response['Contents'][x]['Key'])
Note: response['Contents'][x]['Key'] returns the key name of the object along with the prefix. E.g., if you have an image with filename img.jpg inside the random folder, it returns "random/img.jpg". Notice I started the for loop at 1: that is because the first element returned in the response is just the key name of the folder itself, i.e. "random/" in this case.
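For the DynamoDB half of the question (saving the owner name against every matching picture), an untested sketch in the same spirit as the pseudocode above; the table name and attribute names are assumptions:

import boto3

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('FaceMatches')  # hypothetical table with partition key 'ImageKey'

def save_match(key, similarity):
    # Store the matched picture against the owner image name
    table.put_item(Item={
        'ImageKey': key,               # picture from the random folder
        'Owner': 'ownerimage.jpg',     # owner image it matched against
        'Similarity': str(similarity)  # DynamoDB doesn't accept floats; use str or Decimal
    })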

Amazon lambda dynamodb update_item() only accepts keyword arguments

I am trying to save data in dynamodb using update_item for the first time. In another area of my project I have used put_item() successfully. For this new area of code I am saving only items that change, leaving items in the db that are unchanged. Thus, I need to use update_item(). However, I can't seem to figure out why my syntax is not correct for the API call. I am using this directly from the Amazon UI.
Here is my python code:
from __future__ import print_function

import json
import boto3

print('Loading function')

def saveScreenData(event, context):
    dynamodb = boto3.client('dynamodb', region_name='us-east-1', endpoint_url="https://dynamodb.us-east-1.amazonaws.com")
    print('The event: {}'.format(event))
    key = {}
    key['UID'] = event['uid']
    key['screenId'] = event['screenid']
    print('Key: {}'.format(key))
    for item, val in event.items():
        if item != 'uid' and item != 'screenid':
            print("Saving!")
            response = dynamodb.update_item({
                "TableName": "ScreenData",
                "Key": key,
                "UpdateExpression": "SET #attrName = :attrValue",
                "ExpressionAttributeNames": {
                    "#attrName": item
                },
                "ExpressionAttributeValues": {
                    ":attrValue": val
                }
            })
            print('Response: {}'.format(response))
    return response
Here is the output:
START RequestId: 2da9412a-b03d-11e7-9dc8-8fcb305833f6 Version: $LATEST
The event: {'article': '<p>↵ First!↵</p>', 'screenid': '13', 'uid': '0', 'section1': '<h1>↵ Second↵</h1>'}
Key: {'UID': '0', 'screenId': '13'}
Saving!
update_item() only accepts keyword arguments.: TypeError
Traceback (most recent call last):
  File "/var/task/saveScreenData.py", line 30, in saveScreenData
    ":attrValue": val
  File "/var/runtime/botocore/client.py", line 310, in _api_call
    "%s() only accepts keyword arguments." % py_operation_name)
TypeError: update_item() only accepts keyword arguments.
END RequestId: 2da9412a-b03d-11e7-9dc8-8fcb305833f6
I have researched the update_item docs (https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_UpdateItem.html) and have modeled my query after this SO q&a by mkobit (https://stackoverflow.com/users/627727/mkobit): https://stackoverflow.com/a/30604746/8027640
I have played with variations on the syntax, including adding the dictionary {"S" : "maybe this works"} instead of my variable val, and have also tried changing the variable to some static content to see if it works, but no luck.
Clearly this is a syntax issue, but I have been unable to track it down. Suggestions?
I think the example you are using is based on boto2, which has quite a different interface compared to boto3.
Instead, look at the boto3 documentation: you should use keyword arguments, as the error states (whereas you are passing a single dictionary).
Your request should look approximately like this:
response = dynamodb.update_item(
    TableName="ScreenData",
    Key=key,
    UpdateExpression="SET #attrName = :attrValue",
    ExpressionAttributeNames={
        "#attrName": item
    },
    ExpressionAttributeValues={
        ":attrValue": val
    }
)
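One caveat worth noting: dynamodb here is the low-level client, so the values inside Key and ExpressionAttributeValues must be in DynamoDB's typed format. A hedged fragment showing the shape, assuming the event values are strings:

key = {'UID': {'S': event['uid']}, 'screenId': {'S': event['screenid']}}
attr_values = {':attrValue': {'S': val}}  # 'S' marks a string; use 'N' for numbers

Alternatively, the higher-level Table resource (boto3.resource('dynamodb').Table('ScreenData')) accepts plain Python values and does this conversion for you.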

Dynamically create list/dict during for loop for conversion to JSON

I am trying to build a list/dict that will be converted to JSON later on. I am trying to write the code that builds and populates the multiple levels of the JSON format I ultimately need. I am having an issue wrapping my head around this. Thank you for the help.
What I ultimately need -> Populate this list/dict:
dataset_permission_json = []
with this format:
{
    "projects": [
        {
            "project": "test-project-1",
            "datasets": [
                {
                    "dataset": "testing1",
                    "permissions": [
                        {
                            "role": "READER",
                            "google_group": "testing1#test.com"
                        }
                    ]
                },
                {
                    "dataset": "testing2",
                    "permissions": [
                        {
                            "role": "OWNER",
                            "google_group": "testing2#test.com"
                        }
                    ]
                },
                {
                    "dataset": "testing3",
                    "permissions": [
                        {
                            "role": "READER",
                            "google_group": "testing3#test.com"
                        }
                    ]
                },
                {
                    "dataset": "testing4",
                    "permissions": [
                        {
                            "role": "WRITER",
                            "google_group": "testing4#test.com"
                        }
                    ]
                }
            ]
        }
    ]
}
I have multiple for loops that successfully print out the information I am pulling from an external API, but I need to be able to enter that data into the list/dict. The dynamic values I am trying to input are:
'project' i.e. test-project-1
'dataset' i.e. testing1
'role' i.e. READER
'google_group' i.e. testing1#test.com
I have tried things like:
dataset_permission_json.update({'project': project})
but cannot figure out how not to overwrite the data during the multiple for loops.
for project in projects:
    print(project)  ## Need to add this variable to 'projects'
    for bq_group in bq_groups:
        delegated_credentials = credentials.create_delegated(bq_group)
        http_auth = delegated_credentials.authorize(Http())
        list_datasets_in_project = bigquery_service.datasets().list(projectId=project).execute()
        datasets = list_datasets_in_project.get('datasets', [])
        for dataset in datasets:
            print(dataset['datasetReference']['datasetId'])  ## Add the dataset to 'datasets' under the project
            get_dataset_permissions_result = bigquery_service.datasets().get(projectId=project, datasetId=dataset['datasetReference']['datasetId']).execute()
            dataset_permissions = get_dataset_permissions_result.get('access', [])
            ### ADD THE NEXT LEVEL 'permissions' level here?
            for dataset_permission in dataset_permissions:
                if 'groupByEmail' in dataset_permission:
                    if bq_group in dataset_permission['groupByEmail']:
                        print(dataset['datasetReference']['datasetId'], dataset_permission['groupByEmail'])  ## Add to each dataset
I appreciate the help.
EDIT: Updated Progress
Ok, I have created the nested structure that I was looking for using StackOverflow.
Things are great except for the last part: I am trying to append the role & group to each 'permissions' nest, but after everything runs the data is only appended to the last 'permissions' nest in the JSON structure. It seems like it is overwriting itself during the for loop. Thoughts?
Updated for loop:
for project in projects:
    for bq_group in bq_groups:
        delegated_credentials = credentials.create_delegated(bq_group)
        http_auth = delegated_credentials.authorize(Http())
        list_datasets_in_project = bigquery_service.datasets().list(projectId=project).execute()
        datasets = list_datasets_in_project.get('datasets', [])
        for dataset in datasets:
            get_dataset_permissions_result = bigquery_service.datasets().get(projectId=project, datasetId=dataset['datasetReference']['datasetId']).execute()
            dataset_permissions = get_dataset_permissions_result.get('access', [])
            for dataset_permission in dataset_permissions:
                if 'groupByEmail' in dataset_permission:
                    if bq_group in dataset_permission['groupByEmail']:
                        dataset_permission_json['projects'][project]['datasets'][dataset['datasetReference']['datasetId']]['permissions']
                        permission = {'group': dataset_permission['groupByEmail'], 'role': dataset_permission['role']}
                        dataset_permission_json['permissions'] = permission
UPDATE: Solved.
dataset_permission_json['projects'][project]['datasets'][dataset['datasetReference']['datasetId']]['permissions']
permission = {'group': dataset_permission['groupByEmail'], 'role': dataset_permission['role']}
dataset_permission_json['projects'][project]['datasets'][dataset['datasetReference']['datasetId']]['permissions'] = permission
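For reference, the "solved" snippet still assigns to 'permissions' rather than appending, so it keeps only one permission per dataset. A hedged sketch of a helper that builds the exact target shape and appends instead (names chosen to match the desired JSON):

def add_permission(tree, project, dataset_id, role, group):
    # Find or create the project entry
    proj = next((p for p in tree['projects'] if p['project'] == project), None)
    if proj is None:
        proj = {'project': project, 'datasets': []}
        tree['projects'].append(proj)
    # Find or create the dataset entry
    ds = next((d for d in proj['datasets'] if d['dataset'] == dataset_id), None)
    if ds is None:
        ds = {'dataset': dataset_id, 'permissions': []}
        proj['datasets'].append(ds)
    # Append instead of assign, so earlier permissions are kept
    ds['permissions'].append({'role': role, 'google_group': group})

dataset_permission_json = {'projects': []}
# ...inside the innermost loop:
# add_permission(dataset_permission_json, project,
#                dataset['datasetReference']['datasetId'],
#                dataset_permission['role'], dataset_permission['groupByEmail'])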

Example of update_item in dynamodb boto3

Following the documentation, I'm trying to create an update statement that will update one attribute in a DynamoDB table, or add it if it doesn't exist.
I'm trying this
response = table.update_item(
    Key={'ReleaseNumber': '1.0.179'},
    UpdateExpression='SET',
    ConditionExpression='Attr(\'ReleaseNumber\').eq(\'1.0.179\')',
    ExpressionAttributeNames={'attr1': 'val1'},
    ExpressionAttributeValues={'val1': 'false'}
)
The error I'm getting is:
botocore.exceptions.ClientError: An error occurred (ValidationException) when calling the UpdateItem operation: ExpressionAttributeNames contains invalid key: Syntax error; key: "attr1"
If anyone has done anything similar to what I'm trying to achieve please share example.
Found a working example here; it is very important to list in Key all of the table's key attributes (partition key and sort key). This required an additional query before the update, but it works.
response = table.update_item(
    Key={
        'ReleaseNumber': releaseNumber,
        'Timestamp': result[0]['Timestamp']
    },
    UpdateExpression="set Sanity = :r",
    ExpressionAttributeValues={
        ':r': 'false',
    },
    ReturnValues="UPDATED_NEW"
)
Details on dynamodb updates using boto3 seem incredibly sparse online, so I'm hoping these alternative solutions are useful.
get / put
import boto3

table = boto3.resource('dynamodb').Table('my_table')

# get item
response = table.get_item(Key={'pkey': 'asdf12345'})
item = response['Item']

# update
item['status'] = 'complete'

# put (idempotent)
table.put_item(Item=item)
actual update
import boto3

table = boto3.resource('dynamodb').Table('my_table')

# AttributeUpdates is a legacy parameter; each entry takes a Value/Action pair
table.update_item(
    Key={'pkey': 'asdf12345'},
    AttributeUpdates={
        'status': {'Value': 'complete', 'Action': 'PUT'},
    },
)
If you don't want to check parameter by parameter for the update, I wrote a cool function that returns the parameters needed to perform an update_item call using boto3.
def get_update_params(body):
    """Given a dictionary we generate an update expression and a dict of values
    to update a dynamodb table.

    Params:
        body (dict): Parameters to use for formatting.

    Returns:
        update expression, dict of values.
    """
    update_expression = ["set "]
    update_values = dict()

    for key, val in body.items():
        update_expression.append(f" {key} = :{key},")
        update_values[f":{key}"] = val

    # join the pieces and strip the trailing comma
    return "".join(update_expression)[:-1], update_values
Here is a quick example:
def update(body):
    a, v = get_update_params(body)
    response = table.update_item(
        Key={'uuid': str(uuid)},
        UpdateExpression=a,
        ExpressionAttributeValues=dict(v)
    )
    return response
The original code example:
response = table.update_item(
    Key={'ReleaseNumber': '1.0.179'},
    UpdateExpression='SET',
    ConditionExpression='Attr(\'ReleaseNumber\').eq(\'1.0.179\')',
    ExpressionAttributeNames={'attr1': 'val1'},
    ExpressionAttributeValues={'val1': 'false'}
)
Fixed:
from boto3.dynamodb.conditions import Attr

response = table.update_item(
    Key={'ReleaseNumber': '1.0.179'},
    UpdateExpression='SET #attr1 = :val1',
    ConditionExpression=Attr('ReleaseNumber').eq('1.0.179'),
    ExpressionAttributeNames={'#attr1': 'val1'},
    ExpressionAttributeValues={':val1': 'false'}
)
In the marked answer it was also revealed that there is a range key, so that should also be included in Key. The update_item method must address the exact record to be updated; there are no batch updates, and you can't update a range of values filtered to a condition to get to a single record. The ConditionExpression is useful for making updates idempotent, i.e. don't update the value if it is already that value; it's not like a SQL WHERE clause.
Regarding the specific error seen:
ExpressionAttributeNames is a map of placeholders for attribute names used in the UpdateExpression, useful when an attribute name is a reserved word.
From the docs, "An expression attribute name must begin with a #, and be followed by one or more alphanumeric characters". The error is raised because the code neither starts its ExpressionAttributeName with a # nor uses it in the UpdateExpression.
ExpressionAttributeValues are placeholders for the values you want to update to, and they must start with :
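For example, updating an attribute literally named timestamp (a reserved word) needs both kinds of placeholder; a minimal, hedged fragment assuming a Table resource keyed on pkey:

table.update_item(
    Key={'pkey': 'asdf12345'},
    UpdateExpression='SET #ts = :ts',
    ExpressionAttributeNames={'#ts': 'timestamp'},  # placeholder -> real attribute name
    ExpressionAttributeValues={':ts': '2021-11-30T13:10:32Z'}
)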
Based on the official example, here's a simple and complete solution which could be used to manually update (not something I would recommend) a table used by a terraform S3 backend.
Let's say this is the table data as shown by the AWS CLI:
$ aws dynamodb scan --table-name terraform_lock --region us-east-1
{
    "Items": [
        {
            "Digest": {
                "S": "2f58b12ae16dfb5b037560a217ebd752"
            },
            "LockID": {
                "S": "tf-aws.tfstate-md5"
            }
        }
    ],
    "Count": 1,
    "ScannedCount": 1,
    "ConsumedCapacity": null
}
You could update it to a new digest (say you rolled back the state) as follows:
import boto3

dynamodb = boto3.resource('dynamodb', 'us-east-1')
try:
    table = dynamodb.Table('terraform_lock')
    response = table.update_item(
        Key={
            "LockID": "tf-aws.tfstate-md5"
        },
        UpdateExpression="set Digest=:newDigest",
        ExpressionAttributeValues={
            ":newDigest": "50a488ee9bac09a50340c02b33beb24b"
        },
        ReturnValues="UPDATED_NEW"
    )
except Exception as msg:
    print(f"Oops, could not update: {msg}")
Note the : at the start of ":newDigest": "50a488ee9bac09a50340c02b33beb24b"; it's easy to miss or forget.
A small update to Jam M. Hernandez Quiceno's answer, which includes ExpressionAttributeNames to prevent encountering errors such as:
"errorMessage": "An error occurred (ValidationException) when calling the UpdateItem operation: Invalid UpdateExpression: Attribute name is a reserved keyword; reserved keyword: timestamp",
def get_update_params(body):
    """
    Given a dictionary of key-value pairs to update an item with in DynamoDB,
    generate three objects to be passed to UpdateExpression, ExpressionAttributeValues,
    and ExpressionAttributeNames respectively.
    """
    update_expression = []
    attribute_values = dict()
    attribute_names = dict()

    for key, val in body.items():
        update_expression.append(f" #{key.lower()} = :{key.lower()}")
        attribute_values[f":{key.lower()}"] = val
        attribute_names[f"#{key.lower()}"] = key

    return "set " + ", ".join(update_expression), attribute_values, attribute_names
Example use:
update_expression, attribute_values, attribute_names = get_update_params(
    {"Status": "declined", "DeclinedBy": "username"}
)

response = table.update_item(
    Key={"uuid": "12345"},
    UpdateExpression=update_expression,
    ExpressionAttributeValues=attribute_values,
    ExpressionAttributeNames=attribute_names,
    ReturnValues="UPDATED_NEW"
)
print(response)
An example that updates any number of attributes given as a dict and keeps track of the number of updates. It works with reserved words (e.g. name). The attribute names _inc and _start shouldn't be used, as the function below overwrites their value placeholders.
from typing import Dict

from boto3 import Session

def getDynamoDBSession(region: str = "eu-west-1"):
    """Connect to DynamoDB resource from boto3."""
    return Session().resource("dynamodb", region_name=region)

DYNAMODB = getDynamoDBSession()

def updateItemAndCounter(db_table: str, item_key: Dict, attributes: Dict) -> Dict:
    """
    Update item or create new. If the item already exists, return the previous value and
    increase the counter: update_counter.
    """
    table = DYNAMODB.Table(db_table)

    # Init update-expression
    update_expression = "SET"

    # Build expression-attribute-names, expression-attribute-values, and the update-expression
    expression_attribute_names = {}
    expression_attribute_values = {}
    for key, value in attributes.items():
        update_expression += f' #{key} = :{key},'  # Notice the "#" to solve issues with reserved keywords
        expression_attribute_names[f'#{key}'] = key
        expression_attribute_values[f':{key}'] = value

    # Add counter start and increment attributes
    expression_attribute_values[':_start'] = 0
    expression_attribute_values[':_inc'] = 1

    # Finish update-expression with our counter
    update_expression += " update_counter = if_not_exists(update_counter, :_start) + :_inc"

    return table.update_item(
        Key=item_key,
        UpdateExpression=update_expression,
        ExpressionAttributeNames=expression_attribute_names,
        ExpressionAttributeValues=expression_attribute_values,
        ReturnValues="ALL_OLD"
    )
Hope it might be useful to someone!
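A hypothetical call, assuming a table named my_table keyed on pkey:

result = updateItemAndCounter('my_table', {'pkey': 'asdf12345'},
                              {'name': 'Alice', 'status': 'complete'})
print(result.get('Attributes'))  # previous values, when the item already existed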
In a simple way, you can use the code below to update an item's value with a new one:
response = table.update_item(
    Key={"my_id_name": "my_id_value"},                            # identifies the record
    UpdateExpression="set item_key_name=:item_key_value",         # operation action (SET)
    ExpressionAttributeValues={":item_key_value": "new_value"},   # the value to update to
    ReturnValues="UPDATED_NEW"                                    # optional: return the updated attributes
)
Simple example with multiple fields:
import boto3

dynamodb_client = boto3.client('dynamodb')

dynamodb_client.update_item(
    TableName=table_name,
    Key={
        'PK1': {'S': 'PRIMARY_KEY_VALUE'},
        'SK1': {'S': 'SECONDARY_KEY_VALUE'}
    },
    UpdateExpression='SET #field1 = :field1, #field2 = :field2',
    ExpressionAttributeNames={
        '#field1': 'FIELD_1_NAME',
        '#field2': 'FIELD_2_NAME',
    },
    ExpressionAttributeValues={
        ':field1': {'S': 'FIELD_1_VALUE'},
        ':field2': {'S': 'FIELD_2_VALUE'},
    }
)
Using the previous answer from eltbus, it worked for me, except for a minor bug: you have to delete the extra comma using update_expression[:-1].
