ParamValidationError: Parameter validation failed: Bucket name must match the regex - python

I'm trying to run a Glue job by calling it from a Lambda function. The Glue job itself runs perfectly fine, but when I trigger it from the Lambda function I get the error below:
[ERROR] ParamValidationError: Parameter validation failed: Bucket name must match the regex "^[a-zA-Z0-9.\-_]{1,255}$" or be an ARN matching the regex "^arn:(aws).*:(s3|s3-object-lambda):[a-z\-0-9]*:[0-9]{12}:accesspoint[/:][a-zA-Z0-9\-.]{1,63}$|^arn:(aws).*:s3-outposts:[a-z\-0-9]+:[0-9]{12}:outpost[/:][a-zA-Z0-9\-]{1,63}[/:]accesspoint[/:][a-zA-Z0-9\-]{1,63}$"
There is no issue with my bucket name: I can perform other actions with it without problems, and the Glue job runs fine when I run it standalone.
Any help would be appreciated.
Thanks in advance.

Maybe you are including the s3:// protocol prefix when specifying the bucket name; it is not required.
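For example, S3 API parameters that take a bucket expect the bare bucket name. A minimal sketch (the bucket name is a placeholder):
import boto3

s3 = boto3.client('s3')

# Fails parameter validation: the value includes the "s3://" prefix
# s3.head_bucket(Bucket='s3://my-example-bucket')

# Passes validation: bare bucket name only
s3.head_bucket(Bucket='my-example-bucket')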

I was able to solve it by making a few changes.
My initial code was:
import json
import os
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)
import boto3
client = boto3.client('glue')
glueJobName = "MyTestJob"
def lambda_handler(event, context):
    logger.info('## INITIATED BY EVENT: ')
    logger.info(event['detail'])
    response = client.start_job_run(JobName=glueJobName)
    logger.info('## STARTED GLUE JOB: ' + glueJobName)
    logger.info('## GLUE JOB RUN ID: ' + response['JobRunId'])
    return response
Once I removed the logging part (code below), it worked without any error:
from __future__ import print_function
import boto3
import urllib
print('Loading function')
glue = boto3.client('glue')
def lambda_handler(event, context):
    gluejobname = "MyTestJob"
    runId = glue.start_job_run(JobName=gluejobname)
    status = glue.get_job_run(JobName=gluejobname, RunId=runId['JobRunId'])
    print("Job Status : ", status['JobRun']['JobRunState'])
What could be the issue here?
Thanks

Related

Describing AWS SSM parameters using tags failing

I am running a Lambda function to fetch SSM parameters in AWS, and I want to filter them using tags. I have tried the method recommended by AWS, but I keep getting this error:
"errorMessage": "An error occurred (ValidationException) when calling the DescribeParameters operation: An error occurred while calling one AWS dependency service."
Any help as to why?
reference - https://docs.aws.amazon.com/systems-manager/latest/userguide/parameter-search.html
https://github.com/spulec/moto/blob/master/tests/test_ssm/test_ssm_boto3.py#L1043-L1045
This is the code that I am running:
import json
import boto3
ssm_client = boto3.client('ssm')
def lambda_handler(event, context):
    print("Fetch group A ssm parameter: ")
    leg_one_parameter = ssm_client.describe_parameters(
        ParameterFilters=[{"Key": "tag:group", "Values": ["A"]}]
    )['Parameters']
    parameter_name = list(map(lambda parameter: parameter['Name'], leg_one_parameter))
    return parameter_name
It should work. The validation error may be caused by a bug in boto3; I have run into a similar issue once or twice.
I tried to execute your request and it passed.
leg_one_parameter = ssm.describe_parameters(
    ParameterFilters=[{"Key": "tag:group", "Values": ["A"]}]
)['Parameters']
leg_one_parameter
[]
I used boto3 version 1.24.3.
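If you want to confirm which boto3 version the Lambda runtime is actually using (it may be older than what you run locally), a quick way is to log it. A minimal sketch:
import boto3
print("boto3 version:", boto3.__version__)  # compare against a known-good version, e.g. 1.24.3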

S3 boto3 refuses to overwrite endpoint URL

I'm working with an internal S3 service (not the AWS one). When I provide hard-coded credentials, region, and endpoint_url, boto3 seems to ignore them.
I came to that conclusion because it attempts to reach a public AWS endpoint URL instead of the internal one I provided, and fails with the following proxy error. It should not go out to the internet at all, since it is an internal S3 service:
botocore.exceptions.ProxyConnectionError: Failed to connect to proxy URL: "http://my_company_proxy"
Here is my code:
import io
import os
import boto3
import pandas as pd
# Method 1 : Client #########################################
s3_client = boto3.client(
's3',
region_name='EU-WEST-1',
aws_access_key_id='xxx',
aws_secret_access_key='zzz',
endpoint_url='https://my_company_enpoint_url'
)
# ==> at this point no error, but I don't know the value of endpoint_url
# Read bucket
bucket = "bkt-udt-arch"
file_name = "banking.csv"
print("debug 1") # printed OK
obj = s3_client.get_object(Bucket= bucket, Key= file_name)
# program stops here with:
# botocore.exceptions.ProxyConnectionError: Failed to connect to proxy URL: "http://my_company_proxy"
print("debug 2") # not printed -
initial_df = pd.read_csv(obj['Body']) # 'Body' is a key word
print("debug 3")
# Method 2 : Resource #########################################
# use third party object storage
s3 = boto3.resource('s3', endpoint_url='https://my_company_enpoint_url',
                    aws_access_key_id='xxx',
                    aws_secret_access_key='zzz',
                    region_name='EU-WEST-1'
                    )
print("debug 4") # Printed OK if method 1 is commented
# Print out bucket names
for bucket in s3.buckets.all():
    print(bucket.name)
Thank you for the review
It was indeed a proxy problem: when the http_proxy environment variable is disabled, it works fine.
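For reference, a minimal sketch of that workaround, assuming the proxy is only configured through environment variables (the endpoint and credentials are the placeholders from the question):
import os
import boto3

# Drop the proxy settings for this process so requests go straight
# to the internal endpoint instead of through the corporate proxy.
for var in ("http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY"):
    os.environ.pop(var, None)

s3_client = boto3.client(
    's3',
    region_name='eu-west-1',
    aws_access_key_id='xxx',
    aws_secret_access_key='zzz',
    endpoint_url='https://my_company_enpoint_url',
)
Alternatively, botocore's Config object accepts a proxies setting, so proxy behaviour can be controlled per client instead of per process.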

S3 unit tests boto client

I'm having issues writing a unit test for an S3 client; the test seems to be using a real S3 client rather than the mocked one I have created for the test. Here is my example:
@pytest.fixture(autouse=True)
def moto_boto(self):
    # setup: start moto server and create the bucket
    mocks3 = mock_s3()
    mocks3.start()
    res = boto3.resource('s3')
    bucket_name: str = f"{os.environ['BUCKET_NAME']}"
    res.create_bucket(Bucket=bucket_name)
    yield
    # teardown: stop moto server
    mocks3.stop()

def test_with_fixture(self):
    from functions.s3_upload_worker import (
        save_email_in_bucket,
    )
    client = boto3.client('s3')
    bucket_name: str = f"{os.environ['BUCKET_NAME']}"
    client.list_objects(Bucket=bucket_name)
    save_email_in_bucket(
        "123AZT",
        os.environ["BUCKET_FOLDER_NAME"],
        email_byte_code,
    )
This results in the following error:
botocore.exceptions.ClientError: An error occurred (ExpiredToken) when calling the PutObject operation: The provided token has expired.
The code I am testing looks like this:
def save_email_in_bucket(message_id, bucket_folder_name, body):
    s3_key = "".join([bucket_folder_name, "/", str(message_id), ".json"])
    s3_client.put_object(
        Bucket=bucket,
        Key=s3_key,
        Body=json.dumps(body),
        ContentType="application-json",
    )
    LOGGER.info(
        f"Saved email with message ID {message_id} in bucket folder {bucket_folder_name}"
    )
Not accepting this as an answer, but it may be useful for anyone who ends up here: I found a workaround where, if I create the S3 client inside the function I am testing rather than globally, this approach works. I would prefer to find an actual solution though.
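A minimal sketch of that workaround; the module and function names mirror the question and are placeholders:
# functions/s3_upload_worker.py (sketch)
import json
import os
import boto3

def save_email_in_bucket(message_id, bucket_folder_name, body):
    # Creating the client inside the function means it is instantiated
    # after the moto mock has been started by the fixture, so it talks
    # to the mocked S3 backend instead of real AWS.
    s3_client = boto3.client("s3")
    bucket = os.environ["BUCKET_NAME"]  # resolved here for the sketch
    s3_client.put_object(
        Bucket=bucket,
        Key=f"{bucket_folder_name}/{message_id}.json",
        Body=json.dumps(body),
        ContentType="application-json",
    )
The original version fails because the module-level client is created at import time, before the moto mock is active; moto's documentation recommends making sure the mocks are established before any clients or resources are created.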

Mocking a lambda response with moto

Somewhere in my code, a Lambda function is called to return a true/false response. I am trying to mock this Lambda in my unit tests, with no success.
This is my code:
def _test_update_allowed():
    old = ...
    new = ...
    assert(is_update_allowed(old, new) == True)
Internally, is_update_allowed calls the lambda, which is what I want to mock.
I tried adding the following code above my test:
import zipfile
import io
import boto3
import os
@pytest.fixture(scope='function')
def aws_credentials():
    """Mocked AWS Credentials for moto."""
    os.environ['AWS_ACCESS_KEY_ID'] = 'testing'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'testing'
    os.environ['AWS_SECURITY_TOKEN'] = 'testing'
    os.environ['AWS_SESSION_TOKEN'] = 'testing'

CLIENT = boto3.client('lambda', region_name='us-east-1')

# Expected response setup and zip file for lambda mock creation
def lambda_event():
    code = '''
def lambda_handler(event, context):
    return event
'''
    zip_output = io.BytesIO()
    zip_file = zipfile.ZipFile(zip_output, 'w', zipfile.ZIP_DEFLATED)
    zip_file.writestr('lambda_function.py', code)
    zip_file.close()
    zip_output.seek(0)
    return zip_output.read()

# create mocked lambda with zip file
def mock_some_lambda(lambda_name, return_event):
    return CLIENT.create_function(
        FunctionName=lambda_name,
        Runtime='python2.7',
        Role='arn:aws:iam::123456789:role/does-not-exist',
        Handler='lambda_function.lambda_handler',
        Code={
            'ZipFile': return_event,
        },
        Publish=True,
        Timeout=30,
        MemorySize=128
    )
and then updated my test to:
@mock_lambda
def _test_update_allowed():
    mock_some_lambda('hello-world-lambda', lambda_event())
    old = ...
    new = ...
    assert(is_update_allowed(old, new) == True)
But I'm getting the following error, which makes me think it's actually trying to talk to AWS:
botocore.exceptions.ClientError: An error occurred (UnrecognizedClientException) when calling the CreateFunction operation: The security token included in the request is invalid.
From the error message, I can confirm it is definitely not an AWS issue. It clearly states that the request is using credentials that are not valid, so it comes down to the code.
I am assuming you already have import statements for the necessary libraries, because those are also not visible in the shared code:
import pytest
import moto
from mock import mock, patch
from moto import mock_lambda
You need to use the aws_credentials fixture while creating the client, because from the shared code it does not look like you are doing that:
@pytest.fixture(scope='function')
def lambda_mock(aws_credentials):
    with mock_lambda():
        yield boto3.client('lambda', region_name='us-east-1')
and then your mock:
@pytest.fixture(scope='function')
def mock_some_lambda(lambda_mock):
    lambda_mock.create_function(
        FunctionName=lambda_name,
        Runtime='python2.7',
        Role='arn:aws:iam::123456789:role/does-not-exist',
        Handler='lambda_function.lambda_handler',
        Code={
            'ZipFile': return_event,
        },
        Publish=True,
        Timeout=30,
        MemorySize=128
    )
    yield
and then the test function:
def _test_update_allowed(lambda_mock, mock_some_lambda):
    lambda_mock.invoke(...)
    .....
I can't give a fully working example because I'm not sure what the full logic is. In the meantime, take a look at this post.
The problem seems to be due to the non-existent role ARN. Try mocking it like in the moto library tests:
from botocore.exceptions import ClientError
from moto import mock_iam

_lambda_region = "us-east-1"  # region used for the mocked clients

def get_role_name():
    with mock_iam():
        iam = boto3.client("iam", region_name=_lambda_region)
        try:
            return iam.get_role(RoleName="my-role")["Role"]["Arn"]
        except ClientError:
            return iam.create_role(
                RoleName="my-role",
                AssumeRolePolicyDocument="some policy",
                Path="/my-path/",
            )["Role"]["Arn"]

boto3 check if Athena database exists

I'm making a script that creates a database in AWS Athena and then creates tables for that database. Today the DB creation was taking ages, so the tables being created referred to a database that didn't exist yet. Is there a way to check whether a DB has already been created in Athena using boto3?
This is the part that creates the DB:
client = boto3.client('athena')
client.start_query_execution(
    QueryString='create database {}'.format('db_name'),
    ResultConfiguration=config
)
# -*- coding: utf-8 -*-
import logging
import os
from time import sleep

import boto3
import pandas as pd
from backports.tempfile import TemporaryDirectory

logger = logging.getLogger(__name__)


class AthenaQueryFailed(Exception):
    pass


class Athena(object):
    S3_TEMP_BUCKET = "please-replace-with-your-bucket"

    def __init__(self, bucket=S3_TEMP_BUCKET):
        self.bucket = bucket
        self.client = boto3.Session().client("athena")

    def execute_query_in_athena(self, query, output_s3_directory, database="csv_dumps"):
        """Useful when the client executes a query in Athena and wants the result in the given `output_s3_directory`

        :param query: Query to be executed in Athena
        :param output_s3_directory: s3 path in which the client wants results to be stored
        :return: s3 path
        """
        response = self.client.start_query_execution(
            QueryString=query,
            QueryExecutionContext={"Database": database},
            ResultConfiguration={"OutputLocation": output_s3_directory},
        )
        query_execution_id = response["QueryExecutionId"]
        filename = "{filename}.csv".format(filename=response["QueryExecutionId"])
        s3_result_path = os.path.join(output_s3_directory, filename)
        logger.info(
            "Query query_execution_id <<{query_execution_id}>>, result_s3path <<{s3path}>>".format(
                query_execution_id=query_execution_id, s3path=s3_result_path
            )
        )
        self.wait_for_query_to_complete(query_execution_id)
        return s3_result_path

    def wait_for_query_to_complete(self, query_execution_id):
        is_query_running = True
        backoff_time = 10
        while is_query_running:
            response = self.__get_query_status_response(query_execution_id)
            status = response["QueryExecution"]["Status"][
                "State"
            ]  # possible responses: QUEUED | RUNNING | SUCCEEDED | FAILED | CANCELLED
            if status == "SUCCEEDED":
                is_query_running = False
            elif status in ["CANCELLED", "FAILED"]:
                raise AthenaQueryFailed(status)
            elif status in ["QUEUED", "RUNNING"]:
                logger.info("Backing off for {} seconds.".format(backoff_time))
                sleep(backoff_time)
            else:
                raise AthenaQueryFailed(status)

    def __get_query_status_response(self, query_execution_id):
        response = self.client.get_query_execution(QueryExecutionId=query_execution_id)
        return response
As pointed out in the other answer, the Athena waiter is still not implemented.
I use this lightweight Athena client to run the query; it returns the S3 path of the result when the query completes.
The waiter functions for Athena are not implemented yet: Athena Waiter
See: Support AWS Athena waiter feature for a possible workaround until it is implemented in boto3. This is how it is implemented in the AWS CLI:
while True:
    stats = self.athena.get_query_execution(execution_id)
    status = stats['QueryExecution']['Status']['State']
    if status in ['SUCCEEDED', 'FAILED', 'CANCELLED']:
        break
    time.sleep(0.2)
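To answer the original question directly: Athena databases live in the Glue Data Catalog, so one way to check for an existing database (rather than polling the CREATE DATABASE query) is to look it up there. A minimal sketch, assuming the default catalog:
import boto3
from botocore.exceptions import ClientError

def athena_database_exists(database_name):
    glue = boto3.client('glue')
    try:
        glue.get_database(Name=database_name)  # Athena DBs are stored in the Glue catalog
        return True
    except ClientError as e:
        if e.response['Error']['Code'] == 'EntityNotFoundException':
            return False
        raise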
