I want to import a CSV file into DynamoDB. AWS does not have this option right now, so I need to use an existing solution with a Python script: https://github.com/aws-samples/csv-to-dynamodb
Everything works fine when I specify the partition key as String:
"AttributeDefinitions":[
{
"AttributeName": "Code",
"AttributeType": "S"
}
],
"KeySchema":[
{
"AttributeName": "Code",
"KeyType": "HASH"
}
],
When I specify it as Number and try to run the CSV import script, it returns an error: The provided key element does not match the schema.
"AttributeDefinitions":[
{
"AttributeName": "Code",
"AttributeType": "N"
}
],
"KeySchema":[
{
"AttributeName": "Code",
"KeyType": "HASH"
}
],
Here is the Python code that handles this:
import json
import boto3
import os
import csv
import codecs
import sys

s3 = boto3.resource('s3')
dynamodb = boto3.resource('dynamodb')

bucket = os.environ['bucket']
key = os.environ['key']
tableName = os.environ['table']

def lambda_handler(event, context):
    # get() does not store in memory
    try:
        obj = s3.Object(bucket, key).get()['Body']
    except Exception as error:
        print(error)
        print("S3 Object could not be opened. Check environment variable.")
    try:
        table = dynamodb.Table(tableName)
    except Exception as error:
        print(error)
        print("Error loading DynamoDB table. Check if table was created correctly and environment variable.")
    batch_size = 100
    batch = []
    # DictReader is a generator; not stored in memory
    for row in csv.DictReader(codecs.getreader('utf-8-sig')(obj)):
        if len(batch) >= batch_size:
            write_to_dynamo(batch)
            batch.clear()
        batch.append(row)
    if batch:
        write_to_dynamo(batch)
    return {
        'statusCode': 200,
        'body': json.dumps('Uploaded to DynamoDB Table')
    }

def write_to_dynamo(rows):
    try:
        table = dynamodb.Table(tableName)
    except Exception as error:
        print(error)
        print("Error loading DynamoDB table. Check if table was created correctly and environment variable.")
    try:
        with table.batch_writer() as batch:
            for i in range(len(rows)):
                batch.put_item(
                    Item=rows[i]
                )
    except Exception as error:
        print(error)
        print("Error executing batch_writer")
Does the code need to be changed to send the partition key as a Number, or does it need to be set up differently in the configuration?
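For context, csv.DictReader yields every field as a Python str, and the boto3 Table resource infers the DynamoDB type from the Python type, so each row is written with Code as a string (S), which no longer matches the N key schema. A minimal sketch of the kind of conversion the script would need, assuming a hypothetical helper and that the key column is named Code as in the table definition above (the Table resource requires Decimal rather than float for numbers):

from decimal import Decimal

def coerce_row(row, numeric_columns=("Code",)):
    # Hypothetical helper: csv.DictReader yields str values; converting
    # the key column to Decimal makes boto3 serialize it as N, matching
    # the Number key schema. Decimal avoids float precision issues.
    for column in numeric_columns:
        if column in row:
            row[column] = Decimal(row[column])
    return row

Each row would then be passed through coerce_row before batch.put_item.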
I need to write a unit test for the AWS Lambda handler. I want to give some JSON files to the handler method and get some output, but I have run into trouble mocking the S3 "event".
My lambda handler looks like this:
def handler(event, context):
    print(f'Event: {event}')
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(event["bucket"])
    word_correction = correction.WordCorrection()
    for obj in bucket.objects.all():
        key = obj.key
        body = obj.get()['Body'].read()
        data = get_data_from_file(body)
        if key.endswith('.json'):
            try:
                word_correction.create_duplicated_words_file(data)
            except Exception as e:
                print(e)
                return {
                    "success": False,
                    "response": f"Failed to read file - {e}"
                }
            try:
                corrected_word_list = word_correction.spell_words(json.loads(body))
            except Exception as e:
                print(e)
                return {
                    "success": False,
                    "response": f"Failed to correct words - {e}"
                }
        else:
            return {
                "success": False,
                "response": "Invalid file type. File must have .json extension."
            }
    return {
        "success": True,
        "response": corrected_word_list
    }
Here I pass some JSON data to my word_correction module.
From this point, my unit test:
import json
import unittest

import boto3
from moto import mock_s3

S3_BUCKET_NAME = 'dev-ayazv-lambda-spell-correction'
DEFAULT_REGION = 'us-east-1'
S3_TEST_FILE_KEY = 'pdfs/manual_test/page-data-page-9.json'
S3_TEST_FILE_CONTENT = {"Blocks": [{"BlockType": "WORD", "Confidence": 93.18, "Text": "Test"}]}

@mock_s3
class TestLambdaFunction(unittest.TestCase):
    def setUp(self):
        self.s3 = boto3.resource('s3', region_name=DEFAULT_REGION)
        self.s3_bucket = self.s3.create_bucket(Bucket=S3_BUCKET_NAME)
        self.s3_bucket.put_object(Key=S3_TEST_FILE_KEY,
                                  Body=json.dumps(S3_TEST_FILE_CONTENT))

    def test_get_data_from_file(self):
        from functions.spell_correction.src.index import get_data_from_file
        json_encode_data = json.dumps(S3_TEST_FILE_CONTENT, indent=2).encode('utf-8')
        file_content = get_data_from_file(json_encode_data)
        self.assertEqual(file_content, S3_TEST_FILE_CONTENT)

    def test_handler(self):
        from functions.spell_correction.src.index import handler
        event = {
            'bucket': {
                'name': S3_BUCKET_NAME
            },
            'object': {
                'key': S3_TEST_FILE_KEY
            }
        }
        result = handler(event, {})
        self.assertEqual(result, {"success": True, "response": []})
I was trying to mock the S3 bucket, file key, and file content, but my test fails at this point:
bucket = {'name': 'dev-ayazv-lambda-spell-correction'}

    def validate_bucket_name(params, **kwargs):
        if 'Bucket' not in params:
            return
        bucket = params['Bucket']
>       if not VALID_BUCKET.search(bucket) and not VALID_S3_ARN.search(bucket):
E       TypeError: expected string or bytes-like object
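Reading the traceback, s3.Bucket() expects the bucket name as a string, but the test event passes a nested dict, so the name validator receives {'name': ...} and raises the TypeError. A minimal sketch of the two consistent options, assuming the handler and test shown above:

# Option 1: pass the name directly, matching s3.Bucket(event["bucket"])
event = {'bucket': S3_BUCKET_NAME}

# Option 2: keep the nested event and unpack it in the handler instead
bucket = s3.Bucket(event["bucket"]["name"])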
Hello guys, this will be my first post here, as I am learning how to code. When I try to update my table in DynamoDB using a Lambda function, I get the following error: "The provided key element does not match the schema". My table name is correct and I am able to connect to it. My primary key is just a hash key, id; its value is 1, so I do not see why it is giving me this error here.
import json
import boto3

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('Visitors')

def lambda_handler(event, context):
    response = table.update_item(
        Key={
            "id": {"N": "1"}
        },
        ExpressionAttributeNames={
            "#c": "Counters"
        },
        UpdateExpression="set #c = :val",
        ExpressionAttributeValues={
            ":val": {"N": "1"}
        }
    )
Since you are using the Table resource, you should refer to the resource-level documentation. For example, the Key parameter should have the following syntax:
Key={
    'string': 'string'|123|Binary(b'bytes')|True|None|set(['string'])|set([123])|set([Binary(b'bytes')])|[]|{}
}
This means that the DynamoDB data type is inferred from the Python data type. So instead of {"N":"1"}, you can use 1 directly. Here is a corrected version of your code snippet:
import json
import boto3

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('Visitors')

def lambda_handler(event, context):
    response = table.update_item(
        Key={
            "id": 1
        },
        ExpressionAttributeNames={
            "#c": "Counters"
        },
        UpdateExpression="set #c = :val",
        ExpressionAttributeValues={
            ":val": 1
        }
    )
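As a hedged aside (not part of the answer above): the typed format from the original snippet is exactly what the low-level client API expects, so the right format follows from which interface you use. A short sketch of the two styles, where table is a Table resource and client is a hypothetical boto3.client('dynamodb'):

# Resource API (dynamodb.Table): plain Python types; boto3 infers N/S/etc.
table.update_item(
    Key={"id": 1},
    UpdateExpression="set #c = :val",
    ExpressionAttributeNames={"#c": "Counters"},
    ExpressionAttributeValues={":val": 1},
)

# Client API (boto3.client('dynamodb')): the typed wire format is required.
client.update_item(
    TableName="Visitors",
    Key={"id": {"N": "1"}},
    UpdateExpression="set #c = :val",
    ExpressionAttributeNames={"#c": "Counters"},
    ExpressionAttributeValues={":val": {"N": "1"}},
)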
I have a function called check_stuff that instantiates a client and calls describe_continuous_backups; however, moto doesn't have support for this yet, so I need to mock it manually myself. I have the following, but it doesn't seem like I'm able to patch the object. How can I go about this?
import boto3

def check_stuff(profile, table_name):
    session = boto3.Session(profile_name=profile)
    client = session.client('dynamodb', 'eu-west-1')
    some_stuff = client.describe_continuous_backups(TableName=table_name)
    # do stuff
@mock_dynamodb2
@mock.patch('boto3.Session.client.describe_continuous_backups', return_value={'foo': 'bar'})
def test_continuous_backup_disabled(self):
    table = self.client.create_table(
        TableName='Movies',
        KeySchema=[
            {
                'AttributeName': 'year',
                'KeyType': 'HASH'
            },
            {
                'AttributeName': 'title',
                'KeyType': 'RANGE'
            }
        ],
        AttributeDefinitions=[
            {
                'AttributeName': 'year',
                'AttributeType': 'N'
            },
            {
                'AttributeName': 'title',
                'AttributeType': 'S'
            },
        ],
        ProvisionedThroughput={
            'ReadCapacityUnits': 10,
            'WriteCapacityUnits': 10
        }
    )
    result = check_stuff('myprofile', 'some_table')
I can try and mock the client like so:
mocker.patch('mypackage.boto3.Session.client', ....)
But the problem with that is that it mocks the client itself. I need to mock a function that doesn't necessarily exist while retaining the rest of the functionality.
boto3.client returns an instance of a dynamically-created class based on the service_name argument (see source), so you cannot use the patch method, which requires that the target object be importable.
Instead, you can patch botocore.client.ClientCreator._create_methods, the method that dynamically creates methods for the class that boto3.client returns, with a wrapper function that makes the describe_continuous_backups attribute a Mock object with the given return_value:
import boto3
import botocore
from unittest.mock import patch, Mock

def override(*args, **kwargs):
    def wrapper(self, service_model):
        op_dict = original_create_methods(self, service_model)
        if 'describe_continuous_backups' in op_dict:
            op_dict['describe_continuous_backups'] = Mock(*args, **kwargs)
        return op_dict
    return wrapper

original_create_methods = botocore.client.ClientCreator._create_methods

@patch('botocore.client.ClientCreator._create_methods', override(return_value={'foo': 'bar'}))
def check_stuff():
    session = boto3.Session()
    client = session.client('dynamodb', 'eu-west-1')
    some_stuff = client.describe_continuous_backups(TableName='')
    return some_stuff

print(check_stuff())
This outputs:
{'foo': 'bar'}
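A related alternative, offered here only as a hedged sketch and not part of the answer above, is to intercept botocore.client.BaseClient._make_api_call, which every dynamically-created client method ultimately routes through; the operation name arrives in PascalCase:

import boto3
import botocore.client
from unittest import mock

_original_api_call = botocore.client.BaseClient._make_api_call

def intercept(self, operation_name, api_params):
    # Return a canned payload for the one unsupported operation and
    # fall through to the real (or moto-backed) call for everything else.
    if operation_name == 'DescribeContinuousBackups':
        return {'foo': 'bar'}
    return _original_api_call(self, operation_name, api_params)

with mock.patch('botocore.client.BaseClient._make_api_call', new=intercept):
    client = boto3.Session().client('dynamodb', 'eu-west-1')
    print(client.describe_continuous_backups(TableName='some_table'))  # {'foo': 'bar'}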
I'm a new user of boto3 and I'm using DynamoDB.
I went through the DynamoDB API and couldn't find any method that tells me whether a table already exists.
What is the best approach for dealing with this issue?
Should I try to create a new table and wrap it in a try/except?
From reading the documentation, I can see that there are three methods by which you can check if a table exists.

1. The CreateTable API throws a ResourceInUseException if the table already exists. Wrap the create_table call in try/except to catch this.
2. You can use the ListTables API to get the list of table names associated with the current account and endpoint, and check whether your table name is present in the response.
3. The DescribeTable API throws a ResourceNotFoundException if the table you request doesn't exist.

To me, the first option sounds better if you just want to create a table.
Edit:
I see that some people are finding it difficult to catch the exceptions. I will put some code below to show how to handle exceptions in boto3.
Example 1
import boto3

dynamodb_client = boto3.client('dynamodb')

try:
    response = dynamodb_client.create_table(
        AttributeDefinitions=[
            {
                'AttributeName': 'Artist',
                'AttributeType': 'S',
            },
            {
                'AttributeName': 'SongTitle',
                'AttributeType': 'S',
            },
        ],
        KeySchema=[
            {
                'AttributeName': 'Artist',
                'KeyType': 'HASH',
            },
            {
                'AttributeName': 'SongTitle',
                'KeyType': 'RANGE',
            },
        ],
        ProvisionedThroughput={
            'ReadCapacityUnits': 5,
            'WriteCapacityUnits': 5,
        },
        TableName='test',
    )
except dynamodb_client.exceptions.ResourceInUseException:
    # do something here as you require
    pass
Example 2
import boto3

dynamodb_client = boto3.client('dynamodb')

table_name = 'test'
existing_tables = dynamodb_client.list_tables()['TableNames']

if table_name not in existing_tables:
    response = dynamodb_client.create_table(
        AttributeDefinitions=[
            {
                'AttributeName': 'Artist',
                'AttributeType': 'S',
            },
            {
                'AttributeName': 'SongTitle',
                'AttributeType': 'S',
            },
        ],
        KeySchema=[
            {
                'AttributeName': 'Artist',
                'KeyType': 'HASH',
            },
            {
                'AttributeName': 'SongTitle',
                'KeyType': 'RANGE',
            },
        ],
        ProvisionedThroughput={
            'ReadCapacityUnits': 5,
            'WriteCapacityUnits': 5,
        },
        TableName=table_name,
    )
Example 3
import boto3

dynamodb_client = boto3.client('dynamodb')

try:
    response = dynamodb_client.describe_table(TableName='test')
except dynamodb_client.exceptions.ResourceNotFoundException:
    # do something here as you require
    pass
import pprint

import boto3
from botocore.exceptions import ClientError

TABLE_NAME = "myTableName"

dynamodb = boto3.resource('dynamodb', endpoint_url="https://dynamodb.us-east-1.amazonaws.com")
table = dynamodb.Table(TABLE_NAME)
client = dynamodb.meta.client  # the low-level client behind the resource

try:
    response = client.describe_table(TableName=TABLE_NAME)
except ClientError as ce:
    if ce.response['Error']['Code'] == 'ResourceNotFoundException':
        print("Table " + TABLE_NAME + " does not exist. Create the table first and try again.")
    else:
        print("Unknown exception occurred while querying for the " + TABLE_NAME + " table. Printing full error:")
        pprint.pprint(ce.response)
Alternate approach if you do not want to use boto3.client but only boto3.resource:
import boto3

database = boto3.resource('dynamodb', endpoint_url="http://localhost:8000")

table_name = 'MyTable'
table_names = [table.name for table in database.tables.all()]
if table_name in table_names:
    print('table', table_name, 'exists')
You can use the DescribeTable API to determine whether the table exists.
Sample code:
from __future__ import print_function  # Python 2/3 compatibility
import os
os.environ["TZ"] = "UTC"
import boto3

client = boto3.client('dynamodb', region_name='us-west-2', endpoint_url="http://localhost:8000")

response = client.describe_table(
    TableName='Movies'
)
print(response)
If the table exists, you will get the response.
If the table doesn't exist, you will get a ResourceNotFoundException:

botocore.errorfactory.ResourceNotFoundException: An error occurred (ResourceNotFoundException) when calling the DescribeTable operation: Cannot do operations on a non-existent table
Another way:
Alternatively, you could use table.wait_until_exists(). From the docs:

Waits until this Table exists. This method calls DynamoDB.Waiter.table_exists.wait(), which polls DynamoDB.Client.describe_table() every 20 seconds until a successful state is reached. An error is returned after 25 failed checks.

See also: AWS API Documentation

Request Syntax
table.wait_until_exists()

Returns
None
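A minimal usage sketch of the waiter, assuming a table named 'Movies' that is still being created:

import boto3

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('Movies')

# Blocks until describe_table reports the table; raises
# botocore.exceptions.WaiterError if the checks are exhausted.
table.wait_until_exists()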
You can use the .table_status attribute of any boto3 Table instance. It returns the table's status if it exists (CREATING, UPDATING, DELETING, ACTIVE) or throws botocore.exceptions.ClientError: Requested resource not found: Table: <YOUR_TABLE_NAME> not found. You can wrap those conditions in try/except to have full information on the current table state.
import boto3
from botocore.exceptions import ClientError

dynamodb = boto3.resource('dynamodb', region_name='us-west-2')
table = dynamodb.Table('your_table_name_str')

try:
    is_table_existing = table.table_status in ("CREATING", "UPDATING",
                                               "DELETING", "ACTIVE")
except ClientError:
    is_table_existing = False
    print("Table %s doesn't exist." % table.name)
Note that it depends on whether you are using a client or a resource. If you use boto3.client(), you can use the three methods the accepted answer suggested. If you are using boto3.resource(), you can only use dynamodb_resource.create_table() and check for exceptions.
try:
    table = dynamodb_resource.create_table(
        ...
    )
    table.meta.client.get_waiter('table_exists').wait(TableName=your_table_name)
except dynamodb_resource.meta.client.exceptions.ResourceInUseException:
    # do sth here
    pass
I know this will have a slight risk if there are more than 10 tables between name.slice(0, -1) and name. However, it does save throwing exceptions and the like.
Alas, the documentation https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_ListTables.html (examples) implies that the first table in the returned list will be the searched-for item; that is not the case.
class Dynamo {
    private m_db!: DynamoDB;

    private async ensure_table(name: string) {
        const search = await this.db().listTables({
            ExclusiveStartTableName: name.slice(0, -1),
            Limit: 10 });
        const exists = search.TableNames?.includes(name);
        exists || await this.create_table(name);
    }

    private async create_table(name: string) {
        // create the table here
    }

    private db(): DynamoDB {
        return this.m_db || (this.m_db = this.create_db());
    }

    private create_db = (): DynamoDB => {
        return new DynamoDB({ apiVersion: "2012-08-10" });
    }
}
You can use the convenient resource API while still handling and catching exceptions at the client API level, because you can access the client from the resource. This makes it the most elegant method of checking whether a table exists that I have found:
resource = boto3.resource('dynamodb', region_name='eu-north-1')

def table_exists(table_name: str) -> bool:
    try:
        resource.Table(table_name).table_status
    except resource.meta.client.exceptions.ResourceNotFoundException:
        return False
    return True

print(table_exists('dummy_table'))