How to delete nested JSON attribute in dynamodb / python - python

I have a simple dynamodb database that uses "League" as a partition key and "Team" as a sort key to store all roster data under a "Players" attribute field that uses a JSON format. I would like to access and delete a specific player (Larry Bird or Jayson Tatum in this case), however, I am having trouble accessing the schema correctly to delete the specific key and values, especially given that Players.Jayson Tatum wont work because it is two separate words. Here is the basic skeleton code for the function so far:
def lambda_handler(event, context):
newLeague = None
newTeam = None
newPlayer = None
statusCode = 200
if checkKey(event, 'League'):
newLeague = event['League']
if checkKey(event, 'Team'):
newTeam = event['Team']
if checkKey(event, 'Player'):
newPlayer = event['Player']
if newLeague != None and newTeam != None and newPlayer != None:
retrievedData = table.delete_item(
Key = {
'League': newLeague,
'Team': newTeam,
}
)
Database layout in Dynamodb for reference

Removing a nested item in DynamoDB can be done by using the REMOVE-expression in the update_item-method:
client.update_item(
TableName=table_name,
Key={"League": {"S": "NBA"}, "Team": {"S": "Celtics"}},
UpdateExpression="REMOVE Players.#p",
ExpressionAttributeNames={"#p": "Larry Bird"},
)
Note the ExpressionAttributeNames, to get around the fact that the player name has a space in it.
Adding a player is done in a similar fashion:
client.update_item(
TableName=table_name,
Key={"League": {"S": "NBA"}, "Team": {"S": "Celtics"}},
UpdateExpression="SET Players.#p = :v",
ExpressionAttributeNames={"#p": "Larry Bird"},
ExpressionAttributeValues={":v": {"M": {..}}}
)

Related

using dict.update is overwriting the string of the key itself AND the key's value

I am working on a CLI for AWS, and I'm trying to get all instances of all EC2 into a single dict, across all regions.
def get_all_ec2_instances_in_all_regions(self):
ec2_instance_list = {}
region_list = list_ec2_instance_regions() #this returns a list of regions to iterate on
print('Finding Regions')
for region in region_list:
region_name = region['RegionName']
ec2 = boto3.client('ec2', region_name=region_name)
regional_instance_list_return = ec2.describe_instances()['Reservations']
if len(regional_instance_list_return) == 0:
ec2_instance_list.update({
region_name: regional_instance_list_return
})
for reservation in regional_instance_list_return:
instance_id= reservation['Instances'][0]['InstanceId']
ec2_instance_list.update({
region_name: {
instance_id: reservation['Instances'][0]
}
})
print('Region search complete')
print(prettyPrintDict(ec2_instance_list)) #prettyPrintDict just console logs the dict in a nicer format for human readability
The resulting object only has one single dict object with the instance_id as the key, even though regional_instance_list_return actually has a list with multiple objects in it.
I would figure this code would add dictionaries with each InstanceID Like this
{ region1_name:
{ instance_id1: {instance1 data},
instance_id2: {instance2 data},
instance_id3: {instance3 data}
}
region2_name:
{ instance_id1: {instance1 data},
instance_id2: {instance2 data},
instance_id3: {instance3 data},
instance_id4: {instance4 data}
}
... and so on
}
but the resulting dict actually looks like this when it's finished:
{ region1_name:
{
instance_id3: {instance3 data}
}
region2_name:
{
instance_id4: {instance4 data}
}
... and so on
}
It doesn't actually add each instance, it just overwrites the instance_id key (Which is unique for each instance) and the key's values.
I was under the impression that if a key is unique, and you use dict.update() it'll just add them all without overwriting? What am I doing wrong?
dict.update takes keyword assignment arguments and converts the keyword into the dict key. You are telling it that the key is 'instance_id', not the value assigned to the variable instance_id. Instead of dict.update, try the syntax dict[key] = value, which appends a new key/value pair if key does not already exist.
Ok so in the original code, it was overwriting the region each time with the dict.update so I had to make a few small modifications to make sure that the update was happening in the right leaf of the object:
def get_all_ec2_instances_in_all_regions(self):
ec2_instance_list = {}
region_list = self.list_ec2_instance_regions()
print('Finding Regions')
for region in region_list:
region_name = region['RegionName']
self.ec2 = boto3.client('ec2', region_name=region_name)
regional_instance_list_return = self.ec2.describe_instances()['Reservations']
ec2_instance_list.update({
region_name: {}
}) #update the region name no matter if there are instances or not
for reservation in regional_instance_list_return:
instance_id = reservation['Instances'][0]['InstanceId']
ec2_instance_list[region_name].update({
# update the list starting at the
# root of the region_name, not one
# level up the leaf as I had before
instance_id: reservation['Instances'][0]
})

I want to firstly create a database and then update it as per the values in mongodb

I want to update the value of Entry1 using upsert. I have a sensor that returns the value of Entry1. If sensor is blocked, the value is true. If sensor is not blocked then the value is False.
machineOne = None
oneIn = 1
while True:
global machineOneId
global userId
try:
if Entry1.get_value() and oneIn < 2:
machineOne = Entry1.get_value()
print('entered looopp ONeeeE', machineOne)
machine1 = {
'Entry1': Entry1.get_value(),
'Exit1': Exit1.get_value(),
'id': 'test'
}
result = Machine1.insert_one(machine1)
myquery = {"Entry1": 'true'}
newvalues = {"$set": {"id": result.inserted_id}}
#result = Machine1.insert_one(machine1)
Machine1.update_one(myquery, newvalues)
userId = result.inserted_id
oneIn += 1
print('added', result.inserted_id, oneIn)
elif machineOne:
print('entered looopp', userId)
myquery = {"id": userId}
newvalues = {"$set": {"id": Entry1.get_value()}}
upsert = True
#result = Machine1.insert_one(machine1)
Machine1.update_one(myquery, newvalues)
if Exit1.get_value():
print('added',)
finally:
print('nothings happened', machineOne)
what is expected: i should be able to update the Entry1 from true to false in the same log, displayed in robo3t
Good Afternoon #digs10 ,
I read your post and I think the error is how you locate the document that you want to update.
I remember that MongoDB document primary key is "_id" instead of "id". You could take a look here MongoDB Documents
For what I see in the code (I don't know Python but it is readable), you are referring to the document "Entry1" using the field "id" instead of "_id.
Try modifing the line myquery = {"id": userId} for myquery = {"_id": userId}.
I hope this answer can help you.
Best Regards,
JB
P.S: I saw this question in my email and I took a quick read of it, If I misunderstood it, please let me know.

Structuring request JSON for API

I'm building a small API to interact with our database for other projects. I've built the database and have the API functioning fine, however, the data I get back isn't structured how I want it.
I am using Python with Flask/Flask-Restful for the API.
Here is a snippet of my Python that handles the interaction:
class Address(Resource):
def get(self, store):
print('Received a request at ADDRESS for Store ' + store )
conn = sqlite3.connect('store-db.db')
cur = conn.cursor()
addresses = cur.execute('SELECT * FROM Sites WHERE StoreNumber like ' + store)
for adr in addresses:
return(adr, 200)
If I make a request to the /sites/42 endpoint, where 42 is the site id, this is what I'll receive:
[
"42",
"5000 Robinson Centre Drive",
"",
"Pittsburgh",
"PA",
"15205",
"(412) 787-1330",
"(412) 249-9161",
"",
"Dick's Sporting Goods"
]
Here is how it is structured in the database:
Ultimately I'd like to use the column name as the Key in the JSON that's received, but I need a bit of guidance in the right direction so I'm not Googling ambiguous terms hoping to find something.
Here is an example of what I'd like to receive after making a request to that endpoint:
{
"StoreNumber": "42",
"Street": "5000 Robinson Centre Drive",
"StreetSecondary": "",
"City": "Pittsburgh",
"State": "PA",
"ZipCode": "15205",
"ContactNumber": "(412) 787-1330",
"XO_TN": "(412) 249-9161",
"RelocationStatus": "",
"StoreType": "Dick's Sporting Goods"
}
I'm just looking to get some guidance on if I should change how my data is structured in the database (i.e. I've seen some just put the JSON in their database, but I think that's messy) or if there's a more intuitive method I could use to control my data.
Updated Code using Accepted Answer
class Address(Resource):
def get(self, store):
print('Received a request at ADDRESS for Store ' + store )
conn = sqlite3.connect('store-db.db')
cur = conn.cursor()
addresses = cur.execute('SELECT * FROM Sites WHERE StoreNumber like ' + store)
for r in res:
column_names = ["StoreNumber", "Street", "StreetSecondary","City","State", "ZipCode", "ContactNumber", "XO_TN", "RelocationStatus", "StoreType"]
data = [r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]]
datadict = {column_names[itemindex]:item for itemindex, item in enumerate(data)}
return(datadict, 200)
You could just convert your list to a dict and then parse it to a JSON string before passing it back out.
// These are the names of the columns in your database
>>> column_names = ["storeid", "address", "etc"]
// This is the data coming from the database.
// All data is passed as you are using SELECT * in your query
>>> data = [42, "1 the street", "blah"]
// This is a quick notation for creating a dict from a list
// enumerate means we get a list index and a list item
// as the columns are in the same order as the data, we can use the list index to pull out the column_name
>>> datadict = {column_names[itemindex]:item for itemindex, item in enumerate(data)}
//This just prints datadict in my terminal
>>> datadict
We now have a named dict containing your data and the column names.
{'etc': 'blah', 'storeid': 42, 'address': '1 the street'}
Now dump the datadict to a string so that it can be sent to the frontend.
>>> import json
>>> json.dumps(datadict)
The dict has now been converted to a string.
'{"etc": "blah", "storeid": 42, "address": "1 the street"}'
This would require no change to your database but the script would need to know about the column names or retrieve them dynamically using some SQL.
If the data in the database is in the correct format for passing to the frontend then you shouldn't need to change the database structure. If it was not in the correct format then you could either change the way it was stored or change your SQL query to manipulate it.

DynamoDB - How to query a nested attribute boto3

I am following the DynamoDB python tutorial. This step shows how to query the table based on a specific key: http://docs.aws.amazon.com/amazondynamodb/latest/gettingstartedguide/GettingStarted.Python.04.html.
Here is the code for this query:
from __future__ import print_function # Python 2/3 compatibility
import boto3
import json
import decimal
from boto3.dynamodb.conditions import Key, Attr
# Helper class to convert a DynamoDB item to JSON.
class DecimalEncoder(json.JSONEncoder):
def default(self, o):
if isinstance(o, decimal.Decimal):
return str(o)
return super(DecimalEncoder, self).default(o)
dynamodb = boto3.resource('dynamodb', region_name='us-west-2', endpoint_url="http://localhost:8000")
table = dynamodb.Table('Movies')
print("Movies from 1992 - titles A-L, with genres and lead actor")
response = table.query(
ProjectionExpression="#yr, title, info.genres, info.actors[0]",
ExpressionAttributeNames={ "#yr": "year" }, # Expression Attribute Names for Projection Expression only.
KeyConditionExpression=Key('year').eq(1992) & Key('title').between('A', 'L')
)
for i in response[u'Items']:
print(json.dumps(i, cls=DecimalEncoder))
An example response item is
{
"title": "Juice",
"year": "1992",
"info": {
"actors": [
"Omar Epps"
],
"genres": [
"Crime",
"Drama",
"Thriller"
]
}
}
The table has the two key attributes 'title' and 'year' as well as the nested attribute 'info'. What I am trying to do is query the database and filter the movies by genre, for example get all Drama movies. I am not sure how to do this since the genre key is nested inside info.
I tried to get all the Drama movies from 1992 like this but it came up blank.
response = table.query(
KeyConditionExpression=Key('year').eq(1992),
FilterExpression=Attr('info.genres').eq('Drama')
)
How do I properly filter this query with the nested info attribute?
You can use contains to filter the data from List data type.
genres -attribute stored as List inside info attribute which is a Map data type
FilterExpression=Attr('info.genres').contains('Drama')
Unlike in the accepted answer, to be able to filter all the items with the attribute, you need to use scan() instead of query(). query() requires KeyCondition which is unnecessary in your case and forces you to create condition containing f.e. year.
Therefore
table.scan(FilterExpression=Attr('info.genres').contains('Drama'))
should do the job

Example of update_item in dynamodb boto3

Following the documentation, I'm trying to create an update statement that will update or add if not exists only one attribute in a dynamodb table.
I'm trying this
response = table.update_item(
Key={'ReleaseNumber': '1.0.179'},
UpdateExpression='SET',
ConditionExpression='Attr(\'ReleaseNumber\').eq(\'1.0.179\')',
ExpressionAttributeNames={'attr1': 'val1'},
ExpressionAttributeValues={'val1': 'false'}
)
The error I'm getting is:
botocore.exceptions.ClientError: An error occurred (ValidationException) when calling the UpdateItem operation: ExpressionAttributeNames contains invalid key: Syntax error; key: "attr1"
If anyone has done anything similar to what I'm trying to achieve please share example.
Found working example here, very important to list as Keys all the indexes of the table, this will require additional query before update, but it works.
response = table.update_item(
Key={
'ReleaseNumber': releaseNumber,
'Timestamp': result[0]['Timestamp']
},
UpdateExpression="set Sanity = :r",
ExpressionAttributeValues={
':r': 'false',
},
ReturnValues="UPDATED_NEW"
)
Details on dynamodb updates using boto3 seem incredibly sparse online, so I'm hoping these alternative solutions are useful.
get / put
import boto3
table = boto3.resource('dynamodb').Table('my_table')
# get item
response = table.get_item(Key={'pkey': 'asdf12345'})
item = response['Item']
# update
item['status'] = 'complete'
# put (idempotent)
table.put_item(Item=item)
actual update
import boto3
table = boto3.resource('dynamodb').Table('my_table')
table.update_item(
Key={'pkey': 'asdf12345'},
AttributeUpdates={
'status': 'complete',
},
)
If you don't want to check parameter by parameter for the update I wrote a cool function that would return the needed parameters to perform a update_item method using boto3.
def get_update_params(body):
"""Given a dictionary we generate an update expression and a dict of values
to update a dynamodb table.
Params:
body (dict): Parameters to use for formatting.
Returns:
update expression, dict of values.
"""
update_expression = ["set "]
update_values = dict()
for key, val in body.items():
update_expression.append(f" {key} = :{key},")
update_values[f":{key}"] = val
return "".join(update_expression)[:-1], update_values
Here is a quick example:
def update(body):
a, v = get_update_params(body)
response = table.update_item(
Key={'uuid':str(uuid)},
UpdateExpression=a,
ExpressionAttributeValues=dict(v)
)
return response
The original code example:
response = table.update_item(
Key={'ReleaseNumber': '1.0.179'},
UpdateExpression='SET',
ConditionExpression='Attr(\'ReleaseNumber\').eq(\'1.0.179\')',
ExpressionAttributeNames={'attr1': 'val1'},
ExpressionAttributeValues={'val1': 'false'}
)
Fixed:
response = table.update_item(
Key={'ReleaseNumber': '1.0.179'},
UpdateExpression='SET #attr1 = :val1',
ConditionExpression=Attr('ReleaseNumber').eq('1.0.179'),
ExpressionAttributeNames={'#attr1': 'val1'},
ExpressionAttributeValues={':val1': 'false'}
)
In the marked answer it was also revealed that there is a Range Key so that should also be included in the Key. The update_item method must seek to the exact record to be updated, there's no batch updates, and you can't update a range of values filtered to a condition to get to a single record. The ConditionExpression is there to be useful to make updates idempotent; i.e. don't update the value if it is already that value. It's not like a sql where clause.
Regarding the specific error seen.
ExpressionAttributeNames is a list of key placeholders for use in the UpdateExpression, useful if the key is a reserved word.
From the docs, "An expression attribute name must begin with a #, and be followed by one or more alphanumeric characters". The error is because the code hasn't used an ExpressionAttributeName that starts with a # and also not used it in the UpdateExpression.
ExpressionAttributeValues are placeholders for the values you want to update to, and they must start with :
Based on the official example, here's a simple and complete solution which could be used to manually update (not something I would recommend) a table used by a terraform S3 backend.
Let's say this is the table data as shown by the AWS CLI:
$ aws dynamodb scan --table-name terraform_lock --region us-east-1
{
"Items": [
{
"Digest": {
"S": "2f58b12ae16dfb5b037560a217ebd752"
},
"LockID": {
"S": "tf-aws.tfstate-md5"
}
}
],
"Count": 1,
"ScannedCount": 1,
"ConsumedCapacity": null
}
You could update it to a new digest (say you rolled back the state) as follows:
import boto3
dynamodb = boto3.resource('dynamodb', 'us-east-1')
try:
table = dynamodb.Table('terraform_lock')
response = table.update_item(
Key={
"LockID": "tf-aws.tfstate-md5"
},
UpdateExpression="set Digest=:newDigest",
ExpressionAttributeValues={
":newDigest": "50a488ee9bac09a50340c02b33beb24b"
},
ReturnValues="UPDATED_NEW"
)
except Exception as msg:
print(f"Oops, could not update: {msg}")
Note the : at the start of ":newDigest": "50a488ee9bac09a50340c02b33beb24b" they're easy to miss or forget.
Small update of Jam M. Hernandez Quiceno's answer, which includes ExpressionAttributeNames to prevent encoutering errors such as:
"errorMessage": "An error occurred (ValidationException) when calling the UpdateItem operation:
Invalid UpdateExpression: Attribute name is a reserved keyword; reserved keyword: timestamp",
def get_update_params(body):
"""
Given a dictionary of key-value pairs to update an item with in DynamoDB,
generate three objects to be passed to UpdateExpression, ExpressionAttributeValues,
and ExpressionAttributeNames respectively.
"""
update_expression = []
attribute_values = dict()
attribute_names = dict()
for key, val in body.items():
update_expression.append(f" #{key.lower()} = :{key.lower()}")
attribute_values[f":{key.lower()}"] = val
attribute_names[f"#{key.lower()}"] = key
return "set " + ", ".join(update_expression), attribute_values, attribute_names
Example use:
update_expression, attribute_values, attribute_names = get_update_params(
{"Status": "declined", "DeclinedBy": "username"}
)
response = table.update_item(
Key={"uuid": "12345"},
UpdateExpression=update_expression,
ExpressionAttributeValues=attribute_values,
ExpressionAttributeNames=attribute_names,
ReturnValues="UPDATED_NEW"
)
print(response)
An example to update any number of attributes given as a dict, and keep track of the number of updates. Works with reserved words (i.e name).
The following attribute names shouldn't be used as we will overwrite the value: _inc, _start.
from typing import Dict
from boto3 import Session
def getDynamoDBSession(region: str = "eu-west-1"):
"""Connect to DynamoDB resource from boto3."""
return Session().resource("dynamodb", region_name=region)
DYNAMODB = getDynamoDBSession()
def updateItemAndCounter(db_table: str, item_key: Dict, attributes: Dict) -> Dict:
"""
Update item or create new. If the item already exists, return the previous value and
increase the counter: update_counter.
"""
table = DYNAMODB.Table(db_table)
# Init update-expression
update_expression = "SET"
# Build expression-attribute-names, expression-attribute-values, and the update-expression
expression_attribute_names = {}
expression_attribute_values = {}
for key, value in attributes.items():
update_expression += f' #{key} = :{key},' # Notice the "#" to solve issue with reserved keywords
expression_attribute_names[f'#{key}'] = key
expression_attribute_values[f':{key}'] = value
# Add counter start and increment attributes
expression_attribute_values[':_start'] = 0
expression_attribute_values[':_inc'] = 1
# Finish update-expression with our counter
update_expression += " update_counter = if_not_exists(update_counter, :_start) + :_inc"
return table.update_item(
Key=item_key,
UpdateExpression=update_expression,
ExpressionAttributeNames=expression_attribute_names,
ExpressionAttributeValues=expression_attribute_values,
ReturnValues="ALL_OLD"
)
Hope it might be useful to someone!
In a simple way you can use below code to update item value with new one:
response = table.update_item(
Key={"my_id_name": "my_id_value"}, # to get record
UpdateExpression="set item_key_name=:item_key_value", # Operation action (set)
ExpressionAttributeValues={":value": "new_value"}, # item that you need to update
ReturnValues="UPDATED_NEW" # optional for declarative message
)
Simple example with multiple fields:
import boto3
dynamodb_client = boto3.client('dynamodb')
dynamodb_client.update_item(
TableName=table_name,
Key={
'PK1': {'S': 'PRIMARY_KEY_VALUE'},
'SK1': {'S': 'SECONDARY_KEY_VALUE'}
}
UpdateExpression='SET #field1 = :field1, #field2 = :field2',
ExpressionAttributeNames={
'#field1': 'FIELD_1_NAME',
'#field2': 'FIELD_2_NAME',
},
ExpressionAttributeValues={
':field1': {'S': 'FIELD_1_VALUE'},
':field2': {'S': 'FIELD_2_VALUE'},
}
)
using previous answer from eltbus , it worked for me , except for minor bug,
You have to delete the extra comma using update_expression[:-1]

Categories

Resources