I need to read a CSV file from an S3 bucket and insert each row into DynamoDB.
def load_users_dynamodb():
    s3 = boto3.client('s3')
    dynamodb = boto3.resource('dynamodb')
    table = dynamodb.Table("test")
    obj = s3.get_object(Bucket='test-app-config', Key='extract_Users.csv')
    #return obj
    data = obj['Body'].read().split('\n')
    #return json.dumps(data)
    with table.batch_writer() as batch:
        for row in data:
            batch.put_item(Item={
                'registration': row.split(',')[0],
                'name': row.split(',')[1],
                'role': row.split(',')[2],
                'company': row.split(',')[3],
                'hiredcompany': row.split(',')[4],
                'region': row.split(',')[5]
            })
    return 'OK'
I'm getting an exception and can't proceed:
Response:
{
"errorMessage": "a bytes-like object is required, not 'str'",
"errorType": "TypeError",
"stackTrace": [
" File \"/var/task/lambda_function.py\", line 10, in lambda_handler\n 'body': load_users_dynamodb()\n",
" File \"/var/task/lambda_function.py\", line 21, in load_users_dynamodb\n data = obj['Body'].read().split('\\n')\n"
]
}
Can someone help me, please? o/
Your issue is related to decoding the object returned from S3. You need to read the file as CSV.
Take a look at the following code snippet:
import boto3
import csv

s3 = boto3.client('s3')

def lambda_handler(event, context):
    obj = s3.get_object(Bucket='Bucket_Name', Key='File_Name.csv')
    data = obj['Body'].read().decode('utf-8').splitlines()
    lines = csv.reader(data)
    headers = next(lines)
    print('headers: %s' % (headers))
    for line in lines:
        print(line)
Output (tested with a dummy CSV file): the headers list prints first, then each data row.
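Applied to your load_users_dynamodb function, a minimal sketch (reusing the bucket, key and table names from your post; the header-skip line is only needed if your CSV actually has a header row) would be:

import boto3
import csv

def load_users_dynamodb():
    s3 = boto3.client('s3')
    dynamodb = boto3.resource('dynamodb')
    table = dynamodb.Table('test')

    obj = s3.get_object(Bucket='test-app-config', Key='extract_Users.csv')
    # decode the bytes to text, then let the csv module do the parsing
    rows = csv.reader(obj['Body'].read().decode('utf-8').splitlines())
    # next(rows)  # uncomment to skip a header row, if the file has one

    with table.batch_writer() as batch:
        for row in rows:
            if len(row) < 6:  # skip blank or short lines
                continue
            batch.put_item(Item={
                'registration': row[0],
                'name': row[1],
                'role': row[2],
                'company': row[3],
                'hiredcompany': row[4],
                'region': row[5]
            })
    return 'OK'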
Requirement: To read data from S3 to pass into API
Error: "error": {"code": "ModelStateInvalid", "message": "The request has exceeded the maximum number of validation errors.", "target": "HttpRequest"
When I pass the data directly in the code, as in the document below, it works fine:
def create_doc(self, client):
    self.n_docs = int(self.n_docs)
    document = {'addresses': {'SingleLocation': {'city': 'ABC',
                                                 'country': 'US',
                                                 'line1': 'Main',
                                                 'postalCode': '00000',
                                                 'region': 'CA'
                                                 }
                              },
                'commit': False,
                }
    response = client.cr_transc(document)
    jsn = response.json()
But when I put the data in a file in S3 and read it from there, it throws an error:
def create_doc(self, client):
    self.n_docs = int(self.n_docs)
    document = data_from_s3()
    response = client.cr_transc(document)
    jsn = response.json()

def data_from_s3(self):
    s3 = S3Hook()
    data = s3.read_key(bucket_name=self.bucket_name, key=self.data_key)
    return data
The link below is for the read_key method in Airflow:
https://airflow.apache.org/docs/apache-airflow/1.10.6/_modules/airflow/hooks/S3_hook.html#S3Hook:~:text=%5Bdocs%5D%20%20%20%20def-,read_key,-(self%2C
Checking the source code:
def read_key(self, key, bucket_name=None):
    """
    Reads a key from S3

    :param key: S3 key that will point to the file
    :type key: str
    :param bucket_name: Name of the bucket in which the file is stored
    :type bucket_name: str
    """
    obj = self.get_key(key, bucket_name)
    return obj.get()['Body'].read().decode('utf-8')
This returns a str. You might need to use the json module to convert it:
import json

def create_doc(self, client):
    self.n_docs = int(self.n_docs)
    document = json.loads(data_from_s3())  # <----- convert here
    response = client.cr_transc(document)
    jsn = response.json()

def data_from_s3(self):
    s3 = S3Hook()
    data = s3.read_key(bucket_name=self.bucket_name, key=self.data_key)
    return data
I have set up a trigger/Lambda to upload into DynamoDB, however I get the following error when uploading. I'm not sure what is going wrong.
So far I have just created a blank DynamoDB table with the primary key "PlayerWeekID" as a string, but nothing else. Is this an issue because DynamoDB isn't reading in the data types? Do I need to specify these in the Lambda, or set them up in DynamoDB before running the code?
Update:
This is the Python code:
# change dataframe to json
sdl_fpl_data = dffinal.to_json(orient='records', lines=True)

s3 = boto3.resource('s3')
obj = s3.Object('bucket-name', 'sdl_fpl_data.json')
obj.put(Body=json.dumps(sdl_fpl_data))
Lambda:
import boto3
import json

s3_client = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')

def lambda_handler(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    json_file_name = event['Records'][0]['s3']['object']['key']
    json_object = s3_client.get_object(Bucket=bucket, Key=json_file_name)
    jsonFileReader = json_object['Body'].read()
    jsonDict = json.loads(jsonFileReader)
    table = dynamodb.Table('my-table')
    table.put_item(Item=jsonDict)
[ERROR] ParamValidationError: Parameter validation failed:
Invalid type for parameter Item, value:
{
"GW": "GW1",
"OR": "2,149,169",
"GWP": 66,
"PB": 3,
"TM": 0,
"TC": 0,
"£": 100,
"Manager": "XXXXX",
"Team Name": "XXXXXX",
"Player_Number": "372",
"TP": 66,
"PlayerWeekID": "372GW1"
}
, type: <class 'str'>, valid types: <class 'dict'>
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 16, in lambda_handler
table.put_item(Item=jsonDict)
Output of jsonDict
Can you share the output of your variable jsonDict?
DynamoDB needs a JSON object (a dict, {}) as the payload.
From what I understand, it looks like you're trying to save a list [].
Ensure you are saving an object which contains the keys of your table and you should have no issue.
Working example:
import boto3

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('test')

item = {
    "GW": "GW1",
    "OR": "2,149,169",
    "GWP": 66,
    "PB": 3,
    "TM": 0,
    "TC": 0,
    "£": 100,
    "Manager": "XXXXX",
    "Team Name": "XXXXXX",
    "Player_Number": "372",
    "TP": 66,
    "PlayerWeekID": "372GW1"
}

try:
    res = table.put_item(Item=item)
    print(res)
except Exception as e:
    print(e)
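One more thing worth checking in the upload code: sdl_fpl_data is already a JSON string (to_json returns a str), so json.dumps(sdl_fpl_data) wraps it in an extra layer of quoting, which would explain why json.loads gives you a str on the Lambda side. If you drop the extra json.dumps and keep lines=True, the file becomes one JSON object per line; a sketch of the handler loop for that layout could look like this (table name taken from your post; this is an assumption about your data, not a drop-in fix):

import boto3
import json

s3_client = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')

def lambda_handler(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    body = s3_client.get_object(Bucket=bucket, Key=key)['Body'].read().decode('utf-8')

    table = dynamodb.Table('my-table')
    with table.batch_writer() as batch:
        # with lines=True, each non-empty line is a standalone JSON object (a dict)
        for line in body.splitlines():
            if not line.strip():
                continue
            batch.put_item(Item=json.loads(line))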
I read a JSON file from S3 like this:
json_file = s3_resource.Object(bucket_name='test', key='new.json')
json_content = json.loads(file_content)
....
gzipped_content = gzip.compress(json_content)
After reading the file into json_content, I want to gzip it.
But I am not sure what to pass to gzip.compress() for its arguments.
Currently, I get the error below:
{
"errorMessage": "memoryview: a bytes-like object is required, not 'list'",
"errorType": "TypeError",
"requestId": "017949f4-533b-4087-9038-10fd39f435d9",
"stackTrace": [
" File \"/var/task/lambda_function.py\", line 28, in lambda_handler\n gzipped_content = gzip.compress(json_content)\n",
" File \"/var/lang/lib/python3.9/gzip.py\", line 548, in compress\n f.write(data)\n",
" File \"/var/lang/lib/python3.9/gzip.py\", line 284, in write\n data = memoryview(data)\n"
]
}
json_content looks like this:
[{'actionCodes': [], 'additionalCostOccured': '', 'amountEURRecieved': 0.0, 'amountOfAdditionalCost':}]
For zipped files, I did something like this and it worked:
with zipped.open(file, "r") as f_in:
    gzipped_content = gzip.compress(f_in.read())
What is the issue?
As the error suggests, gzip.compress(...) expects a bytes-like object, while you are providing a list.
You need to:
1. Pass the (modified?) list object (or any other JSON-spec-compatible object) to json.dumps to obtain a JSON-formatted str.
2. Pass the JSON string to str.encode to get a bytes object.
3. Pass the bytes object to gzip.compress(...).
This should work:
json_file = s3_resource.Object(bucket_name='test', key='new.json')
json_content = json.loads(file_content)
....
content_back_to_json = json.dumps(json_content)
json_content_as_bytes = str.encode(content_back_to_json)
gzipped_content = gzip.compress(json_content_as_bytes)
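If the end goal is to write the gzipped payload back to S3 (an assumption on my part; the bucket and key below are just placeholders), something along these lines should work:

import gzip
import json
import boto3

s3_resource = boto3.resource('s3')

content_back_to_json = json.dumps(json_content)
gzipped_content = gzip.compress(content_back_to_json.encode('utf-8'))

s3_resource.Object('test', 'new.json.gz').put(  # placeholder bucket/key
    Body=gzipped_content,
    ContentType='application/json',
    ContentEncoding='gzip'
)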
Hello, I'm trying to generate a CSV from DynamoDB to S3 using a Lambda function. The thing is, I just get an empty file on S3. Please help!
import csv
import boto3
import json

dynamodb = boto3.resource('dynamodb')
db = dynamodb.Table('ReporteTelefonica')

def lambda_handler(event, context):
    AWS_BUCKET_NAME = 'reportetelefonica'
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(AWS_BUCKET_NAME)
    path = 'test.csv'
    try:
        response = db.scan()
        myFile = open(path, 'w')
        for i in response['Items']:
            csv.register_dialect('myDialect', delimiter=',', quoting=csv.QUOTE_NONE)
            with myFile:
                writer = csv.writer(myFile, dialect='myDialect')
                writer.writerows(i)
            print(i)
    except:
        print("error")
    bucket.put_object(
        ACL='public-read-write',
        ContentType='application/csv',
        Key=path,
        # Body=json.dumps(i),
    )
    # print("here")
    body = {
        "uploaded": "true",
        "bucket": AWS_BUCKET_NAME,
        "path": path,
    }
    # print("then here")
    return {
        "statusCode": 200,
        "body": json.dumps(body)
    }
I'm kind of a noob at this, so I was wondering: what should I modify to successfully do a complete scan of the table and write the values to the CSV on S3?
Here's a working lambda that will do the job.
import boto3
import json
import os
import pandas as pd

TABLE_NAME = os.environ.get("DDB_TABLE_NAME")
OUTPUT_BUCKET = os.environ.get("BUCKET_NAME")
TEMP_FILENAME = '/tmp/export.csv'
OUTPUT_KEY = 'export.csv'

s3_resource = boto3.resource('s3')
dynamodb_resource = boto3.resource('dynamodb')
table = dynamodb_resource.Table(TABLE_NAME)

def lambda_handler(event, context):
    response = table.scan()
    df = pd.DataFrame(response['Items'])
    df.to_csv(TEMP_FILENAME, index=False, header=True)

    # Upload temp file to S3
    s3_resource.Bucket(OUTPUT_BUCKET).upload_file(TEMP_FILENAME, OUTPUT_KEY)

    return {
        'statusCode': 200,
        'headers': {
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Credentials": True,
            "content-type": "application/json"
        },
        'body': json.dumps('OK')
    }
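One caveat: a single scan() call returns at most 1 MB of data, so on larger tables this export will be partial. To get the complete table you may need to follow LastEvaluatedKey; a minimal sketch of that loop, which could replace the single scan inside the handler above:

    items = []
    response = table.scan()
    items.extend(response['Items'])
    # keep scanning until DynamoDB stops returning a LastEvaluatedKey
    while 'LastEvaluatedKey' in response:
        response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
        items.extend(response['Items'])
    df = pd.DataFrame(items)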
You either have to close the file after you have finished writing the CSV records, then reopen it for reading and pass it to the put_object method.
Alternatively, you open the file for reading and writing, and after writing you seek back to position 0 so that the put_object method reads from the start, as in the sketch below.
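A minimal sketch of that second approach, reusing the names from your snippet (db, bucket) and writing under /tmp, which is the only writable path inside Lambda:

import csv

path = '/tmp/test.csv'  # Lambda can only write to /tmp
response = db.scan()

with open(path, 'w+', newline='') as myFile:
    writer = csv.writer(myFile)
    for item in response['Items']:
        writer.writerow(item.values())  # assumes every item has the same attributes
    myFile.seek(0)  # rewind so the upload reads from the start of the file
    bucket.put_object(
        ACL='public-read-write',
        ContentType='application/csv',
        Key='test.csv',
        Body=myFile.read()
    )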
I've been having some trouble sending files via Python's requests module. I can send emails without attachments just fine, but as soon as I try to add a files parameter, the call fails and I get a 415 error.
I've looked through the site and found out it was maybe because I wasn't sending the content type of the files when building that array of data, so I altered it to query the content type with mimetypes; still 415.
Following this thread (python requests file upload), I made a couple more edits, but still 415.
The error message says:
"A supported MIME type could not be found that matches the content type of the response. None of the supported type(s)"
Then it lists a bunch of JSON types, e.g.: "'application/json;odata.metadata=minimal;odata.streaming=true;IEEE754Compatible=false"
then says:
"matches the content type 'multipart/form-data; boundary=0e5485079df745cf0d07777a88aeb8fd'"
Which of course makes me think I'm still not handling the content type correctly somewhere.
Can anyone see where I'm going wrong in my code?
Thanks!
Here's the function:
def send_email(access_token):
    import requests
    import json
    import pandas as pd
    import mimetypes

    url = "https://outlook.office.com/api/v2.0/me/sendmail"
    headers = {
        'Authorization': 'Bearer ' + access_token,
    }

    data = {}
    data['Message'] = {
        'Subject': "Test",
        'Body': {
            'ContentType': 'Text',
            'Content': 'This is a test'
        },
        'ToRecipients': [
            {
                'EmailAddress': {
                    'Address': 'MY TEST EMAIL ADDRESS'
                }
            }
        ]
    }
    data['SaveToSentItems'] = "true"
    json_data = json.dumps(data)
    # need to convert the above json_data to dict, otherwise it won't work
    json_data = json.loads(json_data)

    ### ATTACHMENT WORK
    file_list = ['test_files/test.xlsx', 'test_files/test.docx']
    files = {}
    pos = 1
    for file in file_list:
        x = file.split('/')  # separate file name from file path
        files['file' + str(pos)] = (  # give the file a unique name
            x[1],  # actual filename
            open(file, 'rb'),  # open the file
            mimetypes.MimeTypes().guess_type(file)[0]  # add in the content type
        )
        pos += 1  # increase the naming iteration
    # print(files)

    r = requests.post(url, headers=headers, json=json_data, files=files)
    print("")
    print(r)
    print("")
    print(r.text)
I've figured it out! I took a look at the Outlook API documentation and realised I should be adding attachments as base64-encoded entries inside the message JSON, not via the files parameter of requests.post. Here's my working example:
import requests
import json
import pandas as pd
import mimetypes
import base64

url = "https://outlook.office.com/api/v2.0/me/sendmail"
headers = {
    'Authorization': 'Bearer ' + access_token,
}

Attachments = []
file_list = ['test_files/image.png', 'test_files/test.xlsx']
for file in file_list:
    x = file.split('/')  # split the file path so we can get the file's name
    filename = x[1]  # get the filename
    content = open(file, 'rb')  # load the content
    # encode the file into bytes, then turn those bytes into a string
    encoded_string = ''
    with open(file, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read())
        encoded_string = encoded_string.decode("utf-8")
    # append the file to the attachments list
    Attachments.append({
        "@odata.type": "#Microsoft.OutlookServices.FileAttachment",
        "Name": filename,
        "ContentBytes": encoded_string
    })

data = {}
data['Message'] = {
    'Subject': "Test",
    'Body': {
        'ContentType': 'Text',
        'Content': 'This is a test'
    },
    'ToRecipients': [
        {
            'EmailAddress': {
                'Address': 'EMAIL_ADDRESS'
            }
        }
    ],
    "Attachments": Attachments
}
data['SaveToSentItems'] = "true"

json_data = json.dumps(data)
json_data = json.loads(json_data)

r = requests.post(url, headers=headers, json=json_data)
print(r)