Upload csv via API gateway to S3

Upload csv via API gateway to S3 - python

I am trying to set up an AWS API Gateway that could receive a POST request an upload a csv file to S3. Ideally, I would like to make some transformations to the file before uploading it to S3 (renaming and formatting some columns to normalize their names accross different uploads).
So far, I have set up my API Gateway to receive the request and send it to an AWS Lambda. I use Lambda proxy integration. The triggered lambda is as follows:
import logging
import pandas as pd
import boto3
logger = logging.getLogger()
logger.setLevel(logging.INFO)
s3 = boto3.client("s3")
def handler(event, context):
logger.info(f"Event: {event}")
df = pd.read_csv(event['body']['file'])
logger.info(f"df1: {df}")
# Provided parameters
try:
code = event['body']['code']
except KeyError:
logger.info('Code not provided')
code = 'Code'
try:
date = event['body']['date']
except KeyError:
logger.info('Date not provided')
date = 'Date'
try:
debit = event['body']['debit']
except KeyError:
logger.info('Debit not provided')
debit = 'Debit'
try:
credit = event['body']['credit']
except KeyError:
logger.info('Credit not provided')
credit = 'Credit'
try:
id= event['body']['id']
except KeyError:
logger.info('Id not provided')
id = '001'
df.rename(columns={code: 'Code', date: 'Date', credit: 'Credit', debit: 'Debit'})
df.to_csv(f's3://bucket/{id}/file.csv', line_terminator='\n', sep = ';', date_format='%Y-%m-%d %H:%M:%S')
return {
'statusCode': 200,
'headers': {
'Content-Type': 'text/csv',
'Access-Control-Allow-Origin': '*'
},
'body': {
'uploaded': True
},
'isBase64Encoded': False
}
To test this API, I use the following function:
import requests
csv_file = open("file.csv", 'rb')
headers = {"x-api-key": "xxx", "Content-Type":"text/csv"}
url = "https://xxx.execute-api.xxx.amazonaws.com/xxx"
body = {
"file": csv_file,
"code": "my_code"
}
# files = {
# "file": ("file.csv", open('file.csv', 'r'), 'text/csv')
# }
r = requests.post(url=url, headers=headers, data=body)
print(r.text)
The output is {"message": "Internal server error"}, and if I look in CloudWatch logs, I see that the event is encoded this way:
'body': 'file=%EF%BB%BFCol1%3BCol2%3BCol3%3BCol4%0D%0A&file=11%3B12%3B13%3B14%3B%0D%0A&file=21%3B22%3B23%3B24%3B...'
It looks like the body is encoded and passed row by row into different "file" fields. For a file with about 5000 rows I get the error OSError: [Errno 36] File name too long when trying to read it.
Is there another way to proceed in order to get a full dataset that I can transform into a pandas dataframe?
I have also seen suggestions with multipart/form-data, using files=files in the request or using csv library but I keep getting similar errors.
Thank you

Related

how to upload reels to facebook with graph api using python

I am trying to upload reel with Graph Api using Python.
I'm getting an error every time I try to upload video.
Error :
{"debug_info":{"retriable":false,"type":"NotAuthorizedError","message":"User not authorized to perform this request"}}
Note: I have given every possible permission to my app and page.
code:
import requests
import os
import json
Title ="Title of the video"
title = Title
description = title
source = f"F:\proj_ytTofb\downloads\{Title}.mp4"
files = {'source': open(source, 'rb')}
file_size = os.path.getsize(source)
print("File Size is :", file_size, "bytes")
def Initialize():
url = f"https://graph.facebook.com/v13.0/{page_id}/video_reels?upload_phase=start"
payload = {
'access_token': token,
}
r = requests.post(url, data = payload)
return r.json()["video_id"]
video_id=Initialize()
print(video_id)
def Upload():
url = f"https://rupload.facebook.com/video-upload/v15.0/{video_id}"
payload ={
'access_token': token,
'offset': 0,
'file_size': file_size,
}
r = requests.post(url, files=files, data=payload)
return r.text
print(Upload())
output: {"debug_info":{"retriable":false,"type":"NotAuthorizedError","message":"User not authorized to perform this request"}}

Your Upload code seem bit wrong if you refer on documentation
def Upload(vidid, size, filedata):
url = f"https://rupload.facebook.com/video-upload/v13.0/{vidid}"
payloadUp = {
'Authorization': 'OAuth ' + page_access_token,
'offset': "0",
'file_size': str(size),
}
print(payloadUp)
r = requests.post(url, data=filedata, headers=payloadUp)
return r.text
with parameter like this
files = {'source': open(mp4_path, 'rb')}
file_size = os.path.getsize(mp4_path)
and then you called it like this
Upload(video_id, file_size, files)
Note: I successfully upload it on fb reels and published it, but I dont what happen the video failed to convert without error notice.

Export DynamoDB to CSV on S3 with Lambda function (python)

Hello im trying to generate a CSV from dynamoDB to S3 using lambda function. the thing is I just get an empty file on s3. Please your help!
import csv
import boto3
import json
dynamodb = boto3.resource('dynamodb')
db = dynamodb.Table('ReporteTelefonica')
def lambda_handler(event, context):
AWS_BUCKET_NAME = 'reportetelefonica'
s3 = boto3.resource('s3')
bucket = s3.Bucket(AWS_BUCKET_NAME)
path = 'test.csv'
try:
response = db.scan()
myFile = open(path, 'w')
for i in response['Items']:
csv.register_dialect('myDialect', delimiter=',', quoting=csv.QUOTE_NONE)
with myFile:
writer = csv.writer(myFile, dialect='myDialect')
writer.writerows(i)
print(i)
except :
print("error")
bucket.put_object(
ACL='public-read-write',
ContentType='application/csv',
Key=path,
# Body=json.dumps(i),
)
# print("here")
body = {
"uploaded": "true",
"bucket": AWS_BUCKET_NAME,
"path": path,
}
# print("then here")
return {
"statusCode": 200,
"body": json.dumps(body)
}
I'm kind of noob on this, so I was wondering what should I modify to successfully make a complete scan of the table and write the values on the CSV on S3???

Here's a working lambda that will do the job.
import boto3
import json
import os
import pandas as pd
TABLE_NAME = os.environ.get("DDB_TABLE_NAME")
OUTPUT_BUCKET = os.environ.get("BUCKET_NAME")
TEMP_FILENAME = '/tmp/export.csv'
OUTPUT_KEY = 'export.csv'
s3_resource = boto3.resource('s3')
dynamodb_resource = boto3.resource('dynamodb')
table = dynamodb_resource.Table(TABLE_NAME)
def lambda_handler(event, context):
response = table.scan()
df = pd.DataFrame(response['Items'])
df.to_csv(TEMP_FILENAME, index=False, header=True)
# Upload temp file to S3
s3_resource.Bucket(OUTPUT_BUCKET).upload_file(TEMP_FILENAME, OUTPUT_KEY)
return {
'statusCode': 200,
'headers': {
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Credentials": True,
"content-type": "application/json"
},
'body': json.dumps('OK')
}

You either have to close the file after you finished writing the cvs records and then reopen for reading and pass to the put_obkect method.
Alternatively you open the file for reading and writing and after writing you seek to position 0 so that the put_object method reads from the start.

Sending bulk data to Azure ML Endpoint

I have an Azure ML endpoint which is used to get scoring when I supply data in json.
import requests
import json
# URL for the web service
scoring_uri = 'http://107a119d-9c23-4792-b5db-065e9d3af1e6.eastus.azurecontainer.io/score'
# If the service is authenticated, set the key or token
key = '##########################'
data = {"data":
[{'Associate_Gender': 'Male', 'Associate_Age': 20, 'Education': 'Under Graduate', 'Source_Hiring': 'Internal Movement', 'Count_of_Incoming_Calls_6_month': None, 'Count_of_Incoming_Calls_6_month_bucket': 'Greater than equal to 0 and less than 4', 'Internal_Quality_IQ_Score_Last_6_Months': '93%', 'Internal_Quality_IQ_Score_Last_6_Months_Bucket': 'Greater than 75%', 'Associate_Tenure_Floor_Bucket': 'Greater than 0 and less than 90', 'Current_Call_Repeat_Yes_No': False, 'Historical_CSAT_score': 'Greater than equal to 7 and less than 9', 'Customer_Age': 54, 'Customer_Age_Bucket': 'Greater than equal to 46', 'Network_Region_Originating_Call': 'East London', 'Length_of_Relationship_with_Customer': 266, 'Length_of_Relationship_with_Customer_bucket': 'Greater than 90', 'Call_Reason_Type_L1': 'Voice', 'Call_Reason_Type_L2': 'Prepaid', 'Call_Reason_Type_L3': 'Request for Reversal Provisioning', 'Number_of_VAS_services_active': 6, 'Customer_Category': 'Mercury', 'Customer_monthly_ARPU_GBP_Bucket': 'More than 30', 'Customer_Location': 'Houslow'}]
}
# Convert to JSON string
input_data = json.dumps(data)
# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header
headers['Authorization'] = f'Bearer {key}'
# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.text)
How to send input data from files in bulk and get output. Or is it not feasible to send huge amount of data for scoring on endpoints?
Any alternative suggestion for scoring on azure is also welcome.

Lets assume you have a folder called json_data, where all your json files are stored, then you would open these files and post them to your endpoint as follows:
import requests
import json
import os
import glob
your_uri = 'https://jsonplaceholder.typicode.com/'
folder_path = './json_data'
for filename in glob.glob(os.path.join(folder_path, '*.json')):
with open(filename, 'r') as f:
json_input_data = json.load(f)
resp = requests.post(your_uri, json_input_data)
print(resp)
To showcase the successful http response 201 with jsonplaceholder.typicode.com you have to create a folder in the same directory of your python file and name it json_data, then create a few json files inside the folder and paste some data into the files, e.g.:
file1.json:
{
"title": "some title name 1",
"body": "some body content 1"
}
file2.json:
{
"title": "some title name 2",
"body": "some body content 2"
}
etc.
You could easily rewrite it and use your own uri, key, headers, etc.

To send bulk data for inferencing, I recommend to create a Batch Endpoint,
in Azure ML and the best way to do it is using the Azure CLI:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-batch-endpoint#create-a-batch-endpoint
You can then start a batch scoring using:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-batch-endpoint#start-a-batch-scoring-job-using-the-azure-cli
Or using REST:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-batch-endpoint#start-a-batch-scoring-job-using-rest

How to access information from config.json file to a python file?

The problem is I'm unable to access the information from config.json file to my python file
I have provided the JSON data and python code bellow
I have tried everything in the request module
but I can access the response without the config file but,
I need with config file
The following is a json file
{
"api_data": {
"request_url": "https://newapi.zivame.com/api/v1/catalog/list",
"post_data" : {"category_ids" : "948",
"limit" : "10000"},
"my_headers":{"Content-Type": "application/json"}
},
"redshift":{
"host":"XXX.XXXX.XXX",
"user":"XXXX",
"password":"XXXXXXXX",
"port": 8080,
"db":"XXXX"
},
"s3":{
"access_key":"XXXXXXXXX",
"secret_key":"XXXXXXXXXX",
"region":"XX-XXXXX-1",
"path":"XXXXXXXXXXXX/XXX",
"table":"XXXXXX",
"bucket":"XXXX",
"file": "XXXXXX",
"copy_column": "XXX",
"local_path": "XXXXX"
},
"csv_file": {
"promo_zivame": ""
}
}
and this is the program
#!/usr/bin/python
import json
import psycopg2
import requests
import os
BASE_PATH = os.path.dirname(os.path.realpath(__file__))
with open(BASE_PATH+'/config.json') as json_data_file:
data = json.load(json_data_file)
#api_config = data['api_data']
#redshift = data['redshift']
s3_config = data['s3']
#x = print(api_config.get('request_url'))
class ApiResponse:
#api response
def api_data(self, api_config):
print("starting api_data")
try:
self.ApiResponse = requests.post(api_config['request_url'], api_config['post_data'], api_config['my_headers'])
data_1 = self.ApiResponse
#data = json.dump(self.ApiResponse)
print("API Result Response")
print(())
print(self.ApiResponse)
return (self.ApiResponse)
except Exception:
print("response not found")
return False
def redshift_connect(self, redshift):
try:
# Amazon Redshift connect string
self.con = psycopg2.connect(
host=redshift['host'],
user=redshift['user'],
port=redshift['port'],
password=redshift['password'],
dbname=redshift['db'])
print(self.con)
return self.con
except Exception:
print("Error in Redshift connection")
return False
def main():
c1 = ApiResponse()
api_config = data['api_data']
redshift = data['redshift']
c1.api_data(api_config)
c1.api_data(data)
c1.redshift_connect(redshift)
if __name__=='__main__':
main()

Third argument to requests.post() is json. To provide headers, you need to use the name of the argument explicitly as #JustinEzequiel suggested. See the requests doc here: 2.python-requests.org/en/v1.1.0/user/quickstart/#custom-headers
requests.post(api_config['request_url'], json=api_config['post_data'], headers=api_config['my_headers'])

Borrowing code from https://stackoverflow.com/a/16696317/5386938
import requests
api_config = {
"request_url": "https://newapi.zivame.com/api/v1/catalog/list",
"post_data" : {"category_ids" : "948", "limit" : "10000"},
"my_headers":{"Content-Type": "application/json"}
}
local_filename = 'the_response.json'
with requests.post(api_config['request_url'], json=api_config['post_data'], headers=api_config['my_headers'], stream=True) as r:
r.raise_for_status()
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
saves the response into a file ('the_response.json') you can then pass around. Note the stream=True passed to requests.post

Python Post Request - Getting 415 Error When Sending Files via Outlook API

I've been having some trouble sending files via python's rest module. I can send emails without attachments just fine but as soon as I try and add a files parameter, the call fails and I get a 415 error.
I've looked through the site and found out it was maybe because I wasn't sending the content type of the files when building that array of data so altered it to query the content type with mimetypes; still 415.
This thread: python requests file upload made a couple of more edits but still 415.
The error message says:
"A supported MIME type could not be found that matches the content type of the response. None of the supported type(s)"
Then lists a bunch of json types e.g: "'application/json;odata.metadata=minimal;odata.streaming=true;IEEE754Compatible=false"
then says:
"matches the content type 'multipart/form-data; boundary=0e5485079df745cf0d07777a88aeb8fd'"
Which of course makes me think I'm still not handling the content type correctly somewhere.
Can anyone see where I'm going wrong in my code?
Thanks!
Here's the function:
def send_email(access_token):
import requests
import json
import pandas as pd
import mimetypes
url = "https://outlook.office.com/api/v2.0/me/sendmail"
headers = {
'Authorization': 'Bearer '+access_token,
}
data = {}
data['Message'] = {
'Subject': "Test",
'Body': {
'ContentType': 'Text',
'Content': 'This is a test'
},
'ToRecipients': [
{
'EmailAddress':{
'Address': 'MY TEST EMAIL ADDRESS'
}
}
]
}
data['SaveToSentItems'] = "true"
json_data = json.dumps(data)
#need to convert the above json_data to dict, otherwise it won't work
json_data = json.loads(json_data)
###ATTACHMENT WORK
file_list = ['test_files/test.xlsx', 'test_files/test.docx']
files = {}
pos = 1
for file in file_list:
x = file.split('/') #seperate file name from file path
files['file'+str(pos)] = ( #give the file a unique name
x[1], #actual filename
open(file,'rb'), #open the file
mimetypes.MimeTypes().guess_type(file)[0] #add in the contents type
)
pos += 1 #increase the naming iteration
#print(files)
r = requests.post(url, headers=headers, json=json_data, files=files)
print("")
print(r)
print("")
print(r.text)

I've figured it out! Took a look at the outlook API documentation and realised I should be adding attachments as encoded lists within the message Json, not within the request.post function. Here's my working example:
import requests
import json
import pandas as pd
import mimetypes
import base64
url = "https://outlook.office.com/api/v2.0/me/sendmail"
headers = {
'Authorization': 'Bearer '+access_token,
}
Attachments = []
file_list = ['test_files/image.png', 'test_files/test.xlsx']
for file in file_list:
x = file.split('/') #file the file path so we can get it's na,e
filename = x[1] #get the filename
content = open(file,'rb') #load the content
#encode the file into bytes then turn those bytes into a string
encoded_string = ''
with open(file, "rb") as image_file:
encoded_string = base64.b64encode(image_file.read())
encoded_string = encoded_string.decode("utf-8")
#append the file to the attachments list
Attachments.append({
"#odata.type": "#Microsoft.OutlookServices.FileAttachment",
"Name": filename,
"ContentBytes": encoded_string
})
data = {}
data['Message'] = {
'Subject': "Test",
'Body': {
'ContentType': 'Text',
'Content': 'This is a test'
},
'ToRecipients': [
{
'EmailAddress':{
'Address': 'EMAIL_ADDRESS'
}
}
],
"Attachments": Attachments
}
data['SaveToSentItems'] = "true"
json_data = json.dumps(data)
json_data = json.loads(json_data)
r = requests.post(url, headers=headers, json=json_data)
print(r)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Upload csv via API gateway to S3 - python

Related

how to upload reels to facebook with graph api using python

Export DynamoDB to CSV on S3 with Lambda function (python)

Sending bulk data to Azure ML Endpoint

How to access information from config.json file to a python file?

Python Post Request - Getting 415 Error When Sending Files via Outlook API

Categories

Resources