Trying to Retrieve Report from Amazon Ads API - python

I am trying to retrieve a report that I generated, and shows a 'status' of 'successful'.
When I run the script below, I receive a 200 response. However, per the docs:
"A successful call returns a 307 redirect response. The redirect link points you to the S3 bucket where you can download your report file. The report file downloads as JSON compressed in gzip format."
headers = {
'Amazon-Advertising-API-ClientId': clientid,
'Authorization': access,
'Amazon-Advertising-API-Scope':scope,
'Content-Type':'application/json'
}
response = requests.get(
'https://advertising-api.amazon.com/v1/reports/amzn1.clicksAPI.v1.p1.624466CD.4b8dcbb2-bbc6-4936-a760-c632216a4a5e/download',
headers = headers
)
print(response.json)
response:
<bound method Response.json of <Response [200]

import gzip # unzip compressed file
import io # read binary
import requests
import pandas as pd
headers = {
"Authorization": f"Bearer {access_code}",
"Amazon-Advertising-API-Scope": profile_id,
"Amazon-Advertising-API-ClientId": client_id
}
response = requests.get(report_url, headers=headers)
if response.ok:
json_resp = response.json()
status = json_resp['status']
if status == 'IN_PROGRESS':
# check again
elif status == 'SUCCESS':
# initiate download
location = json_resp['location']
dl_response = requests.get(location, headers=headers, allow_redirects=True)
if dl_response.ok:
compressed_file = io.BytesIO(response.content) # extract .gz file
decompressed_file = gzip.GzipFile(fileobj=compressed_file) # unzip .gz file
output = pd.read_json(decompressed_file)
elif status == 'FAILURE':
# failure response

Related

Upload file to an external API using Python Lambda function

I'm trying to send a multipart/form-data POST request from an AWS lambda function to an external API containing a pdf file.
The client to lambda file upload works, and I can print the buffer content to the console.
The part that doesn't work is the file forwarding to an external API.
This is what i have been trying:
import json
import urllib3
import requests
from io import BytesIO
import base64
def lambda_handler(event, context):
http = urllib3.PoolManager()
encoded_body = base64.b64encode(event['body'].encode('utf-8'))
# multipart_string = "--ce560532019a77d83195f9e9873e16a1\r\nContent-Disposition: form-data; name=\"author\"\r\n\r\nJohn Smith\r\n--ce560532019a77d83195f9e9873e16a1\r\nContent-Disposition: form-data; name=\"file\"; filename=\"file.txt\"\r\nContent-Type: text/plain\r\nExpires: 0\r\n\r\nHello World\r\n--ce560532019a77d83195f9e9873e16a1--\r\n"
# content_type = "multipart/form-data; boundary=ce560532019a77d83195f9e9873e16a1"
# dec = decoder.MultipartDecoder(multipart_string, content_type)
url = "https://app.hubspot.com/api/filemanager/api/v3/files/upload?portalId=XXXXXX"
payload={'options': '{"access":"PUBLIC_NOT_INDEXABLE"}',
'folderPath': '"postman_upload"'}
# files=[
# ('file',('file.pdf',open('/Users/Downloads/file.pdf','rb'),'application/pdf'))
# ]
headers = {
'cookie': '_conv_r=s%3Awww.yahoo.com*m%3Aorganic*t%3A*c%3A; hubspotutk=549ddd47f01a08d6f999bf4e85615f42; __hssrc=1;',
'x-hubspot-csrf-hubspot': 'AAccUftoNFPKG7PGr6tcWR7JA6Rt35lR4D7qCRL3HcK3deuw23-ucJy4oQvE0dYI43bfP2CCKiZM0j7a03XmaC6hy46ExFlA5g',
'authorization': 'Bearer pbl-na1-c23b6031-7da9-4e7d-9516-6c58vc7cd0dc'
}
response = requests.request("POST", url, headers=headers, data=payload, files=files) #TYPE-1
print(response.text)
response = requests.request("POST", url,headers=headers, data=event['body'].encode('utf-8')) #TYPE-2
print(response.text)
return {
'statusCode': 200,
'body': encoded_body
}
But with this when I send the post request I get "415 Media Type not supported" response.
As the pdf/file is being sent as json (bytes) in the request body but it should be just form data with the parameters.
I am not able to achieve that, have already tried many approaches but none worked.
And most of the folks here are uploading the file to S3, which is typically easy. [But I want a post req with multipart file to an external API]
If anyone here has any idea for this or what I am doing wrong. Could you please help. Thanks.

uplad file to google drive with progress bar with python requests

This is my code for uploading to google drive with python requests using google-drive-api.
import sys
import json
import requests
from tqdm import tqdm
import requests_toolbelt
from requests.exceptions import JSONDecodeError
class ProgressBar(tqdm):
def update_to(self, n: int) -> None:
self.update(n - self.n)
def upload_file(access_token:str, filename:str, filedirectory:str):
metadata = {
"title": filename,
}
files = {}
session = requests.session()
with open(filedirectory, "rb") as fp:
files["file"] = fp
files["data"] = ('metadata', json.dumps(metadata), 'application/json')
encoder = requests_toolbelt.MultipartEncoder(files)
with ProgressBar(
total=encoder.len,
unit="B",
unit_scale=True,
unit_divisor=1024,
miniters=1,
file=sys.stdout,
) as bar:
monitor = requests_toolbelt.MultipartEncoderMonitor(
encoder, lambda monitor: bar.update_to(monitor.bytes_read)
)
r = session.post(
"https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart",
data=monitor,
allow_redirects=False,
headers={"Authorization": "Bearer " + access_token},
)
try:
resp = r.json()
print(resp)
except JSONDecodeError:
sys.exit(r.text)
upload_file("access_token", "test.txt", "test.txt")
When i am trying send file with data attribute in post request then file name did not send and with files attribute in post request then requests-toolbelt not working. How to fix this error ?
When I saw your script, I thought that the content type is not included in the request header. In this case, I think that the request body is directly shown in the uploaded file. I thought that this might be the reason for your current issue. In order to remove this issue, how about the following modification?
From:
r = session.post(
url,
data=monitor,
allow_redirects=False,
headers={"Authorization": "Bearer " + access_token},
)
To:
r = session.post(
url,
data=monitor,
allow_redirects=False,
headers={
"Authorization": "Bearer " + access_token,
"Content-Type": monitor.content_type,
},
)
In this case, from metadata = { "title": filename }, it supposes that url is https://www.googleapis.com/upload/drive/v2/files?uploadType=multipart. Please be careful about this.
When you want to use Drive API v3, please modify metadata = { "title": filename } to metadata = { "name": filename }, and use the endpoint of https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart.
When the file is uploaded with Drive API v3, the value of {'kind': 'drive#file', 'id': '###', 'name': 'test.txt', 'mimeType': 'text/plain'} is returned.
By the way, when an error like badContent occurs in your testing, please try to test the following modification. When in the request body of multipart/form-data the file content is put before the file metadata, it seems that an error occurs. I'm not sure whether this is the current specification. But, I didn't know the order of request body is required to be checked.
From
files = {}
files["file"] = fp
files["data"] = ('metadata', json.dumps(metadata), 'application/json')
To
files = collections.OrderedDict(data=("metadata", json.dumps(metadata), "application/json"), file=fp)
Note:
I thought that in your script, an error might occur at file_size = os.path.getsize(filename). Please confirm this again.
When I tested your script by modifying the above modifications, I could confirm that a test file could be uploaded to Google Drive with the expected filename. In this case, I also modified it as follows.
files = collections.OrderedDict(data=("metadata", json.dumps(metadata), "application/json"), file=fp)
References:
Files: insert of Drive API v2
Files: create of Drive API v3
Upload file data
Metadata needs to be sent in the post body as json.
Python Requests post() Method
data = Optional. A dictionary, list of tuples, bytes or a file object to send to the specified url
json = Optional. A JSON object to send to the specified url
metadata = {
"name": filename,
}
r = session.post(
url,
json=json.dumps(metadata),
allow_redirects=False,
headers={"Authorization": "Bearer " + access_token},
)
Future readers can find below a complete script that also contains details on how to get access to the bearer token for HTTP authentication.
Most of the credit goes to the OP and answers to the OPs question.
"""
Goal: For one time upload of a large file (as the GDrive UI hangs up)
Step 1 - Create OAuth 2.0 Client ID + Client Secret
- by following the "Authentication" part of https://pythonhosted.org/PyDrive/quickstart.html
Step 2 - Get Access Token
- from the OAuth playground -> https://developers.google.com/oauthplayground/
--> Select Drive API v3 -> www.googleapis.com/auth/drive --> Click on "Authorize APIs"
--> Click on "Exchange authorization code for tokens" --> "Copy paste the access token"
--> Use it in the script below
Step 3 - Run file as daemon process
- nohup python -u upload_gdrive.py > upload_gdrive.log 2>&1 &
- tail -f upload_gdrive.log
"""
import sys
import json
import requests
from tqdm import tqdm
import requests_toolbelt # pip install requests_toolbelt
from requests.exceptions import JSONDecodeError
import collections
class ProgressBar(tqdm):
def update_to(self, n: int) -> None:
self.update(n - self.n)
def upload_file(access_token:str, filename:str, filepath:str):
metadata = {
"name": filename,
}
files = {}
session = requests.session()
with open(filepath, "rb") as fp:
files = collections.OrderedDict(data=("metadata", json.dumps(metadata), "application/json"), file=fp)
encoder = requests_toolbelt.MultipartEncoder(files)
with ProgressBar(
total=encoder.len,
unit="B",
unit_scale=True,
unit_divisor=1024,
miniters=1,
file=sys.stdout,
) as bar:
monitor = requests_toolbelt.MultipartEncoderMonitor(
encoder, lambda monitor: bar.update_to(monitor.bytes_read)
)
r = session.post(
"https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart",
data=monitor,
allow_redirects=False,
headers={
"Authorization": "Bearer " + access_token
, "Content-Type": monitor.content_type
},
)
try:
resp = r.json()
print(resp)
except JSONDecodeError:
sys.exit(r.text)
upload_file("<access_token>"
, "<upload_filename>", "<path_to_file>")

How can I post a PDF to AWS lambda

I have AWS Lambda set up.
def lambda_handler(event, context):
return {
'statusCode': 200,
'body': json.dumps(event)
}
I would like to POST in a PDF file so that I can operate on it in my lambda function.
Here is my POST code
import requests
headers = {
'X-API-KEY':'1234',
'Content-type': 'multipart/form-data'}
files = {
'document': open('my.pdf', 'rb')
}
r = requests.post(url, files=files, headers=headers)
display(r)
display(r.text)
I am getting the error:
<Response [400]>
'{"message": "Could not parse request body into json: Unexpected character (\\\'-\\\' (code 45)) in numeric value: expected digit (0-9) to follow minus sign, for valid numeric value
How can I POST over my PDF and be able to properly send over my PDF and access it in Lambda?
Note:
I am successful if I do this:
payload = '{"key1": "val1","key2": 22,"key3": 15,"key4": "val4"}'
r = requests.post(url = URL, data=payload, headers=HEADERS)
It is just the PDF part which I can't get
I figured it out. Took me a ton of time but I think I got it. Essentially it's all about encoding and decoding as bytes. Didn't have to touch the API Gateway at all.
Request:
HEADERS = {'X-API-KEY': '12345'}
data = '{"body" : "%s"}' % base64.b64encode(open(path, 'rb').read())
r = requests.post(url, data=data, headers=HEADERS)
In lambda
from io import BytesIO
def lambda_handler(event, context):
pdf64 = event["body"]
# Need this line as it does 'b'b'pdfdatacontent'.
pdf64 = pdf64[2:].encode('utf-8')
buffer = BytesIO()
content = base64.b64decode(pdf64)
buffer.write(content)
I found this worked quite well for me:
Request
import requests
file_loc = 'path/to/test.pdf'
data = open(file_loc,'rb').read() #this is a bytes object
r = requests.post(url, data=data)
r.ok #returns True (also a good idea to check r.text
#one-liner
requests.post(url, data=open(file_loc,'rb').read())
Lambda - Python3.8
import io, base64
body = event["body"]
attachment = base64.b64decode(body.encode()) #this is a bytes object
buff = io.BytesIO(attachment) #this is now useable - read/write etc.
#one-liner
buff = io.BytesIO(base64.b64decode(event["body"].encode()))
Not quite sure why, but for me base64 encoding (even with urlsafe) in the original request corrupted the file and it was no longer recognised as a PDF in Lambda, so the OP's answer didn't work for me.

"Bad Gateway" error using Python requests module with multipart encoded file POST

I am getting the error message "Bad Gateway
The proxy server received an invalid
response from an upstream server" with the following code:
import requests
url = "https://apis.company.com/v3/media"
attachments = 'media': ('x.mp3', open('x.mp3', 'r'))}
headers = {'content-type': "multipart/form-data",'cache-control': "no-cache"
'Authorization':"Bearer zzz" }
response = requests.post(url, files=attachments, headers = headers)
print response.text
I'm following the example in the requests Quickstart documentation, where it says "You can also pass a list of tuples to the data argument": http://docs.python-requests.org/en/master/user/quickstart/#post-a-multipart-encoded-file
What is causing this error and how can I fix it?
The main problem was that I set the content-type in the header. This code works:
import requests
url = 'https://apis.company.com/v3/media'
token = 'token-goes-here'
headers = { 'Authorization' : 'Bearer ' + token }
filename = 'x.mp3'
with open(filename, 'rb') as media_file:
attachments = {
'media': (filename, media_file, 'application/octet-stream')
}
response = requests.post(url, files = attachments, headers = headers)
print response.text

Python Requests - POST file with additional paramas

So I have this code that send API request to upload a file to a server, after searching the web I came up with something like that where 'payload' is additional params I need to send and 'files' is a path to the file I want to upload :
def sendAPIRequest2(self, env, object, method, params, files):
apiPath = "/api/secure/jsonws"
headers = {'content-type': 'multipart/form-data'}
url = env + apiPath + object
payload = {
"method": method,
"params": params,
"jsonrpc": "2.0"
}
data = json.dumps(payload)
myFile = {'file': open(files,'rb')}
try:
r = requests.post(url, files=myFile, data=data, headers=headers,
auth=HTTPBasicAuth(self.user, self.password), verify=False)
print r.text
resjson = json.loads(r.text)
except:
print "no Valid JSON has returned from"+object+" API"
sys.exit(1)
return resjson
I get:
ValueError: Data must not be a string.
What am I doing wrong?

Categories

Resources