I have made various attempts to establish a connection to an S3-compatible service, but I keep getting an error. The following code throws the error shown below. Any ideas?
import os
import boto
import boto.s3.connection
from boto.s3.key import Key

try:
    conn = boto.s3.connect_to_region(region = 'nil',
        aws_access_key_id = 'xx',
        aws_secret_access_key = 'xx',
        host = 'ds41s3-scs.xx.com',
        calling_format = boto.s3.connection.OrdinaryCallingFormat(),
    )
    response = s3.list_buckets()
except Exception as e:
    print(str(e))
    print("error")
The error:
TypeError: connect_to_region() missing 1 required positional argument: 'region_name'
Why is the region set to nil? The signature of connect_to_region is:
boto.s3.connect_to_region(region_name, **kw_params)
You are missing the mandatory argument: region_name
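For plain AWS S3, the fix is simply to supply that argument, e.g. (a minimal sketch; the region and credential values are placeholders):

conn = boto.s3.connect_to_region(
    'us-east-1',
    aws_access_key_id='xx',
    aws_secret_access_key='xx',
    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)

Note that connect_to_region only resolves known AWS region names. For a custom S3-compatible host like yours, boto.connect_s3(aws_access_key_id, aws_secret_access_key, host='ds41s3-scs.xx.com', calling_format=OrdinaryCallingFormat()) is usually the better fit.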
Alternatively, go for the latest version of the AWS SDK for Python (boto3), which supports the latest features:
import boto3
client = boto3.client('s3')
response = client.list_buckets()
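Since you are talking to an S3-compatible service rather than AWS itself, you will most likely also have to point boto3 at your endpoint. A minimal sketch, assuming the host from your question (whether it is reachable over https is an assumption) and placeholder credentials:

import boto3

client = boto3.client(
    's3',
    endpoint_url='https://ds41s3-scs.xx.com',
    aws_access_key_id='xx',
    aws_secret_access_key='xx',
)
response = client.list_buckets()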
Related
I'm working in a Python web environment and I can simply upload a file from the filesystem to S3 using boto's key.set_contents_from_filename(path/to/file). However, I'd like to upload an image that is already on the web (say https://pbs.twimg.com/media/A9h_htACIAAaCf6.jpg:large).
Should I somehow download the image to the filesystem, and then upload it to S3 using boto as usual, then delete the image?
Ideally, boto's key.set_contents_from_file or some other command would accept a URL and nicely stream the image to S3, without my having to explicitly download a file copy to my server.
def upload(url):
    try:
        conn = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        bucket_name = settings.AWS_STORAGE_BUCKET_NAME
        bucket = conn.get_bucket(bucket_name)
        k = Key(bucket)
        k.key = "test"
        k.set_contents_from_file(url)
        k.make_public()
        return "Success?"
    except Exception, e:
        return e
Using set_contents_from_file, as above, I get a "string object has no attribute 'tell'" error. Using set_contents_from_filename with the URL, I get a "No such file or directory" error. The boto storage documentation leaves off at uploading local files and does not mention uploading files stored remotely.
Here is how I did it with requests, the key being to set stream=True when initially making the request, and uploading to S3 using the upload_fileobj() method:
import requests
import boto3
url = "https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg"
r = requests.get(url, stream=True)
session = boto3.Session()
s3 = session.resource('s3')
bucket_name = 'your-bucket-name'
key = 'your-key-name' # key is the name of file on your bucket
bucket = s3.Bucket(bucket_name)
bucket.upload_fileobj(r.raw, key)
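One caveat worth adding (my note, not part of the original answer): if the server sends a Content-Encoding such as gzip, r.raw yields the still-compressed bytes by default. You can ask urllib3 to decode while streaming:

r = requests.get(url, stream=True)
r.raw.decode_content = True  # transparently decompress gzip/deflate as the stream is read
bucket.upload_fileobj(r.raw, key)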
OK, from @garnaat, it doesn't sound like S3 currently allows uploads by URL. I managed to upload remote images to S3 by reading them into memory only. This works.
def upload(url):
    try:
        conn = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        bucket_name = settings.AWS_STORAGE_BUCKET_NAME
        bucket = conn.get_bucket(bucket_name)
        k = Key(bucket)
        k.key = url.split('/')[::-1][0]             # In my situation, ids at the end are unique
        file_object = urllib2.urlopen(url)          # 'Like' a file object
        fp = StringIO.StringIO(file_object.read())  # Wrap object
        k.set_contents_from_file(fp)
        return "Success"
    except Exception, e:
        return e
Also thanks to How can I create a GzipFile instance from the “file-like object” that urllib.urlopen() returns?
For a 2017-relevant answer to this question which uses the official 'boto3' package (instead of the old 'boto' package from the original answer):
Python 3.5
If you're on a clean Python install, pip install both packages first:
pip install boto3
pip install requests
import boto3
import requests
# Uses the creds in ~/.aws/credentials
s3 = boto3.resource('s3')
bucket_name_to_upload_image_to = 'photos'
s3_image_filename = 'test_s3_image.png'
internet_image_url = 'https://docs.python.org/3.7/_static/py.png'
# Do this as a quick and easy check to make sure your S3 access is OK
good_to_go = False
for bucket in s3.buckets.all():
    if bucket.name == bucket_name_to_upload_image_to:
        print('Good to go. Found the bucket to upload the image into.')
        good_to_go = True

if not good_to_go:
    print('Not seeing your s3 bucket, might want to double check permissions in IAM')
# Given an Internet-accessible URL, download the image and upload it to S3,
# without needing to persist the image to disk locally
req_for_image = requests.get(internet_image_url, stream=True)
file_object_from_req = req_for_image.raw
req_data = file_object_from_req.read()
# Do the actual upload to s3
s3.Bucket(bucket_name_to_upload_image_to).put_object(Key=s3_image_filename, Body=req_data)
Unfortunately, there really isn't any way to do this. At least not at the moment. We could add a method to boto, say set_contents_from_url, but that method would still have to download the file to the local machine and then upload it. It might still be a convenient method but it wouldn't save you anything.
In order to do what you really want to do, we would need to have some capability on the S3 service itself that would allow us to pass it the URL and have it store the URL to a bucket for us. That sounds like a pretty useful feature. You might want to post that to the S3 forums.
A simple three-line implementation that works on a Lambda out of the box:
import boto3
import requests
s3_object = boto3.resource('s3').Object(bucket_name, object_key)
with requests.get(url, stream=True) as r:
    s3_object.put(Body=r.content)
The source for the .get part comes straight from the requests documentation
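A side note of my own: r.content buffers the whole response in memory before the put. If the objects can be large, a streaming sketch using the same bucket_name/object_key/url placeholders would be:

s3_client = boto3.client('s3')
with requests.get(url, stream=True) as r:
    r.raw.decode_content = True  # decompress gzip/deflate while streaming
    s3_client.upload_fileobj(r.raw, bucket_name, object_key)  # streams in parts, no full in-memory copy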
import boto3
import requests
from io import BytesIO

def send_image_to_s3(url, name):
    print("sending image")
    bucket_name = 'XXX'
    AWS_SECRET_ACCESS_KEY = "XXX"
    AWS_ACCESS_KEY_ID = "XXX"
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID,
                      aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    response = requests.get(url)
    img = BytesIO(response.content)
    file_name = f'path/{name}'
    print('sending {}'.format(file_name))
    r = s3.upload_fileobj(img, bucket_name, file_name)
    s3_path = 'path/' + name
    return s3_path
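A hypothetical call of the function above (the URL and name are only illustrative):

s3_path = send_image_to_s3('https://docs.python.org/3.7/_static/py.png', 'py.png')
print(s3_path)  # 'path/py.png'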
I have tried the following with boto3 and it works for me:
import boto3
import contextlib
import requests
from io import BytesIO

s3 = boto3.resource('s3')
s3Client = boto3.client('s3')

for bucket in s3.buckets.all():
    print(bucket.name)

url = "#resource url"
with contextlib.closing(requests.get(url, stream=True, verify=False)) as response:
    # Set up file stream from response content.
    fp = BytesIO(response.content)
    # Upload data to S3
    s3Client.upload_fileobj(fp, 'aws-books', 'reviews_Electronics_5.json.gz')
Using the boto3 upload_fileobj method, you can stream a file to an S3 bucket, without saving to disk. Here is my function:
import boto3
import StringIO
import contextlib
import requests

def upload(url):
    # Get the service client
    s3 = boto3.client('s3')
    # Remember to set stream=True.
    with contextlib.closing(requests.get(url, stream=True, verify=False)) as response:
        # Set up file stream from response content.
        fp = StringIO.StringIO(response.content)
        # Upload data to S3
        s3.upload_fileobj(fp, 'my-bucket', 'my-dir/' + url.split('/')[-1])
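Note (my addition): the StringIO module is Python 2 only, and response.content is bytes, so on Python 3 you would wrap the payload in io.BytesIO instead:

from io import BytesIO
fp = BytesIO(response.content)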
It seems S3 doesn't support remote upload as of now. You may use the class below for uploading an image to S3. The upload method here first downloads the image and keeps it in memory for some time until it gets uploaded. To be able to connect to S3 you will have to install the AWS CLI with pip install awscli, then enter your credentials with aws configure:
import boto3
import urllib3
import uuid
from pathlib import Path
from io import BytesIO
from errors import custom_exceptions as cex

BUCKET_NAME = "xxx.yyy.zzz"
POSTERS_BASE_PATH = "assets/wallcontent"
CLOUDFRONT_BASE_URL = "https://xxx.cloudfront.net/"


class S3(object):
    def __init__(self):
        self.client = boto3.client('s3')
        self.bucket_name = BUCKET_NAME
        self.posters_base_path = POSTERS_BASE_PATH

    def __download_image(self, url):
        manager = urllib3.PoolManager()
        try:
            res = manager.request('GET', url)
        except Exception:
            print("Could not download the image from URL: ", url)
            raise cex.ImageDownloadFailed
        return BytesIO(res.data)  # any file-like object that implements read()

    def upload_image(self, url):
        try:
            image_file = self.__download_image(url)
        except cex.ImageDownloadFailed:
            raise cex.ImageUploadFailed

        extension = Path(url).suffix
        id = uuid.uuid1().hex + extension
        final_path = self.posters_base_path + "/" + id
        try:
            self.client.upload_fileobj(image_file,
                                       self.bucket_name,
                                       final_path)
        except Exception:
            print("Image Upload Error for URL: ", url)
            raise cex.ImageUploadFailed

        return CLOUDFRONT_BASE_URL + id
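A hypothetical use of the class above (the image URL is only illustrative); it returns the CloudFront URL of the uploaded object:

cdn_url = S3().upload_image('https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg')
print(cdn_url)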
import boto
from boto.s3.key import Key
from boto.s3.connection import OrdinaryCallingFormat
from urllib import urlopen


def upload_images_s3(img_url):
    try:
        connection = boto.connect_s3('access_key', 'secret_key', calling_format=OrdinaryCallingFormat())
        bucket = connection.get_bucket('boto-demo-1519388451')
        file_obj = Key(bucket)
        file_obj.key = img_url.split('/')[::-1][0]
        fp = urlopen(img_url)
        result = file_obj.set_contents_from_string(fp.read())
    except Exception, e:
        return e
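A hypothetical call, with a placeholder image URL:

upload_images_s3('https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg')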
I am trying to use boto3 for Amazon Mechanical Turk. I was trying to get the client using the following code:
import boto3
endpoint_url = 'https://mturk-requester.us-east-1.amazonaws.com'
aws_access_key_id = <aws_access_key_id>
aws_secret_access_key = <aws_secret_access_key>
region_name = 'us-east-1'
client = boto3.client('mturk',
                      aws_access_key_id=aws_access_key_id,
                      aws_secret_access_key=aws_secret_access_key,
                      region_name=region_name,
                      endpoint_url=endpoint_url
                      )
But I am getting the following error about an unknown service name:
botocore.exceptions.UnknownServiceError: Unknown service: 'mturk'. Valid service
names are: acm,..., xray
Why is 'mturk' not in this list? The code I am using is taken from the MTurk developer website.
Any suggestion is welcome! Thanks in advance!
Using the Google Analytics API (v4), I would like to upload a "Product Data" file.
This is the sample code from the GA documentation:
from apiclient.http import MediaFileUpload

try:
    media = MediaFileUpload('custom_data.csv',
                            mimetype='application/octet-stream',
                            resumable=False)

    daily_upload = analytics.management().uploads().uploadData(
        accountId='123456',
        webPropertyId='UA-123456-1',
        customDataSourceId='9876654321',
        media_body=media).execute()

except TypeError, error:
    # Handle errors in constructing a query.
    print 'There was an error in constructing your query : %s' % error

except HttpError, error:
    # Handle API errors.
    print ('There was an API error : %s : %s' %
           (error.resp.status, error.resp.reason))
This is what I have done, but I still have an issue regarding this part: analytics.management().uploads()
import argparse

from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
from googleapiclient.http import MediaFileUpload

# SET VARS
CUSTOM_DATA_SOURCE_ID = '_xxxxxxx'
WEB_PROPERTY_ID = 'UA-xxxxx-1'
ACCOUNT_ID = 'xxxxxx'
CSV_IMPORT_FILE_LOCATION = 'test_file.csv'
CREDENTIALS_KEY_FILE_LOCATION = 'key.json'


def get_service(api_name, api_version, scope, key_file_location):
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        key_file_location, scopes=scope)

    http = credentials.authorize(httplib2.Http())

    # Build the service object.
    service = build(api_name, api_version, http=http)

    return service


def uploadCSV(service):
    try:
        media = MediaFileUpload(CSV_IMPORT_FILE_LOCATION,
                                mimetype='application/octet-stream',
                                resumable=False)

        daily_upload = service.management().uploads().uploadData(
            accountId=ACCOUNT_ID,
            webPropertyId=WEB_PROPERTY_ID,
            customDataSourceId=CUSTOM_DATA_SOURCE_ID,
            media_body=media).execute()

    except TypeError, error:
        # Handle errors in constructing a query.
        print 'There was an error in constructing your query : %s' % error


def main():
    # Define the auth scopes to request.
    scope = ['https://www.googleapis.com/auth/analytics.edit', 'https://www.googleapis.com/auth/analytics']

    # Authenticate and construct service.
    service = get_service('analytics', 'v4', scope, CREDENTIALS_KEY_FILE_LOCATION)

    # Upload CSV Data
    uploadCSV(service)


if __name__ == '__main__':
    main()
This is the error I receive every time:
AttributeError: 'Resource' object has no attribute 'management'
Any suggestions? I assume this is because I do not have these methods (management().uploads()), but this is what the example from the documentation shows.
The current Analytics API v4 only includes the reporting side of the API. To access the management endpoint you have to use v3. Try rewriting your code against the v3 version of the API.
Here's what v3 looks like
Here's what v4 looks like
To start, change this line:
service = get_service('analytics', 'v3', scope, CREDENTIALS_KEY_FILE_LOCATION)
But it might require more rewriting than this.
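As a quick sanity check (my own sketch, reusing the get_service() helper and the scope/credentials values from the question), the management collection only shows up on the v3 service:

analytics_v3 = get_service('analytics', 'v3', scope, CREDENTIALS_KEY_FILE_LOCATION)
print(hasattr(analytics_v3, 'management'))  # True for v3; the v4 resource you built has no management()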
I am working on the python example for Cloud Vision API from github repo.
I have already setup the project and activated the service account with its key. I have also called the gcloud auth and entered my credentials.
Here is my code (as derived from the python example of Vision API text detection):
import base64
import os
import re
import sys

from googleapiclient import discovery
from googleapiclient import errors
import nltk
from nltk.stem.snowball import EnglishStemmer
from oauth2client.client import GoogleCredentials
import redis

DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'  # noqa
BATCH_SIZE = 10


class VisionApi:
    """Construct and use the Google Vision API service."""

    def __init__(self, api_discovery_file='/home/saadq/Dev/Projects/TM-visual-search/credentials-key.json'):
        self.credentials = GoogleCredentials.get_application_default()
        print self.credentials.to_json()
        self.service = discovery.build(
            'vision', 'v1', credentials=self.credentials,
            discoveryServiceUrl=DISCOVERY_URL)
        print DISCOVERY_URL

    def detect_text(self, input_filenames, num_retries=3, max_results=6):
        """Uses the Vision API to detect text in the given file."""
        images = {}
        for filename in input_filenames:
            with open(filename, 'rb') as image_file:
                images[filename] = image_file.read()

        batch_request = []
        for filename in images:
            batch_request.append({
                'image': {
                    'content': base64.b64encode(
                        images[filename]).decode('UTF-8')
                },
                'features': [{
                    'type': 'TEXT_DETECTION',
                    'maxResults': max_results,
                }]
            })
        request = self.service.images().annotate(
            body={'requests': batch_request})

        try:
            responses = request.execute(num_retries=num_retries)
            if 'responses' not in responses:
                return {}

            text_response = {}
            for filename, response in zip(images, responses['responses']):
                if 'error' in response:
                    print("API Error for %s: %s" % (
                        filename,
                        response['error']['message']
                        if 'message' in response['error']
                        else ''))
                    continue
                if 'textAnnotations' in response:
                    text_response[filename] = response['textAnnotations']
                else:
                    text_response[filename] = []
            return text_response
        except errors.HttpError as e:
            print("Http Error for %s: %s" % (filename, e))
        except KeyError as e2:
            print("Key error: %s" % e2)


vision = VisionApi()
print vision.detect_text(['test_article.png'])
This is the error message I am getting:
Http Error for test_article.png: <HttpError 403 when requesting https://vision.googleapis.com/v1/images:annotate?alt=json returned "Google Cloud Vision API has not been used in project google.com:cloudsdktool before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/vision.googleapis.com/overview?project=google.com:cloudsdktool then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry.">
I want to be able to use my own project for the example and not the default (google.com:cloudsdktool).
Download the credentials you created and update the GOOGLE_APPLICATION_CREDENTIALS environment variable to point to that file:
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials-key.json
Reference: https://github.com/GoogleCloudPlatform/cloud-vision/tree/master/python/text#set-up-to-authenticate-with-your-projects-credentials
The export didn't work for me, even when setting it in the code:
import os
...
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'path to service account json'
os.environ['GCLOUD_PROJECT'] = 'project id'
...
But this fix worked:
Google cloud speech api throwing 403 when trying to use it