Boto3 and Flask: images are not uploading to S3 properly - python

I am building a gallery for my Flask website and am having trouble generating thumbnails in my CMS. I have a view that takes the uploaded images (stored in a list), duplicates the list, and uses zip() to iterate through both lists: one list is used to make thumbnails, the other to upload the full-size images, and both are sent to AWS S3. The thumbnails come out fine, but the full-size images end up in S3 as 0-byte files. I also have a function that renames the images, and that part works, so I assume I am doing something right.
Why am I getting files of 0 bytes? How does that happen? What is 'emptying' the file?
@application.route("/upload", methods=['GET', 'POST'])
@login_required
def upload():
    form = UploadForm()
    uploaded_files = request.files.getlist("photo")
    conn = boto3.client(
        's3',
        region_name="region",
        aws_access_key_id='aws_access_key_id',
        aws_secret_access_key='aws_secret_access_key'
    )
    bucket_name = "bucket"
    if form.validate_on_submit():
        i = 1
        thumbs_list = list(uploaded_files)
        for z, w in zip(thumbs_list, uploaded_files):
            x = photo_rename(w)
            thumb = save_thumbnail(z)
            conn.upload_fileobj(x.stream, bucket_name, 'gallery/fulls/' + w.filename)
            conn.upload_fileobj(thumb, bucket_name, 'gallery/thumbs/' + w.filename)
            form_commit = Gallery(event=form.event.data,
                                  date=form.date.data,
                                  image_order=i,
                                  image_file_fl=w.filename,
                                  image_file_th=w.filename
                                  )
            db.session.add(form_commit)
            i += 1
        db.session.commit()
        return '', 204

I was able to get both uploads to work using io.BytesIO() in my for loop:
for z, w in zip(thumbs_list, uploaded_files):
    x = photo_rename(w)
    c = Image.open(x)
    in_mem_file = io.BytesIO()
    c.save(in_mem_file, "JPEG")
    in_mem_file.seek(0)
    thumb = save_thumbnail(z)
    conn.upload_fileobj(in_mem_file, bucket_name, 'gallery/fulls/' + w.filename)
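The 0-byte uploads almost certainly come from the two loop variables pointing at the same underlying stream: list(uploaded_files) copies the list, but z and w are still the same FileStorage objects, so once save_thumbnail() (and PIL inside it) has read a file to the end, the following upload_fileobj() call starts at end-of-file and uploads nothing. If that is the case, simply rewinding the stream before uploading should also work. A minimal sketch, assuming photo_rename() returns the same FileStorage with only its filename changed and save_thumbnail() returns a file-like object:

for z, w in zip(thumbs_list, uploaded_files):
    x = photo_rename(w)        # assumed: same FileStorage, renamed
    thumb = save_thumbnail(z)  # reads the shared stream to the end
    x.stream.seek(0)           # rewind before uploading the full-size image
    conn.upload_fileobj(x.stream, bucket_name, 'gallery/fulls/' + w.filename)
    thumb.seek(0)              # rewind the thumbnail buffer as well
    conn.upload_fileobj(thumb, bucket_name, 'gallery/thumbs/' + w.filename)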

Related

I have a problem uploading images resized with PIL to DO Spaces

I'm uploading images to DigitalOcean Spaces using boto3. It works really well until I add PIL.
In my Django view I have this code for handling the uploaded images:
from digitalocean_spaces import DigitalOceanSpaces
from helpers import resize_maintain_its_aspect_ratio
def images_view(request):
    if request.method == "POST":
        images = request.FILES.getlist('images')
        for index, image in enumerate(images):
            size = image.size
            content_type = image.content_type
            file_name = image.name
            # TODO: fix method
            # image = resize_maintain_its_aspect_ratio(image, 500)
            DigitalOceanSpaces().upload_file(
                key=key,
                file=image,
                content_type=content_type,
                acl='private'
            )
I can see all the information for each image.
To upload the image I use this method, which also works:
class DigitalOceanSpaces:
    def default_session_client(self):
        session = boto3.session.Session()
        client = session.client(
            's3',
            region_name=REGION_NAME,
            endpoint_url=ENDPOINT_URL,
            aws_access_key_id=ACCESS_KEY_ID,
            aws_secret_access_key=ACCESS_SECRET_KEY
        )
        return client

    def upload_file(self, key, file, content_type, acl='private'):
        client = self.default_session_client()
        client.put_object(
            Bucket=BUCKET_NAME,
            Key=key,
            Body=file,
            ACL=acl,
            ContentType=content_type,
            Metadata={
                'x-amz-meta-my-key': '*****'
            }
        )
The problem starts when I call this other method to resize the image:
from PIL import Image

def resize_maintain_its_aspect_ratio(image, base_width):
    pillow_image = Image.open(image)
    width_percent = (base_width / float(pillow_image.size[0]))
    height_size = int((float(pillow_image.size[1]) * float(width_percent)))
    resized_image = pillow_image.resize((base_width, height_size), Image.ANTIALIAS)
    return resized_image
I see the error even if the resize_maintain_its_aspect_ratio method only contains:
pillow_image = Image.open(image)
So, the error is:
An error occurred (BadDigest) when calling the PutObject operation
(reached max retries: 4): Unknown
Does anyone know what the problem is?
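A hedged guess at the cause: Image.open() reads from the uploaded Django file object, so if that same object is then passed as Body to put_object(), its stream position no longer matches the length/checksum S3 expects, which can surface as BadDigest. One way around it is to either call image.seek(0) before uploading the untouched file, or to serialize the resized PIL image into an in-memory buffer and upload that instead. A minimal sketch, assuming JPEG output (the helper's signature and parameters are kept from the question):

import io
from PIL import Image

def resize_maintain_its_aspect_ratio(image, base_width):
    pillow_image = Image.open(image)
    width_percent = base_width / float(pillow_image.size[0])
    height_size = int(float(pillow_image.size[1]) * width_percent)
    resized_image = pillow_image.resize((base_width, height_size), Image.ANTIALIAS)
    # Serialize the resized image to an in-memory buffer so put_object gets real bytes
    buffer = io.BytesIO()
    resized_image.save(buffer, format='JPEG')
    buffer.seek(0)
    return buffer

The returned buffer can then be passed as file=... to DigitalOceanSpaces().upload_file(), since Body accepts any file-like object positioned at the start.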

How to annotate MULTIPLE images from a single call using Google's vision API? Python

I recently started using Google's Vision API. I am trying to annotate a batch of images and therefore followed the 'batch image annotation offline' guide from their documentation.
However, it is not clear to me how I can annotate MULTIPLE images from one API call. So let's say I have stored 10 images in my Google Cloud bucket. How can I annotate all these images at once and store them in one JSON file? Right now, I wrote a program that calls their example function and it works, but to put it simply, why can't I say: 'Look in this folder and annotate all images in it'?
Thanks in advance.
from batch_image_labeling import sample_async_batch_annotate_images

counter = 0
for file in os.listdir('my_directory'):
    filename = file
    sample_async_batch_annotate_images('gs://my_bucket/{}'.format(filename), 'gs://my_bucket/{}'.format(counter))
    counter += 1
from google.cloud import vision_v1
from google.cloud.vision_v1 import enums
import six

def sample_async_batch_annotate_images(input_image_uri, output_uri):
    """Perform async batch image annotation"""
    client = vision_v1.ImageAnnotatorClient()
    if isinstance(input_image_uri, six.binary_type):
        input_image_uri = input_image_uri.decode('utf-8')
    if isinstance(output_uri, six.binary_type):
        output_uri = output_uri.decode('utf-8')
    source = {'image_uri': input_image_uri}
    image = {'source': source}
    type_ = enums.Feature.Type.LABEL_DETECTION
    features_element = {'type': type_}
    type_2 = enums.Feature.Type.IMAGE_PROPERTIES
    features_element_2 = {'type': type_2}
    features = [features_element, features_element_2]
    requests_element = {'image': image, 'features': features}
    requests = [requests_element]
    gcs_destination = {'uri': output_uri}
    # The max number of responses to output in each JSON file
    batch_size = 2
    output_config = {'gcs_destination': gcs_destination, 'batch_size': batch_size}
    operation = client.async_batch_annotate_images(requests, output_config)
    print('Waiting for operation to complete...')
    response = operation.result()
    # The output is written to GCS with the provided output_uri as prefix
    gcs_output_uri = response.output_config.gcs_destination.uri
    print('Output written to GCS with prefix: {}'.format(gcs_output_uri))
It's somewhat unclear from that example, but your call to async_batch_annotate_images takes a requests parameter which is a list of multiple requests. So you can do something like this:
from google.cloud import vision_v1
from google.cloud.vision_v1 import enums
import os
import six

def generate_request(input_image_uri):
    if isinstance(input_image_uri, six.binary_type):
        input_image_uri = input_image_uri.decode('utf-8')
    source = {'image_uri': input_image_uri}
    image = {'source': source}
    type_ = enums.Feature.Type.LABEL_DETECTION
    features_element = {'type': type_}
    type_2 = enums.Feature.Type.IMAGE_PROPERTIES
    features_element_2 = {'type': type_2}
    features = [features_element, features_element_2]
    requests_element = {'image': image, 'features': features}
    return requests_element

def sample_async_batch_annotate_images(input_uri, output_uri):
    """Perform async batch image annotation"""
    client = vision_v1.ImageAnnotatorClient()
    requests = [
        generate_request(input_uri.format(filename))
        for filename in os.listdir('my_directory')
    ]
    gcs_destination = {'uri': output_uri}
    # The max number of responses to output in each JSON file
    batch_size = 1
    output_config = {'gcs_destination': gcs_destination, 'batch_size': batch_size}
    operation = client.async_batch_annotate_images(requests, output_config)
    print('Waiting for operation to complete...')
    response = operation.result()
    # The output is written to GCS with the provided output_uri as prefix
    gcs_output_uri = response.output_config.gcs_destination.uri
    print('Output written to GCS with prefix: {}'.format(gcs_output_uri))

sample_async_batch_annotate_images('gs://my_bucket/{}', 'gs://my_bucket/results')
This can annotate up to 2,000 images in a single request. The only downside is that you can only specify a single output_uri as a destination, so you won't be able to use counter to put each result in a separate file, but you can set batch_size = 1 to ensure each response is written separately if this is what you want.
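To read the annotations back afterwards, the JSON files written under the output prefix can be listed and downloaded with the google-cloud-storage client. A minimal sketch, assuming the bucket and prefix names used above and the usual output layout (a top-level 'responses' list in each file):

import json
from google.cloud import storage

storage_client = storage.Client()
bucket = storage_client.bucket('my_bucket')

# Each blob under the prefix holds up to batch_size responses
for blob in bucket.list_blobs(prefix='results'):
    data = json.loads(blob.download_as_string())
    for response in data.get('responses', []):
        print(response.get('labelAnnotations', []))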

Is there a cleaner way to rotate smartphone images uploaded via flask before pushing to S3?

I'm building a webapp that takes uploaded images, stores them on Amazon S3 and then stores the URL in a SQLite database. Unfortunately, EXIF tags cause images that were taken via a smartphone to appear rotated (since they are landscape images w/ EXIF orientation tags).
Currently, my environment grabs the file from the POST data, saves it to my static files folder, rotates the image (if needed) with PIL, pushes it to S3 and finally deletes the local copy. Here is a little of the code involved:
from PIL import Image
import boto
from boto.s3.connection import S3Connection
from boto.s3.key import Key

def fix_orientation(filename):
    img = Image.open(filename)
    if hasattr(img, '_getexif'):
        exifdata = img._getexif()
        try:
            orientation = exifdata.get(274)
        except:
            # There was no EXIF Orientation Data
            orientation = 1
    else:
        orientation = 1
    if orientation is 1:    # Horizontal (normal)
        pass
    elif orientation is 2:  # Mirrored horizontal
        img = img.transpose(Image.FLIP_LEFT_RIGHT)
    elif orientation is 3:  # Rotated 180
        img = img.rotate(180)
    elif orientation is 4:  # Mirrored vertical
        img = img.rotate(180).transpose(Image.FLIP_LEFT_RIGHT)
    elif orientation is 5:  # Mirrored horizontal then rotated 90 CCW
        img = img.rotate(-90).transpose(Image.FLIP_LEFT_RIGHT)
    elif orientation is 6:  # Rotated 90 CCW
        img = img.rotate(-90)
    elif orientation is 7:  # Mirrored horizontal then rotated 90 CW
        img = img.rotate(90).transpose(Image.FLIP_LEFT_RIGHT)
    elif orientation is 8:  # Rotated 90 CW
        img = img.rotate(90)
    # save the result and overwrite the originally uploaded image
    img.save(filename)

def push_to_s3(**kwargs):
    try:
        conn = S3Connection(app.config["S3_KEY"], app.config["S3_SECRET"])
        buckets = [bucket.name for bucket in conn.get_all_buckets()]
        bucket = conn.get_bucket(app.config["S3_BUCKET"])
        k = Key(bucket)
        k.key = app.config["S3_UPLOAD_DIR"] + kwargs.get("filename")
        k.set_contents_from_filename(kwargs.get("photo"))
        k.make_public()
        return k
    except Exception, e:
        abort(500)
Here is how the POST data is handled:
# Retrieving Form POST Data
fi = request.files.get("file")
#print "Storing and Rotating File (if needed)"
f = photos.save(fi)
path = photos.path(f)
fix_orientation(path)
#print "Uploading to S3"
img = push_to_s3(photo=path, filename=filename)
#print "Deleting Local Version"
os.remove(path)
The above solution works on Heroku's servers, but it just seems like a very duct-taped-together solution. Is there a cleaner way to do what I'm doing, that is, take an uploaded file, rotate it in memory and then push it to S3?
I'm also using Flask-Uploads to handle storage of the uploaded images.
For what it is worth, Pillow supports a number of inputs other than a file name - including bytearray, buffer, and file-like objects. The third is most probably what you are looking for, as anything loaded out of request.files is just a FileStorage file-like object. That simplifies the load-and-transform code to:
def fix_orientation(file_like_object):
    img = Image.open(file_like_object)
    # ... snip ...
    data = BytesIO()
    # An explicit format is required when saving to a file-like object
    img.save(data, "JPEG")
    data.seek(0)
    return data
Since we are going to be passing around data without using the filesystem very much, we can also switch to using boto.s3.key.Key's set_contents_from_file method instead of set_contents_from_filename:
def push_to_s3(photo, filename):
    # ... snip ...
    k.set_contents_from_file(photo, rewind=True)
    # ... etc. ...
That simplifies the resulting implementation to:
# Retrieving Form POST Data
fi = request.files.get("file")
# print "Rotating File (if needed)"
fi = fix_orientation(fi)
# print "Uploading to S3"
push_to_s3(photo=fi, filename=filename)

Flask Wtforms FileField object has no attribute read

I'm trying to upload images to an Amazon S3 bucket from a Flask app. Here is my code:
def s3upload(image, acl='public-read'):
    key = app.config['S3_KEY']
    secret = app.config['S3_SECRET']
    bucket = app.config['S3_BUCKET']
    conn = S3Connection(key, secret)
    mybucket = conn.get_bucket(bucket)
    r = redis.StrictRedis(connection_pool = pool)
    iid = r.incr('image')
    now = time.time()
    r.zadd('image:created_on', now, iid)
    k = Key(mybucket)
    k.key = iid
    k.set_contents_from_string(image.read())
    return iid
@app.route('/', methods = ['GET', 'POST'])
def index():
    form = ImageForm(request.form)
    print 'CHECKING REQUEST'
    if request.method == 'POST' and form.image:
        print 'VALID REQUEST'
        image = form.image.read()
        upload = s3upload(image)
        print upload
    else:
        image = None
    r = redis.StrictRedis(connection_pool = pool)
    last_ten = r.zrange('image:created_on', 0, 9)
    print last_ten
    images = []
    key = app.config['S3_KEY']
    secret = app.config['S3_SECRET']
    bucket = app.config['S3_BUCKET']
    conn = S3Connection(key, secret)
    mybucket = conn.get_bucket(bucket)
    for image in last_ten:
        images.append(mybucket.get_key(image, validate = False))
    return render_template('index.html', form=form, images=images, image=image)
However I get an error at k.set_contents_from_string(image.read()) saying 'FileField' object has no attribute 'read'. Everything I've read has indicated this is the proper way to upload an image to S3, and I've found several examples where they call .read() on a FileField object and it works fine. Thanks for your help.
FileField objects have a data attribute:
k.set_contents_from_string(image.data.read())
How about
import os
filestream = form.image.raw_data[0]
filestream.seek(0, os.SEEK_END)
read_data = filestream.tell()
or
read_data = form.image.raw_data[0].read()
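Both answers come down to the same point: the WTForms FileField is only a form wrapper, and the actual uploaded file is the werkzeug FileStorage held in its data attribute (raw_data[0] is the same object), which is what has read() and seek(). A minimal sketch of the view under that assumption, reusing the s3upload() helper from the question and assuming Flask-WTF populates the form from request.files:

@app.route('/', methods=['GET', 'POST'])
def index():
    form = ImageForm()
    if request.method == 'POST' and form.image.data:
        filestorage = form.image.data   # werkzeug FileStorage, file-like
        filestorage.seek(0)             # rewind in case it has already been read
        upload = s3upload(filestorage)  # s3upload can now call image.read()
    return render_template('index.html', form=form)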

Django: Image Resize and Upload with PIL, Amazon S3 and Boto

I'm trying to figure out the best way to take a user-uploaded image, resize it, and store both the original image and the resized image on Amazon S3.
I'm running Django 1.5, using PIL to resize the image, and using Boto to handle uploading the image file to S3. Right now I've got it working by uploading the original image to S3, using PIL to open the image via its S3 path and resize it, and then saving the resized version back to S3; however, this doesn't seem to be the most efficient way to do it.
I'm wondering if there's a way to resize the image before uploading to S3, working from the user-uploaded file itself (I've been having trouble getting PIL to open the uploaded file directly), and whether this would be faster than the way I've set things up now. I can't seem to find an answer to this, either in the PIL documentation or anywhere else. I should mention that I don't want to just use a third-party app to handle this, as part of my goal is to learn and understand fundamentally what is going on.
Is there a more efficient way to do this than what I've currently set up? A general explanation of what is happening at each step and why it makes the most sense to set things up that way would be ideal.
I should also mention that it seems to take much longer to upload the image to S3 than when I was just storing the image on my server. Is there a normal lag when uploading to S3 or is there potentially something in how things are set up that could be slowing down the S3 uploads?
I have an architecture consisting of Django + Tastypie on Heroku and the image warehouse in S3. When a user uploads a photo from the frontend (written in JS), I resize the photo to a certain size (600 x 600 max), always maintaining the aspect ratio. I'll paste the code that does this (it works).
views.py:
class UploadView(FormView):
    form_class = OriginalForm

    def form_valid(self, form):
        original = form.save()
        if original.image_width > 280 and original.image_height > 281:
            if original.image_width > 600 or original.image_height > 600:
                original.resize((600, 600))
                if not original.image:
                    return self.success(self.request, form, None, errors = 'Error while uploading the image')
            original.save()
            up = UserProfile.objects.get(user = self.request.user.pk)
            # Save the images to s3
            s3 = S3Custom()
            new_image = s3.upload_file(original.image.path, 'avatar')
            # Save the s3 image path, as string, in the user profile
            up.avatar = new_image
            up.save()
        else:
            return self.success(self.request, form, None, errors = 'The image is too small')
        return self.success(self.request, form, original)
Here I check whether the image is larger than 280 x 281 (the crop square in the frontend has that size), and also whether one of the sides of the image is larger than 600px. If that's the case, I call the (custom) resize method of my Original class...
models.py:
class Original(models.Model):
    def upload_image(self, filename):
        return u'avatar/{name}.{ext}'.format(
            name = uuid.uuid4().hex,
            ext = os.path.splitext(filename)[1].strip('.')
        )

    def __unicode__(self):
        return unicode(self.image)

    owner = models.ForeignKey('people.UserProfile')
    image = models.ImageField(upload_to = upload_image, width_field = 'image_width', height_field = 'image_height')
    image_width = models.PositiveIntegerField(editable = False, default = 0)
    image_height = models.PositiveIntegerField(editable = False, default = 0)

    def resize(self, size):
        if self.image is None or self.image_width is None or self.image_height is None:
            print 'Cannot resize None things'
        else:
            IMG_TYPE = os.path.splitext(self.image.name)[1].strip('.')
            if IMG_TYPE == 'jpeg':
                PIL_TYPE = 'jpeg'
                FILE_EXTENSION = 'jpeg'
            elif IMG_TYPE == 'jpg':
                PIL_TYPE = 'jpeg'
                FILE_EXTENSION = 'jpeg'
            elif IMG_TYPE == 'png':
                PIL_TYPE = 'png'
                FILE_EXTENSION = 'png'
            elif IMG_TYPE == 'gif':
                PIL_TYPE = 'gif'
                FILE_EXTENSION = 'gif'
            else:
                print 'Not a valid format'
                self.image = None
                return
            # Open the image from the ImageField and save the path
            original_path = self.image.path
            fp = open(self.image.path, 'rb')
            im = Image.open(StringIO(fp.read()))
            # Resize the image
            im.thumbnail(size, Image.ANTIALIAS)
            # Save the image
            temp_handle = StringIO()
            im.save(temp_handle, PIL_TYPE)
            temp_handle.seek(0)
            # Save image to a SimpleUploadedFile which can be saved into ImageField
            suf = SimpleUploadedFile(os.path.split(self.image.name)[-1], temp_handle.read(), content_type=IMG_TYPE)
            # Save SimpleUploadedFile into image field
            self.image.save('%s.%s' % (os.path.splitext(suf.name)[0], FILE_EXTENSION), suf, save=False)
            # Delete the original image
            fp.close()
            os.remove(original_path)
            # Save other fields
            self.image_width = im.size[0]
            self.image_height = im.size[1]
        return
The last thing you need is a "library" containing custom s3 methods:
class S3Custom(object):
    conn = S3Connection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
    b = Bucket(conn, settings.AWS_STORAGE_BUCKET_NAME)
    k = Key(b)

    def upload_file(self, ruta, prefix):
        try:
            self.k.key = '%s/%s' % (prefix, os.path.split(ruta)[-1])
            self.k.set_contents_from_filename(ruta)
            self.k.make_public()
        except Exception, e:
            print e
        return '%s%s' % (settings.S3_URL, self.k.key)
You should have AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_STORAGE_BUCKET_NAME, S3_URL in your settings file.
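Since the question specifically asked about resizing before anything is written to disk, it is worth noting that PIL can open the uploaded file object directly and boto's Key can upload from a file-like object, so the whole round trip can stay in memory. A minimal sketch under those assumptions; the JPEG output, the 600 x 600 bound and the key naming are illustrative only, and the settings names match the ones listed above:

import io
import uuid
from PIL import Image
from boto.s3.connection import S3Connection
from boto.s3.key import Key
from django.conf import settings

def resize_and_upload(uploaded_file, max_size=(600, 600)):
    # Open the uploaded file object directly; no temporary file on disk
    im = Image.open(uploaded_file)
    im.thumbnail(max_size, Image.ANTIALIAS)

    # Serialize the resized image into an in-memory buffer
    buf = io.BytesIO()
    im.save(buf, 'JPEG')
    buf.seek(0)

    # Upload the buffer straight to S3
    conn = S3Connection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
    bucket = conn.get_bucket(settings.AWS_STORAGE_BUCKET_NAME)
    k = Key(bucket)
    k.key = 'avatar/%s.jpeg' % uuid.uuid4().hex
    k.set_contents_from_file(buf, rewind=True)
    k.make_public()
    return '%s%s' % (settings.S3_URL, k.key)

In a Django view, uploaded_file would be request.FILES['image']; if the original full-size file also needs to be uploaded, calling uploaded_file.seek(0) afterwards lets it be re-read once PIL has consumed it.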
