Load Model from BytesIO using Joblib - python

I have converted a model to a BytesIO object using joblib in the following way:
from io import BytesIO
import joblib
bytes_container = BytesIO()
joblib.dump(model, bytes_container)
bytes_container.seek(0) # update to enable reading
bytes_model = bytes_container.read()
How do I convert bytes_model back into a model now? joblib.load asks for a filename (or file object) rather than a bytestring.

I think you can just do the following:
bytes_container = BytesIO()
joblib.dump(model, bytes_container)
bytes_container.seek(0)
model = joblib.load(bytes_container)

Related

Django convert image to webp

I have a service in my Django project's app that uploads images, and I need to convert all images to WebP to simplify further work with these files on the frontend side.
Draft of _convert_to_webp method:
# imports
from pathlib import Path
from django.core.files import temp as tempfile
from django.core.files.uploadedfile import InMemoryUploadedFile
from PIL import Image
# some service class
...
def _convert_to_webp(self, f_object: InMemoryUploadedFile):
    """Draft: convert an uploaded image to WebP via a temporary file.

    The conversion itself is still a placeholder (the ``...`` lines); only
    the surrounding plumbing is sketched here.

    :param f_object: the uploaded file taken from the request.
    :return: ``(new_file_name, new_f_object)`` - the ``.webp`` file name and
        a new ``InMemoryUploadedFile`` wrapping the converted data.
    """
    new_file_name = str(Path(f_object._name).with_suffix('.webp'))
    temp_file = tempfile.NamedTemporaryFile(suffix='.temp.webp')
    # FIXME: on other OS may cause FileNotFoundError
    # open() takes a path, so pass the temporary file's name rather than the
    # wrapper object itself.
    with open(temp_file.name, 'wb') as f:
        for line in f_object.file.readlines():
            ...  # will it works good?
    new_file = ...
    new_f_object = InMemoryUploadedFile(
        new_file,
        f_object.field_name,
        new_file_name,
        f_object.content_type,
        f_object.size,
        f_object.charset,
        f_object.content_type_extra
    )
    return new_file_name, new_f_object
...
f_object is InMemoryUploadedFile instance from POST request body (Django automatically create it).
My idea is to create a temporary file, write the data from f_object.file.readlines() to it, open this file with PIL.Image.open and save it with format="webp". Is this a good idea, or is there a better way to convert the file?
I found a pretty clean way to do this using the django-resized package.
After pip installing, I just needed to swap out the imageField for a ResizedImageField
img = ResizedImageField(force_format="WEBP", quality=75, upload_to="post_imgs/")
All image uploads are automatically converted to .webp!
The solution was pretty simple. PIL.Image can be opened using file instance, so I just opened it using f_object.file and then saved it in BytesIO instance with optimization and compression.
Correctly working code:
# imports
from pathlib import Path
from django.core.files.uploadedfile import InMemoryUploadedFile
from PIL import Image
# some service class
...
def _convert_to_webp(self, f_object: InMemoryUploadedFile):
    """Re-encode an uploaded image as WebP without touching the disk.

    Files whose name already ends in ``.webp`` are returned unchanged.

    :param f_object: the uploaded file taken from the request.
    :return: ``(file_name, uploaded_file)`` - the (possibly new) file name
        and the matching ``InMemoryUploadedFile``.
    """
    # Local import: the snippet's import list does not bring in ``io``.
    import io

    suffix = Path(f_object._name).suffix
    if suffix == ".webp":
        return f_object._name, f_object

    new_file_name = str(Path(f_object._name).with_suffix('.webp'))
    image = Image.open(f_object.file)
    thumb_io = io.BytesIO()
    image.save(thumb_io, 'webp', optimize=True, quality=95)

    # Describe the *converted* payload: its real byte size and MIME type,
    # not the values of the source upload.
    new_size = thumb_io.getbuffer().nbytes
    # Rewind so that consumers reading the file start at the first byte
    # instead of at EOF.
    thumb_io.seek(0)

    new_f_object = InMemoryUploadedFile(
        thumb_io,
        f_object.field_name,
        new_file_name,
        'image/webp',
        new_size,
        f_object.charset,
        f_object.content_type_extra
    )
    return new_file_name, new_f_object
95% was chosen as balanced parameter. There was very bad quality with quality=80 or quality=90.

how to write .npy file to s3 directly?

I would like to know if there is any way to write an array as a numpy file(.npy) to an AWS S3 bucket directly. I can use np.save to save a file locally as shown below. But I am looking for a solution to write it directly to S3, without saving locally first.
a = np.array([1, 2, 3, 4])
np.save('/my/localfolder/test1.npy', a)
If you want to bypass your local disk and upload directly the data to the cloud, you may want to use pickle instead of using a .npy file:
import boto3
import io
import pickle
import numpy  # used below for the sample array and the final check
s3_client = boto3.client('s3')
my_array = numpy.random.randn(10)
# upload without using disk
my_array_data = io.BytesIO()
pickle.dump(my_array, my_array_data)
my_array_data.seek(0)  # rewind so the upload starts at the first byte
s3_client.upload_fileobj(my_array_data, 'your-bucket', 'your-file.pkl')
# download without using disk
my_array_data2 = io.BytesIO()
s3_client.download_fileobj('your-bucket', 'your-file.pkl', my_array_data2)
my_array_data2.seek(0)  # rewind before reading what was just downloaded
# NOTE: unpickling executes arbitrary code; only load objects you trust.
my_array2 = pickle.load(my_array_data2)
# check that everything is correct
numpy.allclose(my_array, my_array2)
Documentation:
boto3
pickle
BytesIO
I've recently had issues with s3fs dependency conflicts with boto3, so I try to avoid using it. This solution only depends on boto3, does not write to disk, and does not explicitly use pickle.
Saving:
from io import BytesIO
import numpy as np
from urllib.parse import urlparse
import boto3
client = boto3.client("s3")
def to_s3_npy(data: np.array, s3_uri: str):
    """Serialize ``data`` in .npy format and upload it to S3 from memory.

    :param data: the array to store.
    :param s3_uri: destination, shaped like ``s3://{BUCKET_NAME}/{KEY}``.
    :return: ``True`` once the upload call has returned.
    """
    buffer = BytesIO()
    np.save(buffer, data, allow_pickle=True)
    buffer.seek(0)  # rewind so the upload starts at the first byte

    location = urlparse(s3_uri)
    bucket_name = location.netloc
    object_key = location.path[1:]  # drop the leading "/" of the URI path
    client.upload_fileobj(Fileobj=buffer, Bucket=bucket_name, Key=object_key)
    return True
Loading:
def from_s3_npy(s3_uri: str):
    """Download an object from S3 into memory and load it with ``np.load``.

    :param s3_uri: source, shaped like ``s3://{BUCKET_NAME}/{KEY}``.
    :return: whatever ``np.load`` produces for the downloaded bytes.
    """
    buffer = BytesIO()
    location = urlparse(s3_uri)
    bucket_name = location.netloc
    object_key = location.path[1:]  # drop the leading "/" of the URI path
    client.download_fileobj(Fileobj=buffer, Bucket=bucket_name, Key=object_key)
    buffer.seek(0)  # rewind before reading what was just downloaded
    # NOTE: allow_pickle=True can execute code embedded in the file; only
    # use this on objects you trust.
    loaded = np.load(buffer, allow_pickle=True)
    return loaded
You can also use s3fs which is a file system interface to s3, a wrapper around boto. This solution also uses pickle, so make sure to allow_pickle=True at np.load. Refer functions below to both write and read.
import numpy as np
import pickle
from s3fs.core import S3FileSystem
s3 = S3FileSystem()
def saveLabelsToS3(npyArray, name):
    """Pickle ``npyArray`` and write it to ``<bucket>/<name>`` through s3fs.

    ``bucket`` and ``s3`` are taken from the enclosing module scope.
    """
    target = '{}/{}'.format(bucket, name)
    payload = pickle.dumps(npyArray)
    with s3.open(target, 'wb') as out:
        out.write(payload)
def readLabelsFromS3(name):
    """Load the object stored at ``<bucket>/<name>`` through s3fs.

    ``bucket`` and ``s3`` are taken from the enclosing module scope.

    :param name: key of the object inside the bucket.
    :return: whatever ``np.load`` produces for the stored bytes.
    """
    # Use a context manager so the remote file handle is closed once the
    # data has been read (the original left it open).
    # NOTE(review): this assumes the payload is fully materialized by
    # np.load (true for pickled or .npy data as written by saveLabelsToS3);
    # a lazily-read .npz archive would need the handle kept open.
    # NOTE: allow_pickle=True can execute code embedded in the file; only
    # use this on objects you trust.
    with s3.open('{}/{}'.format(bucket, name)) as f:
        return np.load(f, allow_pickle=True)
# Use as below
saveLabelsToS3(labels, 'folder/filename.pkl')
labels = readLabelsFromS3('folder/filename.pkl')

Get Binary Representation of PIL Image Without Saving

I am writing an application that uses images intensively. It is composed of two parts. The client part is written in Python. It does some preprocessing on images and sends them over TCP to a Node.js server.
After preprocessing, the Image object looks like this:
window = img.crop((x,y,width+x,height+y))
window = window.resize((48,48),Image.ANTIALIAS)
To send that over socket, I have to have it in binary format. What I am doing now is:
window.save("window.jpg")
infile = open("window.jpg","rb")
encodedWindow = base64.b64encode(infile.read())
#Then send encodedWindow
This is a huge overhead, though, since I am saving the image to the hard disk first, then loading it again to obtain the binary format. This is causing my application to be extremely slow.
I read the documentation of PIL Image, but found nothing useful there.
According to the documentation, (at effbot.org):
"You can use a file object instead of a filename. In this case, you must always specify the format. The file object must implement the seek, tell, and write methods, and be opened in binary mode."
This means you can pass a StringIO object. Write to it and get the size without ever hitting the disk.
Like this:
# In-memory buffer that stands in for the file on disk (Python 2 StringIO).
s = StringIO.StringIO()
# The format must be given explicitly for file objects, and PIL registers
# the JPEG writer under the name "JPEG" - "jpg" is not a known format name.
window.save(s, "JPEG")
encodedWindow = base64.b64encode(s.getvalue())
use BytesIO
from io import BytesIO
from PIL import Image
photo=Image.open('photo.jpg')
s=BytesIO()
photo.save(s,'jpeg')
data = s.getvalue()
with open('photo2.jpg', mode='wb') as f:
f.write(data)
It's about the difference between in-memory file-like object and BufferedReader object.
Here is my experiment in Jupyter(Python 3.8.10):
from PIL import Image as PILImage, ImageOps as PILImageOps
from IPython.display import display, Image
from io import BytesIO
import base64
import requests  # required by requests.get() below
url = "https://learn.microsoft.com/en-us/archive/msdn-magazine/2018/april/images/mt846470.0418_mccaffreytrun_figure2_hires(en-us,msdn.10).png"
print("get computer-readable bytes from the url")
img_bytes = requests.get(url).content
print(type(img_bytes))
display(Image(img_bytes))
print("convert to in-memory file-like object")
in_memory_file_like_object = BytesIO(img_bytes)
print(type(in_memory_file_like_object))
print("convert to an PIL Image object for manipulating")
pil_img = PILImage.open(in_memory_file_like_object)
print("let's rotate it, and it remains a PIL Image object")
pil_img.show()
rotated_img = pil_img.rotate(45)
print(type(rotated_img))
print("let's create an in-memory file-like object and save the PIL Image object into it")
# A fresh, empty buffer: this is what replaces the file on disk.
in_memory_file_like_object = BytesIO()
rotated_img.save(in_memory_file_like_object, 'png')
print(type(in_memory_file_like_object))
print("get computer-readable bytes")
# getvalue() returns the whole buffer regardless of the current position.
img_bytes = in_memory_file_like_object.getvalue()
print(type(img_bytes))
display(Image(img_bytes))
print('convert to base64 to be transmitted over channels that do not preserve all 8-bits of data, such as email')
# https://stackoverflow.com/a/8909233/3552975
base_64 = base64.b64encode(img_bytes)
print(type(base_64))
# https://stackoverflow.com/a/45928164/3552975
assert base64.b64encode(base64.b64decode(base_64)) == base_64
In short, you can save a PIL Image object into an in-memory file-like object with rotated_img.save(in_memory_file_like_object, 'png') as shown above, and then convert the contents of that in-memory file-like object into base64.
from io import BytesIO
b = BytesIO()
img.save(b, format="png")
b.seek(0)
data = b.read()
del b

How to unpack pkl file?

I have a pkl file from MNIST dataset, which consists of handwritten digit images.
I'd like to take a look at each of those digit images, so I need to unpack the pkl file, except I can't find out how.
Is there a way to unpack/unzip pkl file?
Generally
Your pkl file is, in fact, a serialized pickle file, which means it has been dumped using Python's pickle module.
To un-pickle the data you can:
import pickle
with open('serialized.pkl', 'rb') as f:
data = pickle.load(f)
For the MNIST data set
Note gzip is only needed if the file is compressed:
import gzip
import pickle
with gzip.open('mnist.pkl.gz', 'rb') as f:
train_set, valid_set, test_set = pickle.load(f)
Where each set can be further divided (i.e. for the training set):
train_x, train_y = train_set
Those would be the inputs (digits) and outputs (labels) of your sets.
If you want to display the digits:
import matplotlib.cm as cm
import matplotlib.pyplot as plt
plt.imshow(train_x[0].reshape((28, 28)), cmap=cm.Greys_r)
plt.show()
The other alternative would be to look at the original data:
http://yann.lecun.com/exdb/mnist/
But that will be harder, as you'll need to create a program to read the binary data in those files. So I recommend you to use Python, and load the data with pickle. As you've seen, it's very easy. ;-)
Handy one-liner
pkl() (
python -c 'import pickle,sys;d=pickle.load(open(sys.argv[1],"rb"));print(d)' "$1"
)
pkl my.pkl
Will print __str__ for the pickled object.
The generic problem of visualizing an object is of course undefined, so if __str__ is not enough, you will need a custom script.
In case you want to work with the original MNIST files, here is how you can deserialize them.
If you haven't downloaded the files yet, do that first by running the following in the terminal:
wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Then save the following as deserialize.py and run it.
import numpy as np
import gzip
IMG_DIM = 28  # side length, in pixels, of one square image


def decode_image_file(fname, img_dim=IMG_DIM):
    """Decode a gzip-compressed image file into a 2-D ``uint8`` array.

    The first 16 bytes of the decompressed stream are skipped as the file
    header; the rest is interpreted as consecutive ``img_dim * img_dim``
    byte images.

    :param fname: path of the ``.gz`` file to read.
    :param img_dim: side length of one square image (defaults to IMG_DIM).
    :return: array of shape ``(n_images, img_dim * img_dim)``, one flattened
        image per row.
    :raises ValueError: if the payload is not a whole number of images.
    """
    header_size = 16
    n_bytes_per_img = img_dim * img_dim
    with gzip.open(fname, 'rb') as f:
        data = f.read()[header_size:]
    if len(data) % n_bytes_per_img != 0:
        raise ValueError('Something wrong with the file')
    # Count rows from the payload only. The original divided the length of
    # the whole stream (header included), which is only right as long as the
    # header is smaller than one image.
    n_images = len(data) // n_bytes_per_img
    return np.frombuffer(data, dtype=np.uint8).reshape(
        n_images, n_bytes_per_img)
def decode_label_file(fname):
    """Decode a gzip-compressed label file into a 1-D ``uint8`` array.

    The first 8 bytes of the decompressed stream are skipped as the file
    header; every remaining byte becomes one label.

    :param fname: path of the ``.gz`` file to read.
    :return: 1-D ``uint8`` array with one entry per label byte.
    """
    header_size = 8
    with gzip.open(fname, 'rb') as stream:
        raw = stream.read()
    labels = np.frombuffer(raw[header_size:], dtype=np.uint8)
    return labels
train_images = decode_image_file('train-images-idx3-ubyte.gz')
train_labels = decode_label_file('train-labels-idx1-ubyte.gz')
test_images = decode_image_file('t10k-images-idx3-ubyte.gz')
test_labels = decode_label_file('t10k-labels-idx1-ubyte.gz')
The script doesn't normalize the pixel values like in the pickled file. To do that, all you have to do is
train_images = train_images/255
test_images = test_images/255
The pickle module (and gzip, if the file is compressed) needs to be used.
NOTE: These are already in the standard Python library.
No need to install anything new

How do I read image data from a URL?

What I'm trying to do is fairly simple when we're dealing with a local file, but the problem comes when I try to do this with a remote URL.
Basically, I'm trying to create a PIL image object from a file pulled from a URL. Sure, I could always just fetch the URL and store it in a temp file, then open it into an image object, but that feels very inefficient.
Here's what I have:
Image.open(urlopen(url))
It flakes out complaining that seek() isn't available, so then I tried this:
Image.open(urlopen(url).read())
But that didn't work either. Is there a Better Way to do this, or is writing to a temporary file the accepted way of doing this sort of thing?
In Python3 the StringIO and cStringIO modules are gone.
In Python3 you should use:
from PIL import Image
import requests
from io import BytesIO
response = requests.get(url)
img = Image.open(BytesIO(response.content))
Using a StringIO
import urllib, cStringIO
file = cStringIO.StringIO(urllib.urlopen(URL).read())
img = Image.open(file)
The following works for Python 3:
from PIL import Image
import requests
im = Image.open(requests.get(url, stream=True).raw)
References:
https://github.com/python-pillow/Pillow/pull/1151
https://github.com/python-pillow/Pillow/blob/master/CHANGES.rst#280-2015-04-01
Using requests:
from PIL import Image
import requests
from StringIO import StringIO
response = requests.get(url)
img = Image.open(StringIO(response.content))
Python 3
from urllib.request import urlopen
from PIL import Image
img = Image.open(urlopen(url))
img
Jupyter Notebook and IPython
import IPython
url = 'https://newevolutiondesigns.com/images/freebies/colorful-background-14.jpg'
IPython.display.Image(url, width = 250)
Unlike other methods, this method also works in a for loop!
Use StringIO to turn the read string into a file-like object:
# On Python 3 the downloaded payload is bytes, so the in-memory file-like
# wrapper is io.BytesIO (the StringIO module no longer exists there).
from io import BytesIO
from PIL import Image
# "import urllib" alone does not load the urllib.request submodule.
import urllib.request
Image.open(BytesIO(urllib.request.urlopen(url).read()))
For those doing some sklearn/numpy post processing (i.e. Deep learning) you can wrap the PIL object with np.array(). This might save you from having to Google it like I did:
from PIL import Image
import requests
import numpy as np
from StringIO import StringIO
response = requests.get(url)
img = np.array(Image.open(StringIO(response.content)))
The arguably recommended way to do image input/output these days is to use the dedicated package ImageIO. Image data can be read directly from a URL with one simple line of code:
from imageio import imread
image = imread('https://cdn.sstatic.net/Sites/stackoverflow/img/logo.png')
Many answers on this page predate the release of that package and therefore do not mention it. ImageIO started out as component of the Scikit-Image toolkit. It supports a number of scientific formats on top of the ones provided by the popular image-processing library PILlow. It wraps it all in a clean API solely focused on image input/output. In fact, SciPy removed its own image reader/writer in favor of ImageIO.
select the image in chrome, right click on it, click on Copy image address, paste it into a str variable (my_url) to read the image:
import shutil
import requests
my_url = 'https://www.washingtonian.com/wp-content/uploads/2017/06/6-30-17-goat-yoga-congressional-cemetery-1-994x559.jpg'
response = requests.get(my_url, stream=True)
with open('my_image.png', 'wb') as file:
shutil.copyfileobj(response.raw, file)
del response
open it;
from PIL import Image
img = Image.open('my_image.png')
img.show()
Manually wrapping in BytesIO is no longer needed since PIL >= 2.8.0. Just use Image.open(response.raw)
Adding on top of Vinícius's comment:
You should pass stream=True as noted https://requests.readthedocs.io/en/master/user/quickstart/#raw-response-content
So
img = Image.open(requests.get(url, stream=True).raw)
USE urllib.request.urlretrieve() AND PIL.Image.open() TO DOWNLOAD AND READ IMAGE DATA :
import requests
import urllib.request
# "import PIL" alone does not load the Image submodule; import it explicitly
# so that PIL.Image.open is available.
import PIL.Image
# Download the image to a local file, then open that file with Pillow.
urllib.request.urlretrieve("https://i.imgur.com/ExdKOOz.png", "sample.png")
img = PIL.Image.open("sample.png")
img.show()
or Call requests.get(url) with url as the address of the object file to download via a GET request. Call io.BytesIO(obj) with obj as the content of the response to load the raw data as a bytes object. To load the image data, call PIL.Image.open(bytes_obj) with bytes_obj as the bytes object:
import io
response = requests.get("https://i.imgur.com/ExdKOOz.png")
image_bytes = io.BytesIO(response.content)
img = PIL.Image.open(image_bytes)
img.show()
from PIL import Image
import cv2
import numpy as np
import requests
image=Image.open(requests.get("https://previews.123rf.com/images/darrenwhi/darrenwhi1310/darrenwhi131000024/24022179-photo-of-many-cars-with-one-a-different-color.jpg", stream=True).raw)
#image =resize((420,250))
image_array=np.array(image)
image
To directly get image as numpy array without using PIL
import requests, io
import matplotlib.pyplot as plt
response = requests.get(url).content
img = plt.imread(io.BytesIO(response), format='JPG')
plt.imshow(img)

Categories

Resources