How to serialize an image into a str and deserialize it back into an image? - python

I need to send a JPG image over the network via JSON. I tried to convert the data into a str via base64, as below:
from PIL import Image
from tinydb import TinyDB, Query
import base64
import io
from pdb import set_trace as bp
# Round-trip a JPEG through Base64 text, as would be needed to embed it in JSON.
# NOTE: with 'encoding' in the name, the variable always holds a bytes object
in_jpg_encoding = None
# open some random image
with open('rkt2.jpg', 'rb') as f:
# The file content is a JPEG-encoded bytes object
in_jpg_encoding = f.read()
# b64encode returns a bytes object
in_b64_encoding = base64.b64encode(in_jpg_encoding)
# interpret the above bytes as str, because a JSON value needs to be a string
in_str = in_b64_encoding.decode(encoding='utf-8')
# in_str = str(in_b64_encoding) # NOTE(review): NOT equivalent on Python 3 -- str() of a bytes object yields the repr "b'...'", not the Base64 text
# simulates a transmission, e.g. sending the image data over the internet using JSON
out_str = in_str
# strip off the utf-8 interpretation to restore it as a Base64 encoding
out_utf8_encoding = out_str.encode(encoding='utf-8')
# out_utf8_encoding = out_str.encode() # equivalent: utf-8 is the default encoding
# strip off the Base64 encoding to restore the original JPEG-encoded content
# NOTE: the output is still a bytes object, because b64decode returns bytes
out_b64_decoding = base64.b64decode(out_utf8_encoding)
out_jpg_encoding = out_b64_decoding
# ---- verification stage: wrap the bytes in a file-like object and let PIL parse it
out_jpg_file = io.BytesIO(out_jpg_encoding)
out_jpg_image = Image.open(out_jpg_file)
out_jpg_image.show()
But I got an error at the deserialization stage, saying it cannot identify the image file:
Traceback (most recent call last):
File "3_test_img.py", line 38, in <module>
out_jpg_image = Image.open(out_jpg_file)
File "/home/gaopeng/Envs/venv_celeb_parser/lib/python3.6/site-packages/PIL/Image.py", line 2687, in open
% (filename if filename else fp))
OSError: cannot identify image file <_io.BytesIO object at 0x7f6f823c6b48>
Did I miss something?

Related

Error loading base64 image: PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO

I have a base64 image string that I need to convert so that I can read it as an image and analyze it with pytesseract:
import base64
import io
from PIL import Image
import pytesseract
import sys
base64_string = "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcHBw8LCwkMEQ8SEhEPERETFh....."
img_data = base64.b64decode(base64_string)
img = Image.open(io.BytesIO(img_data)) # <== ERROR LINE
text = pytesseract.image_to_string(img, config='--psm 6')
print(text)
gives the error:
Traceback (most recent call last):
File "D:\aa\xampp\htdocs\xbanca\aa.py", line 14, in <module>
img = Image.open(io.BytesIO(img_data))
File "D:\python3.10.10\lib\site-packages\PIL\Image.py", line 3283, in open
raise UnidentifiedImageError(msg)
PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x000001A076F673D0>
I tried using the numpy and requests libraries, but they all give the same result, and the example base64 image works fine in any other converter.
That's a very common misunderstanding.
The string
base64_string = "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcHBw8LCwkMEQ8SEhEPERETFh....."
is not a Base64 string, but a Data URL
("URLs prefixed with the data: scheme allow content creators to embed small files inline in documents")
that contains a Base64 string.
The Base64 string starts directly after 'base64,'. Therefore you need to cut off the 'data:image/jpeg;base64,' part.
e.g.:
b64 = base64_string.split(",")[1]
after that you can decode the data:
img_data = base64.b64decode(b64)
I modified the code from the question and used the base64 of the following small JPEG image which I base64 encoded on https://www.base64encode.org/:
and got the expected text output:
1 Answer

Opening a PNG fails with `cannot identify image file`

I'm trying to open a PNG file with Python. I do believe I have a properly encoded PNG.
It starts with:
\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR
And ends with:
\x00IEND\xaeB`\x82
My code so far:
import PIL.Image as Image
with open('./test_image_3.txt', 'rb') as f:
b = f.read()
b = base64.b64decode(b).decode("unicode_escape").encode("latin-1")
b = b.decode('utf-16-le')
img = Image.open(io.BytesIO(b))
img.show()
b = base64.b64decode(b).decode("unicode_escape").encode("latin-1")
UnicodeDecodeError: 'unicodeescape' codec can't decode bytes in position 178-179: truncated \uXXXX escape
Unfortunately I can't read the file you've provided as the website butchered it massively. Either use pastebin or github (or something similar) where it'll be possible to retrieve text/plain e.g. via curl so I can attempt to reproduce the problem 1:1 for the contents.
However, the general approach would be this:
from PIL import Image
# Pillow identifies the format from the file's content, so the misleading
# ".txt" extension does not matter. The context manager closes the file.
with Image.open("./test_image_3.txt") as im:
    im.show()
it's directly from Pillow's documentation and it does not care about the file's name or extension.
Alternatively, if you have open() call with a file handle:
from PIL import Image
# Image.open() also accepts an already-open file object; it must be opened
# in binary mode ("rb") so Pillow receives the raw image bytes.
with open("./test_image_3.txt", "rb") as file:
    with Image.open(file) as im:
        im.show()
And if you have it mangled somehow, then judging from your encode() and decode() calls it would be this:
from PIL import Image
from io import BytesIO
data = <some raw PNG bytes, the original image>
# here I store it in that weird format and write as bytes
# Store the image in the odd escaped format: map each byte to one code point
# (latin-1), then write those code points as ASCII escape sequences.
with open("img.txt", "wb") as file:
    file.write(data.decode("latin-1").encode("unicode_escape"))

# Read it back as bytes, reverse the chain of calls and invert the
# en/decoding pairs, so encode() -> decode() and vice versa.
with open("img.txt", "rb") as file:
    content = BytesIO()
    content.write(
        file.read().decode("unicode_escape").encode("latin-1")
    )

# Seek back to the start so the BytesIO() can return the full content.
content.seek(0)

# Then simply read it as if it were a file handle.
with Image.open(content) as img:
    img.show()

PIL open image from base64 failed

I'm trying to convert a base64 string to image and get the following error.
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/dist-packages/PIL/Image.py", line 2288, in open
% (filename if filename else fp))
IOError: cannot identify image file <cStringIO.StringI object at 0x7fe6d9e88828>
There is no prefix like data:image/png;base64. I get the base64 string from an image and try to convert it back to an image. Here is my code.
# -*- coding: utf-8 -*-
import requests
import base64
from PIL import Image
from cStringIO import StringIO
import zipfile
r = requests.get('https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png', stream=False)
img = Image.open(StringIO(r.content))
b64str = base64.b64encode(img.tobytes())
data = base64.b64decode(b64str)
newimg = Image.open(StringIO(data))
And I get the error above. Can anyone help? Thanks!
You open .PNG file from the web and get the RAW image, which is RGB values, then encode that into base64 and back, which still gives you RAW RGB values which cannot be read by Image.open() because these are not an image file (jpg, png, etc), but RAW RGB values.
The most reasonable would be:
newImg = data # that's it
Or if you want to make an Image:
newImg = Image.frombytes(img.mode, img.size, data)
and get mode and size from the original image.

save base64 image python

I am trying to save an image with Python that is Base64 encoded. The string is too large to post here, but here is the image
And when received by Python the last 2 characters are ==, although the string is not formatted, so I do this
import base64
data = "data:image/png;base64," + photo_base64.replace(" ", "+")
And then I do this
imgdata = base64.b64decode(data)
filename = 'some_image.jpg' # I assume you have a way of picking unique filenames
with open(filename, 'wb') as f:
f.write(imgdata)
But this causes this error
Traceback (most recent call last):
File "/var/www/cgi-bin/save_info.py", line 83, in <module>
imgdata = base64.b64decode(data)
File "/usr/lib64/python2.7/base64.py", line 76, in b64decode
raise TypeError(msg)
TypeError: Incorrect padding
I also printed out the length of the string once the data:image/png;base64, has been added and the spaces replaced with +, and it has a length of 34354. I have tried a bunch of different images, but all of them, when I try to open the saved file, say that the file is damaged.
What is happening and why is the file corrupt?
Thanks
EDIT
Here is some base64 that also failed
iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAMAAAAoLQ9TAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAADBQTFRFA6b1q Ci5/f2lt/9yu3 Y8v2cMpb1/DSJbz5i9R2NLwfLrWbw m T8I8////////SvMAbAAAABB0Uk5T////////////////////AOAjXRkAAACYSURBVHjaLI8JDgMgCAQ5BVG3//9t0XYTE2Y5BPq0IGpwtxtTP4G5IFNMnmEKuCopPKUN8VTNpEylNgmCxjZa2c1kafpHSvMkX6sWe7PTkwRX1dY7gdyMRHZdZ98CF6NZT2ecMVaL9tmzTtMYcwbP y3XeTgZkF5s1OSHwRzo1fkILgWC5R0X4BHYu7t/136wO71DbvwVYADUkQegpokSjwAAAABJRU5ErkJggg==
This is what I receive in my python script from the POST Request
Note that I have not replaced the spaces with +'s
There is no need to add data:image/png;base64, in front of it. I tried the code below, and it works fine.
import base64
data = 'iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAMAAAAoLQ9TAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAADBQTFRFA6b1q Ci5/f2lt/9yu3 Y8v2cMpb1/DSJbz5i9R2NLwfLrWbw m T8I8////////SvMAbAAAABB0Uk5T////////////////////AOAjXRkAAACYSURBVHjaLI8JDgMgCAQ5BVG3//9t0XYTE2Y5BPq0IGpwtxtTP4G5IFNMnmEKuCopPKUN8VTNpEylNgmCxjZa2c1kafpHSvMkX6sWe7PTkwRX1dY7gdyMRHZdZ98CF6NZT2ecMVaL9tmzTtMYcwbP y3XeTgZkF5s1OSHwRzo1fkILgWC5R0X4BHYu7t/136wO71DbvwVYADUkQegpokSjwAAAABJRU5ErkJggg=='.replace(' ', '+')
# Decode the bare Base64 payload (no "data:...;base64," prefix) to raw bytes.
imgdata = base64.b64decode(data)
# NOTE: the payload above starts with "iVBORw0KGgo", i.e. it is a PNG; the
# ".jpg" name is kept from the question, but the bytes written are PNG data.
filename = 'some_image.jpg' # I assume you have a way of picking unique filenames
# Binary mode is required: imgdata is bytes, not text.
with open(filename, 'wb') as f:
    f.write(imgdata)
If you prepend data:image/png;base64, to data, then you get an error. If you have this prefix, you must remove it.
new_data = initial_data.replace('data:image/png;base64,', '')

Encoding an image file with base64

I want to encode an image into a string using the base64 module. I've run into a problem though. How do I specify the image I want to be encoded? I tried using the directory to the image, but that simply leads to the directory being encoded. I want the actual image file to be encoded.
EDIT
I tried this snippet:
with open("C:\Python26\seriph1.BMP", "rb") as f:
data12 = f.read()
UU = data12.encode("base64")
UUU = base64.b64decode(UU)
print UUU
self.image = ImageTk.PhotoImage(Image.open(UUU))
but I get the following error:
Traceback (most recent call last):
File "<string>", line 245, in run_nodebug
File "C:\Python26\GUI1.2.9.py", line 473, in <module>
app = simpleapp_tk(None)
File "C:\Python26\GUI1.2.9.py", line 14, in __init__
self.initialize()
File "C:\Python26\GUI1.2.9.py", line 431, in initialize
self.image = ImageTk.PhotoImage(Image.open(UUU))
File "C:\Python26\lib\site-packages\PIL\Image.py", line 1952, in open
fp = __builtin__.open(fp, "rb")
TypeError: file() argument 1 must be encoded string without NULL bytes, not str
What am I doing wrong?
I'm not sure I understand your question. I assume you are doing something along the lines of:
import base64
# Read the file's raw bytes ("rb") and Base64-encode them; the result is a
# bytes object containing only ASCII characters.
with open("yourfile.ext", "rb") as image_file:
    encoded_string = base64.b64encode(image_file.read())
You have to open the file first of course, and read its contents - you cannot simply pass the path to the encode function.
Edit:
Ok, here is an update after you have edited your original question.
First of all, remember to use raw strings (prefix the string with 'r') when using path delimiters on Windows, to prevent accidentally hitting an escape character. Second, PIL's Image.open either accepts a filename, or a file-like (that is, the object has to provide read, seek and tell methods).
That being said, you can use cStringIO to create such an object from a memory buffer:
import cStringIO
import PIL.Image
# assume data contains your decoded image
file_like = cStringIO.StringIO(data)
img = PIL.Image.open(file_like)
img.show()
The first answer will print a string with the prefix b'.
That means your string will look like b'your_string'. To solve this issue, add the following lines of code.
encoded_string= base64.b64encode(img_file.read())
print(encoded_string.decode('utf-8'))
I experienced this while converting an image to a Base64 string. You can also take a look at how I removed the prefix there. The link is here: Image to base64 string and fix 'b from prefix
import base64
from PIL import Image
from io import BytesIO
# Encode the file's raw bytes to Base64 (still a bytes object).
with open("image.jpg", "rb") as image_file:
    data = base64.b64encode(image_file.read())

# Decode back to the original file bytes, wrap them in a file-like object so
# Pillow can parse them, then re-save the picture as PNG.
im = Image.open(BytesIO(base64.b64decode(data)))
im.save('image1.png', 'PNG')
Borrowing from what Ivo van der Wijk and gnibbler have developed earlier, this is a dynamic solution
import cStringIO
import PIL.Image
image_data = None
def imagetopy(image, output_file):
    """Embed an image file's raw bytes in a Python source file.

    Reads ``image`` in binary mode and writes ``output_file`` containing a
    single assignment, ``image_data = <repr of the bytes>``, so the image can
    later be loaded by importing that file as a module.
    """
    with open(image, 'rb') as fin:
        image_data = fin.read()

    # repr() yields a valid Python literal for the bytes, so the output file
    # is importable source code.
    with open(output_file, 'w') as fout:
        fout.write('image_data = '+ repr(image_data))
def pytoimage(pyfile):
    """Import module ``pyfile`` and display the image stored in its ``image_data``.

    ``pyfile`` is a module name (no ``.py`` suffix) that must be importable and
    must define ``image_data`` holding the encoded image file's bytes.
    """
    # io.BytesIO exists on Python 2.6+ and Python 3, unlike cStringIO, which
    # is Python 2 only.
    import io

    # NOTE: __import__ returns the top-level package for dotted names, so this
    # is only suitable for plain top-level module names.
    pymodule = __import__(pyfile)
    img = PIL.Image.open(io.BytesIO(pymodule.image_data))
    img.show()
if __name__ == '__main__':
    # Demo: embed spot.png into wishes.py, then load it back by importing
    # the generated module and display it.
    imagetopy('spot.png', 'wishes.py')
    pytoimage('wishes')
You can then decide to compile the output image file with Cython to make it cool. With this method, you can bundle all your graphics into one module.
As I said in your previous question, there is no need to base64 encode the string, it will only make the program slower. Just use the repr
>>> with open("images/image.gif", "rb") as fin:
... image_data=fin.read()
...
>>> with open("image.py","wb") as fout:
... fout.write("image_data="+repr(image_data))
...
Now the image is stored as a variable called image_data in a file called image.py
Start a fresh interpreter and import the image_data
>>> from image import image_data
>>>
It works for me
import base64
import requests
# Getting image in bytes
response = requests.get("image_url")
# image encoding
encoded_image = base64.b64encode(response.content)
# image decoding and without it's won't work due to some '\xff' error
decoded_image= base64.b64decode(encoded_image)

Categories

Resources