I'm trying to convert a base64 string to image and get the following error.
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/dist-packages/PIL/Image.py", line 2288, in open
% (filename if filename else fp))
IOError: cannot identify image file <cStringIO.StringI object at 0x7fe6d9e88828>
There is no prefix like data:image/png;base64. I get the base64 string from an image and try to convert it back to an image. Here is my code.
# -*- coding: utf-8 -*-
import requests
import base64
from PIL import Image
from cStringIO import StringIO
import zipfile
# Download the PNG and decode it into a PIL image (Python 2: cStringIO wraps the response bytes).
r = requests.get('https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png', stream=False)
img = Image.open(StringIO(r.content))
# What gets base64-encoded here is the output of img.tobytes() on the decoded image,
# not r.content (the bytes of the downloaded PNG file).
b64str = base64.b64encode(img.tobytes())
data = base64.b64decode(b64str)
# Image.open() is handed the round-tripped tobytes() output; presumably this is the
# call that raises the IOError shown in the traceback above.
newimg = Image.open(StringIO(data))
And I get the error above. Can anyone help? Thanks!
You download a PNG file from the web and decode it, and img.tobytes() then gives you the raw image data, i.e. the RGB pixel values. Encoding that to base64 and decoding it back still gives you raw RGB values, which Image.open() cannot read because they are not an image file (JPEG, PNG, etc.) but raw pixel data.
The most reasonable would be:
newImg = data # that's it
Or if you want to make an Image:
newImg = Image.frombytes(img.mode, img.size, data)
and get mode and size from the original image.
Related
I have a base64 image string that I need to convert so that I can read it as an image and analyze it with pytesseract:
import base64
import io
from PIL import Image
import pytesseract
import sys
# NOTE: the value begins with the "data:image/jpeg;base64," prefix and ends in "....." as posted.
base64_string = "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcHBw8LCwkMEQ8SEhEPERETFh....."
# The whole string, prefix included, is passed to b64decode().
img_data = base64.b64decode(base64_string)
img = Image.open(io.BytesIO(img_data)) # <== ERROR LINE
# Run OCR on the opened image with the '--psm 6' config and print the recognised text.
text = pytesseract.image_to_string(img, config='--psm 6')
print(text)
gives the error:
Traceback (most recent call last):
File "D:\aa\xampp\htdocs\xbanca\aa.py", line 14, in <module>
img = Image.open(io.BytesIO(img_data))
File "D:\python3.10.10\lib\site-packages\PIL\Image.py", line 3283, in open
raise UnidentifiedImageError(msg)
PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x000001A076F673D0>
I tried using the numpy and requests libraries, but they all give the same result, and the example base64 image works fine in any other converter.
That's a very common misunderstanding.
The string
base64_string = "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcHBw8LCwkMEQ8SEhEPERETFh....."
is not a Base64 string, but a DataURL
URLs prefixed with the data: scheme, allow content creators to embed small files inline in documents
that contains a Base64 string.
The Base64 string starts directly after 'base64,'. Therefore you need to cut off the 'data:image/jpeg;base64,' part.
e.g.:
b64 = base64_string.split(",")[1]
after that you can decode the data:
img_data = base64.b64decode(b64)
I modified the code from the question and used the base64 of the following small JPEG image which I base64 encoded on https://www.base64encode.org/:
and got the expected text output:
1 Answer
I need to send a JPG image over the network via JSON. I tried to convert the data into a str via base64, as below:
from PIL import Image
from tinydb import TinyDB, Query
import base64
import io
from pdb import set_trace as bp
# Round-trip a JPEG file through base64 text (as it would travel inside JSON)
# and open the restored bytes with PIL.
# note: with 'encoding' in name, it is always a bytes obj
in_jpg_encoding = None
# open some random image
with open('rkt2.jpg', 'rb') as f:
    # The file content is a jpeg encoded bytes object
    in_jpg_encoding = f.read()
# output is a bytes object
in_b64_encoding = base64.b64encode(in_jpg_encoding)
# interpret above bytes as str, because json value need to be string
in_str = in_b64_encoding.decode(encoding='utf-8')
# NOTE: in_str = str(in_b64_encoding) is NOT an equivalent alternative on
# Python 3: str() of a bytes object returns its repr ("b'...'"), and the extra
# characters corrupt the data when it is base64-decoded again below.
# simulates a transmission, e.g. sending the image data over internet using json
out_str = in_str
# strip-off the utf-8 interpretation to restore it as a base64 encoding
out_utf8_encoding = out_str.encode(encoding='utf-8')
# out_utf8_encoding = out_str.encode() # same way of writing above statement
# strip off the base64 encoding to restore it as its original jpeg encoded content
# note: output is still a bytes obj due to b64 decoding
out_b64_decoding = base64.b64decode(out_utf8_encoding)
out_jpg_encoding = out_b64_decoding
# ---- verification stage
# wrap the restored bytes in an in-memory binary stream so PIL can open them
out_jpg_file = io.BytesIO(out_jpg_encoding)
out_jpg_image = Image.open(out_jpg_file)
out_jpg_image.show()
But I got an error at the deserialization stage, saying it cannot identify the image file:
Traceback (most recent call last):
File "3_test_img.py", line 38, in <module>
out_jpg_image = Image.open(out_jpg_file)
File "/home/gaopeng/Envs/venv_celeb_parser/lib/python3.6/site-packages/PIL/Image.py", line 2687, in open
% (filename if filename else fp))
OSError: cannot identify image file <_io.BytesIO object at 0x7f6f823c6b48>
Did I miss something?
Having trouble with this error code regarding the following code for Pytesseract. (Python 3.6.1, Mac OSX)
import pytesseract
import requests
from PIL import Image
from PIL import ImageFilter
from io import StringIO, BytesIO
def process_image(url):
    """Fetch the image at *url* and return the text pytesseract reads from it.

    This is the code as asked about; only the indentation has been restored.
    """
    image = _get_image(url)
    # NOTE: the return value of filter() is not assigned here, and this is the
    # call that raises "ValueError: cannot filter palette images" in the
    # traceback below.
    image.filter(ImageFilter.SHARPEN)
    return pytesseract.image_to_string(image)
def _get_image(url):
    """Download *url* and open the response body as a PIL image."""
    r = requests.get(url)
    # Wrap the response bytes in an in-memory binary stream for Image.open().
    s = BytesIO(r.content)
    img = Image.open(s)
    return img
# Run the pipeline on a sample image URL; the returned text is not used here.
process_image("https://www.prepressure.com/images/fonts_sample_ocra_medium.png")
Error:
/usr/local/Cellar/python3/3.6.0_1/Frameworks/Python.framework/Versions/3.6/bin/python3.6 /Users/g/pyfo/reddit/ocr.py
Traceback (most recent call last):
File "/Users/g/pyfo/reddit/ocr.py", line 20, in <module>
process_image("https://www.prepressure.com/images/fonts_sample_ocra_medium.png")
File "/Users/g/pyfo/reddit/ocr.py", line 10, in process_image
image.filter(ImageFilter.SHARPEN)
File "/usr/local/lib/python3.6/site-packages/PIL/Image.py", line 1094, in filter
return self._new(filter.filter(self.im))
File "/usr/local/lib/python3.6/site-packages/PIL/ImageFilter.py", line 53, in filter
raise ValueError("cannot filter palette images")
ValueError: cannot filter palette images
Process finished with exit code 1
Seems simple enough, but is not working. Any help would be greatly appreciated.
The image you have is a palette-based image. You need to convert it to a full RGB image in order to use the PIL filters.
import pytesseract
import requests
from PIL import Image, ImageFilter
from io import StringIO, BytesIO
def process_image(url):
    """Fetch the image at *url*, sharpen it, and return the OCR text.

    The image is converted to RGB first because PIL refuses to filter
    palette images.
    """
    image = _get_image(url)
    # convert() and filter() return new images rather than modifying the
    # existing one, so each result is assigned back.
    image = image.convert('RGB')
    image = image.filter(ImageFilter.SHARPEN)
    return pytesseract.image_to_string(image)
def _get_image(url):
    """Download *url* and open the response body as a PIL image.

    Raises requests.HTTPError for a 4xx/5xx response, so an error page is
    never passed on to Image.open().
    """
    r = requests.get(url)
    # Fail here on an HTTP error status instead of trying to decode the
    # error body as an image.
    r.raise_for_status()
    # Wrap the response bytes in an in-memory binary stream for Image.open().
    s = BytesIO(r.content)
    img = Image.open(s)
    return img
# Print the recognised text; without this the result of the call is discarded.
print(process_image("https://www.prepressure.com/images/fonts_sample_ocra_medium.png"))
You should also note that the .convert() and .filter() methods return a copy of the image; they don't change the existing image object. You need to assign the return value to a variable, as shown in the code above.
NOTE: I don't have pytesseract, so I can't check the last line of process_image().
from PIL import Image
image = Image.open("image.jpg")
# In-memory binary stream that receives the JPEG-encoded image.
# NOTE(review): `io` is used here but no `import io` appears in the lines shown.
file_path = io.BytesIO();
image.save(file_path,'JPEG');
# getvalue() passes the buffer's contents (a bytes object) to Image.open(), not
# the stream itself; this is the statement the TypeError below is reported for.
image2 = Image.open(file_path.getvalue());
I get the error "TypeError: embedded NUL character" on the last statement (Image.open) when running the program.
What is the correct way to open a file from streams?
http://effbot.org/imagingbook/introduction.htm#more-on-reading-images
from PIL import Image
import StringIO
# Python 2 example (StringIO module, print statement).
buffer = StringIO.StringIO()
buffer.write(open('image.jpeg', 'rb').read())
# Rewind to the start of the buffer before handing it to Image.open().
buffer.seek(0)
image = Image.open(buffer)
print image
# <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=800x600 at 0x7FE2EEE2B098>
# if we try open again
# (no seek(0) precedes this second open; it fails with the IOError shown below)
image = Image.open(buffer)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/dist-packages/PIL/Image.py", line 2028, in open
raise IOError("cannot identify image file")
IOError: cannot identify image file
Make sure you call buffer.seek(0) before reading from any StringIO object. Otherwise you'll be reading from the end of the buffer, which will look like an empty file and is likely causing the error you're seeing.
Using BytesIO is much simpler, though it took me a while to figure out. It also lets you, for example, read and write zip files.
from PIL import Image
from io import BytesIO
# Bytes of a simple 2x2 GIF file.
gif_bytes = b'\x47\x49\x46\x38\x39\x61\x02\x00\x02\x00\x80\x00\x00\x00\xFF\xFF\xFF\x21\xF9\x04\x00\x00\x00\x00\x00\x2C\x00\x00\x00\x00\x02\x00\x02\x00\x00\x02\x03\x44\x02\x05\x00\x3B'
# Create the stream directly from the bytes: its read position then starts at
# 0. Writing into an empty BytesIO leaves the position at the end, which needs
# a seek(0) before the stream can be read back.
gif_bytes_io = BytesIO(gif_bytes)  # or io.BytesIO(gif_bytes)
# Open the in-memory GIF as an image.
image = Image.open(gif_bytes_io)
# optional proof of concept:
# image.show()
# Save as PNG through a second in-memory stream.
png_bytes_io = BytesIO()  # or io.BytesIO()
image.save(png_bytes_io, format='PNG')
print(png_bytes_io.getvalue())  # outputs the byte stream of the png
How do I read an image from StringIO into PIL in Python? I will have a StringIO object. How do I read the image it contains? I can't even get it to read an image from a file. Wow!
from StringIO import StringIO
from PIL import Image
# readlines() returns a list of lines, so StringIO() is given a list here
# rather than the file's contents as one string.
image_file = StringIO(open("test.gif",'rb').readlines())
im = Image.open(image_file)
# Python 2 print statement: image format, size formatted as "%dx%d", and mode.
print im.format, "%dx%d" % im.size, im.mode
Traceback (most recent call last):
File "/home/ubuntu/workspace/receipt/imap_poller.py", line 22, in <module>
im = Image.open(image_file)
File "/usr/local/lib/python2.7/dist-packages/Pillow-2.3.1-py2.7-linux-x86_64.egg/PIL/Image.py", line 2028, in open
raise IOError("cannot identify image file")
IOError: cannot identify image file
Don't use readlines(), it returns a list of strings which is not what you want. To retrieve the bytes from the file, use read() function instead.
Your example worked out of the box with read() and a JPG file on my PC:
# Python 2.x
>>>from StringIO import StringIO
# Python 3.x
>>>from io import StringIO
>>>from PIL import Image
>>>image_file = StringIO(open("test.jpg",'rb').read())
>>>im = Image.open(image_file)
>>>print im.size, im.mode
(2121, 3508) RGB