PIL isn't able to show images - python

I've been trying to show an image with PIL, but I don't know why the image viewer keeps giving me this: "Windows Photo Viewer can't open this picture because either the picture is deleted, or it's in a location that isn't available."
I've checked the pic is on my desktop and doesn't have any problem.
Here is my code:
from PIL import Image
# Open the photo by its bare file name (a relative path) and display it.
picture = Image.open('photo_2016-08-04_22-38-11.jpg')
picture.show()
Could anyone help me with this?

Try this little snippet:
from PIL import Image
import os
# Build an absolute path to the picture inside the current user's Desktop
# folder, so the lookup does not depend on the directory the script runs from.
home_path = os.path.expanduser('~')
filename = os.path.join(home_path, 'Desktop', 'photo_2016-08-04_22-38-11.jpg')

# Only hand the file to PIL when it is really there; otherwise report the
# exact path that was tried.
if os.path.exists(filename):
    print("Opening filename {0}".format(filename))
    img = Image.open(filename)
    img.show()
else:
    print("Filename {0} doesn't exist".format(filename))

Related

Can we read the following captcha image, and if so, how can it be done with pytesseract?

I am trying to read images with pytesseract; it works for the captcha from one website but not for the captcha from another.
The following image is readable by my current code.
The following image is unable to be read by my current code.
from PIL import Image
from pytesseract import pytesseract
# Locations of the Tesseract executable and of the captcha to recognise.
tesseract_binary = '/usr/bin/tesseract'
captcha_file = './captcha.png'

# Tell pytesseract which executable to use.
pytesseract.tesseract_cmd = tesseract_binary

# Load the captcha with PIL, run OCR on it and print the recognised text.
print(pytesseract.image_to_string(Image.open(captcha_file)))

PDF to Image and downloading it to a specific folder using Wand Python

I am trying to convert all the pages of a PDF to images and save them to a specific working directory.
The code is:
from wand.image import Image
from wand.image import Image as wi
# Read work.pdf at a resolution of 300 and convert it to JPEG.
pdf = wi(filename="work.pdf", resolution=300)
pdfimage = pdf.convert("jpeg")

# Save each page as its own file, numbered from 1, in the target folder.
for i, img in enumerate(pdfimage.sequence, start=1):
    page = wi(image=img)
    page.save(filename=r"C:\Users\...\work" + str(i) + ".jpg")
As you can see, I am converting each page to JPG format and then trying to save them in the folder. But for some reason, it is not working.
If instead of the second last line, I try:
from wand.image import Image as wi
# Read work.pdf at a resolution of 300 and convert it to JPEG.
pdf = wi(filename="work.pdf", resolution=300)
pdfimage = pdf.convert("jpeg")

# Save each page as its own file, numbered from 1.
for i, img in enumerate(pdfimage.sequence, start=1):
    page = wi(image=img)
    # page.save(filename=r"C:\Users\...\work" + str(i) + ".jpg")
    # A bare file name is a relative path.
    page.save(filename=str(i) + ".jpg")
then it saves successfully but in the folder C:\Users\Me.
How can I save them in the working directory?
Try this...
import os
from wand.image import Image as wi
# Open the PDF at a resolution of 300; the context manager releases the image
# when the block ends.
with wi(filename="work.pdf", resolution=300) as pdf:
    # NOTE(review): presumably makes the page numbering in the output file
    # names start at 1 - confirm against the Wand documentation.
    pdf.scene = 1
    # Save into the current working directory. "%02d" is left in the file
    # name as a placeholder (presumably expanded per page by ImageMagick).
    pdf.save(filename=os.path.join(os.getcwd(), "work%02d.jpg"))
Wand should also support pathlib, or other classes that implement the __fspath__() interface.

PySimpleGui - Not able to display images when the url is remote (or maybe when its jpg)

I'm following along with the Image Viewer demo and have adapted the code as below:
from PIL import Image,ImageTk
import requests
import PySimpleGUI as sg
import io
def get_img_data(f, maxsize=(800, 640), first=False):
    """Generate image data using PIL.

    Args:
        f: File name or file-like object that ``Image.open`` can read.
        maxsize: ``(width, height)`` bound passed to ``Image.thumbnail``.
        first: When true, return the image encoded as PNG bytes instead of
            an ``ImageTk.PhotoImage``.

    Returns:
        PNG-encoded ``bytes`` when ``first`` is true, otherwise an
        ``ImageTk.PhotoImage`` built from the thumbnail.
    """
    img = Image.open(f)
    img.thumbnail(maxsize)
    if first:  # tkinter is inactive the first time
        bio = io.BytesIO()
        img.save(bio, format="PNG")
        del img
        # The encoded bytes are returned without being echoed to stdout;
        # dumping the whole PNG to the console was leftover debugging.
        return bio.getvalue()
    return ImageTk.PhotoImage(img)
# Download the picture as a stream and build the initial image element from
# PNG bytes produced by get_img_data.
url = 'https://upload.wikimedia.org/wikipedia/commons/c/c5/JPEG_example_down.jpg'
response = requests.get(url, stream=True)
image_data = get_img_data(response.raw, first=True)
image_elem = sg.Image(data=image_data)
layout = [
    [image_elem],
]
window = sg.Window("Image", layout=layout)

# Event loop: refresh the image element after every window event.
while True:
    # NOTE(review): this rebinds `response` to the second value returned by
    # window.read(), so `response.raw` below no longer refers to the HTTP
    # response - confirm whether that is intended.
    event, response = window.read()
    # NOTE(review): the loop has no exit condition.
    image_elem.update(data=get_img_data(response.raw, first=True))
I get this error
TclError: couldn't recognize image data
It doesn't matter whether the `first` argument of get_img_data is True or False in the event loop. Both generate either this error for me or an error that says
'Too early to create image'
I think the issue might be that the url is a jpg, but the below code works fine and shows a png file.
from PIL import Image
import requests
# Request the JPEG as a stream, let PIL decode it directly from the raw
# response body, and show the result.
jpeg_url = 'https://upload.wikimedia.org/wikipedia/commons/c/c5/JPEG_example_down.jpg'
remote = requests.get(jpeg_url, stream=True)
Image.open(remote.raw).show()
Any insight into this would be appreciated. Thanks

Extract text from PNG images using Python tesseract

Recently, I took on a project: converting a scanned PDF to a searchable PDF/Word document using Python and Tesseract.
After a few attempts, I was able to convert the scanned PDF to PNG image files, but now I'm stuck. Could anyone please help me convert the PNG files to a searchable Word/PDF document? My code is attached below.
Please find the attached image for reference.
import os
import sys

from PIL import Image
import pytesseract
from pytesseract import image_to_string

lib_path = r'_______'  # site-package
pop_path = r'_______'  # poppler dlls

# Put the site-packages folder on the import path before importing pdf2image.
sys.path.insert(0, lib_path)
from pdf2image import convert_from_path

pdf_path = r'_______'  # PDF file directory
img_path = r'_______'  # image output path

# Render the PDF pages at 500 DPI using the poppler binaries in pop_path.
images = convert_from_path(pdf_path=pdf_path,
                           dpi=500, poppler_path=pop_path)

# Save every page as its own PNG. The page index is part of the file name so
# that pages do not overwrite one another.
for idx, pg in enumerate(images):
    pg.save(img_path + 'PDF_Page_' + str(idx) + '.png', "PNG")
    print('{} page converted'.format(str(idx)))

try:
    from PIL import Image
except ImportError:
    import Image
import pytesseract


def ocr_core(images):
    """Return the text pytesseract extracts from the image file at `images`."""
    text = pytesseract.image_to_string(Image.open(images))
    return text


print(ocr_core("image path/imagename"))
That's all I've written. It produces multiple ".PNG" images, but I am only able to convert one PNG image to text.
How can I convert all the images and save the text as CSV/Word?
import os
import sys

from PIL import Image
from pdf2image import convert_from_path
import pytesseract

pdf_file_path = '_______'  # your file path
output = '_____'  # where you want to save and file name

# Render the PDF at 500 DPI and save each page as a numbered JPEG, keeping
# the file names so the OCR pass below reads exactly the files written here.
pages = convert_from_path(pdf_file_path, dpi=500)
image_names = []
for counter, page in enumerate(pages, start=1):
    image_name = "image_" + str(counter) + ".jpg"  # or ".png"
    page.save(image_name, 'JPEG')
    image_names.append(image_name)

# OCR every saved page in order and append its text to the output file.
# UTF-8 is set explicitly so non-ASCII OCR output does not depend on the
# platform's default encoding.
with open(output, "w", encoding="utf-8") as f:
    for image_name in image_names:
        text = str(pytesseract.image_to_string(Image.open(image_name)))
        f.write(text)

Unable to read image data when converting from PDF to Image

I am trying to convert the PDF to Image to proceed further with the Tesseract. It works when I convert using cmd:
magick convert a.pdf b.png
But doesn't work when I try to do the same using Python:
from wand.image import Image
# Open the PDF with Wand and write it out as PNG; the context manager
# releases the image when the block ends.
with Image(filename='a.pdf') as img:
    img.save(filename='sample.png')
The error I get is:
unable to read image data D:/Users/UserName/AppData/Local/Temp/magick-4908Cq41DDA5FxlX1 # error/pnm.c/ReadPNMImage/1346
I have also installed Ghostscript, but the error is still there.
EDIT:
I took the code provided in the reply below and modified it to read all the pages. The original issue is still there and the code below uses pdf2image:
from pdf2image import convert_from_path
import os
pdf_dir = "D:/Users/UserName/Desktop/scraping"

# Convert every PDF found in pdf_dir to one JPEG per page.
for pdf_file in os.listdir(pdf_dir):
    if pdf_file.endswith(".pdf"):
        # os.listdir returns bare names, so join with pdf_dir to open the
        # file regardless of the current working directory.
        pages = convert_from_path(os.path.join(pdf_dir, pdf_file), 300)
        pdf_name = pdf_file[:-4]
        # enumerate gives each page its own index; pages.index(page) would
        # search the list on every iteration.
        for page_number, page in enumerate(pages):
            page.save("%s-page%d.jpg" % (pdf_name, page_number), "JPEG")
Instead of using wand.image, you can use pdf2image. Install it like this:
pip install pdf2image
Here is code that loops through every page in the PDF and converts each one to JPEG:
import os
import tempfile
from pdf2image import convert_from_path
filename = 'target.pdf'
save_dir = 'dir'
base_name = os.path.splitext(os.path.basename(filename))[0]

# Make sure the destination folder exists before saving into it.
os.makedirs(save_dir, exist_ok=True)

with tempfile.TemporaryDirectory() as path:
    # No first_page/last_page limits, so every page of the PDF is converted.
    images_from_path = convert_from_path(filename, output_folder=path)
    # Save inside the `with` block: the pages may still be backed by files
    # in the temporary directory, which is removed when the block exits.
    # Each page gets its own numbered file name so none is overwritten.
    for page_number, page in enumerate(images_from_path, start=1):
        page.save(
            os.path.join(save_dir, '%s-page%d.jpg' % (base_name, page_number)),
            'JPEG',
        )

Categories

Resources