Hello, I am trying to make a Python script that takes an image file from a computer and turns it into text. At the moment, I have the following code:
from tkinter import Tk
from tkinter.filedialog import askopenfilename
import pytesseract
# Create the Tk root window and withdraw it, so that only the file dialog is shown.
Tk().withdraw()
# Let the user choose a file; the chosen path is kept in `filename`.
filename = askopenfilename()
print(filename)
# NOTE(review): tesseract_cmd receives the very same path that is handed to
# image_to_string() on the next line, i.e. the file picked in the dialog
# (presumably the image) rather than the path of the Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = filename
print(pytesseract.image_to_string(filename))
However, this gives me the following error:
Traceback (most recent call last):
File "main.py", line 11, in <module>
print(pytesseract.image_to_string(filename))
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytesseract/pytesseract.py", line 409, in image_to_string
return {
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytesseract/pytesseract.py", line 412, in <lambda>
Output.STRING: lambda: run_and_get_output(*args),
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytesseract/pytesseract.py", line 287, in run_and_get_output
run_tesseract(**kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytesseract/pytesseract.py", line 258, in run_tesseract
raise e
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytesseract/pytesseract.py", line 255, in run_tesseract
proc = subprocess.Popen(cmd_args, **subprocess_args())
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/subprocess.py", line 854, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/subprocess.py", line 1702, in _execute_child
raise child_exception_type(errno_num, err_msg, err_filename)
PermissionError: [Errno 13] Permission denied: '/Users/william/theimage.jpg'
What am I doing wrong?
This line is wrong: pytesseract.pytesseract.tesseract_cmd = filename
tesseract_cmd must contain the path to your Tesseract engine executable, not the path to the image.
You need to find where Tesseract is installed on your machine and use that path:
pytesseract.pytesseract.tesseract_cmd = <path_to_tesseract_engine>
# example
# pytesseract.pytesseract.tesseract_cmd = "/some_folder1/some_folder2/.../<where_you_installed_tesseract>/tesseract"
# (on macOS the executable has no file extension; running `which tesseract` in a terminal prints its full path)
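You get the permission error because you set tesseract_cmd to the path of your image. When you call pytesseract.image_to_string(<your_image>), pytesseract tries to launch tesseract_cmd as a program (the traceback fails inside subprocess.Popen), and the operating system refuses to execute the image file because it is not an executable. That is why the message reads Permission denied: '/Users/william/theimage.jpg'.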
Once tesseract_cmd points at the Tesseract executable, the following call will work:
print(pytesseract.image_to_string(filename))
Related
I'm trying to work with pydub to get the times of each word spoken in an audio file, but Python throws a FileNotFoundError.
The code:
from pydub import AudioSegment
from pydub.silence import detect_nonsilent
# Absolute path of the WAV file to load.
path = "E:/PyApps/New/PythonApplication1/OSR_us_000_0010_8k.wav"
# Load the WAV file into a pydub AudioSegment.
# NOTE(review): the traceback below fails inside subprocess.Popen, called from
# pydub's mediainfo_json, so the file that cannot be found is presumably the
# external program pydub launches (ffprobe/ffmpeg) and not this WAV file —
# confirm that it is installed and on PATH.
audio_segment = AudioSegment.from_wav(path)
The error:
Traceback (most recent call last):
File "E:\PyApps\New\PythonApplication1\module1.py", line 5, in <module>
audio_segment = AudioSegment.from_wav(path)
File "C:\Users\Dean\AppData\Local\Programs\Python\Python39\lib\site-packages\pydub\audio_segment.py", line 808, in from_wav
return cls.from_file(file, 'wav', parameters=parameters)
File "C:\Users\Dean\AppData\Local\Programs\Python\Python39\lib\site-packages\pydub\audio_segment.py", line 728, in from_file
info = mediainfo_json(orig_file, read_ahead_limit=read_ahead_limit)
File "C:\Users\Dean\AppData\Local\Programs\Python\Python39\lib\site-packages\pydub\utils.py", line 274, in mediainfo_json
res = Popen(command, stdin=stdin_parameter, stdout=PIPE, stderr=PIPE)
File "C:\Users\Dean\AppData\Local\Programs\Python\Python39\lib\subprocess.py", line 951, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "C:\Users\Dean\AppData\Local\Programs\Python\Python39\lib\subprocess.py", line 1420, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
FileNotFoundError: [WinError 2] The system cannot find the file specified
What's weird is that I've got all the files mentioned in the error in the right folders, but it still can't find them for some reason.
I'm using pytesseract in Python to read a PDF, but I'm getting a permission error on Windows 10.
I have installed tesseract-ocr-w64-setup-v5.0.0-alpha.20200328.exe from https://github.com/UB-Mannheim/tesseract/wiki
I also have the poppler-20.09.0 files, and I'm using Python 3.8.0.
import pdf2image
import PyPDF2
import os
try:
from PIL import Image
except ImportError:
import Image
import pytesseract
# NOTE(review): this value is a folder path with no executable name at the end;
# pytesseract launches tesseract_cmd as a command, so it presumably has to name
# the executable itself (e.g. ...\Tesseract-OCR\tesseract.exe) — confirm.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR'
# Convert the PDF at `pdf_file` into page images via pdf2image, rendering at
# 200 dpi in 'jpg' format with the poppler binaries from the hard-coded
# poppler_path. Returns the result of convert_from_path() unchanged.
# NOTE: the body's indentation was lost in this paste; the three lines after
# the `def` line belong inside the function.
def pdf_to_img(pdf_file):
# Echo which PDF is about to be converted.
print('pdf_file = ', pdf_file)
return pdf2image.convert_from_path(pdf_file, dpi=200, fmt='jpg',
poppler_path=r'F:\lokesh\resume_script\poppler-20.09.0\bin')
# Run pytesseract.image_to_string() on `file` and return the resulting text.
# print_pages() calls this once per page image.
# NOTE: the body's indentation was lost in this paste; the two lines after the
# `def` line belong inside the function.
def ocr_core(file):
text = pytesseract.image_to_string(file,)
return text
# Convert `pdf_file` to page images with pdf_to_img(), OCR each image with
# ocr_core() and print the text of every page in order.
# NOTE: the body's indentation was lost in this paste; the lines after the
# `def` line belong inside the function, and the print() inside the loop.
def print_pages(pdf_file):
images = pdf_to_img(pdf_file)
# `pg` (the index supplied by enumerate) is not used in the loop body.
for pg, img in enumerate(images):
print(ocr_core(img))
# Script entry point: OCR the file aa.pdf (relative path) and print its text.
print_pages("aa.pdf")
When I run this code, it gives this error:
Traceback (most recent call last):
File "test.py", line 84, in <module>
print_pages("aa.pdf")
File "test.py", line 81, in print_pages
print(ocr_core(img))
File "test.py", line 74, in ocr_core
text = pytesseract.image_to_string(file,)
File "F:\python\lib\site-packages\pytesseract\pytesseract.py", line 344, in image_to_string
return {
File "F:\python\lib\site-packages\pytesseract\pytesseract.py", line 347, in <lambda>
Output.STRING: lambda: run_and_get_output(*args),
File "F:\python\lib\site-packages\pytesseract\pytesseract.py", line 258, in run_and_get_output
run_tesseract(**kwargs)
File "F:\python\lib\site-packages\pytesseract\pytesseract.py", line 229, in run_tesseract
raise e
File "F:\python\lib\site-packages\pytesseract\pytesseract.py", line 226, in run_tesseract
proc = subprocess.Popen(cmd_args, **subprocess_args())
File "F:\python\lib\subprocess.py", line 854, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "F:\python\lib\subprocess.py", line 1307, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
PermissionError: [WinError 5] Access is denied
How can we solve this error on Windows?
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR'
needs to be the full path to the executable itself, not just the installation folder:
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
I'm trying to read text from an image using pytesseract, but it throws this error no matter what I try.
Code:
import os
import subprocess
import pytesseract
from PIL import Image
# List the current directory through the shell, to show that test.jpg is there.
subprocess.call('dir', shell=True)
print(os.path.isfile("test.jpg")) #True
# Open the image with PIL and hand the Image object to pytesseract.
im = Image.open("test.jpg")
# NOTE(review): the traceback below fails inside subprocess.Popen, called from
# pytesseract's run_tesseract, so the file that cannot be found is presumably
# the tesseract executable rather than test.jpg — confirm that Tesseract is
# installed and on PATH, or set pytesseract.pytesseract.tesseract_cmd.
text = pytesseract.image_to_string(im)
OUTPUT:
Traceback (most recent call last):
File "F:/Programming/PYTHON/captcha/test/main.py", line 10, in <module>
text = pytesseract.image_to_string(im)
File "F:\Program Files (64bit)\Python\lib\site-packages\pytesseract\pytesseract.py", line 193, in image_to_string
return run_and_get_output(image, 'txt', lang, config, nice)
File "F:\Program Files (64bit)\Python\lib\site-packages\pytesseract\pytesseract.py", line 140, in run_and_get_output
run_tesseract(**kwargs)
File "F:\Program Files (64bit)\Python\lib\site-packages\pytesseract\pytesseract.py", line 111, in run_tesseract
proc = subprocess.Popen(command, stderr=subprocess.PIPE)
File "F:\Program Files (64bit)\Python\lib\subprocess.py", line 709, in __init__
restore_signals, start_new_session)
File "F:\Program Files (64bit)\Python\lib\subprocess.py", line 997, in _execute_child
startupinfo)
FileNotFoundError: [WinError 2] The system cannot find the file specified
Process finished with exit code 1
What's wrong? I know that the file is there; I checked it using os.path.isfile and the subprocess call.
Other things that I tried:
- added the full image file path using a raw string literal (the 'r' prefix)
- tried different images and file types
I am trying to use osm-bundler:
ubuntu: ~/osm-bundler$ ./RunBundler.py --photos="/home/ubuntu/photo"
Working directory created: /home/ubuntu/osm-bundler/output/osm-bundler-o1drFG
BundlerMatching executable path: /home/ubuntu/osm-bundler/software/bundler/KeyMatchFull
Sift executable path: /home/ubuntu/osm-bundler/software/sift-lowe/sift
but I get an error:
Processing photo 'IMGP3417.jpg':
Copy of the photo has been scaled down to 1200x900
Traceback (most recent call last):
File "./RunBundler.py", line 10, in <module>
manager.preparePhotos()
File "/home/ubuntu/osm-bundler/osmbundler/__init__.py", line 168, in preparePhotos
self._preparePhoto(photoInfo)
File "/home/ubuntu/osm-bundler/osmbundler/__init__.py", line 278, in _preparePhoto
self.extractFeatures(photo)
File "/home/ubuntu/osm-bundler/osmbundler/__init__.py", line 345, in extractFeatures
self.featureExtractor.extract(photo, self.photoDict[photo])
File "/home/ubuntu/osm-bundler/osmbundler/features/siftlowe.py", line 25, in extract
p = subprocess.call(self.executable, **dict(stdin=photoFile, stdout=siftTextFile))
File "/usr/lib/python2.7/subprocess.py", line 523, in call
return Popen(*popenargs, **kwargs).wait()
File "/usr/lib/python2.7/subprocess.py", line 711, in __init__
errread, errwrite)
File "/usr/lib/python2.7/subprocess.py", line 1343, in _execute_child
raise child_exception
OSError: [Errno 2] No such file or directory
Lines 23, 24, and 25 of siftlowe.py:
# Input for the executable: the photo's "<photo>.jpg.pgm" file, opened for binary reading.
photoFile = open("%s.jpg.pgm" % photo, "rb")
# Output of the executable: the "<photo>.key" file, opened for text writing.
siftTextFile = open("%s.key" % photo, "w")
# Run self.executable with the PGM file as its stdin and the key file as its stdout.
# subprocess has to be able to find and start self.executable for this to work.
subprocess.call(self.executable, **dict(stdin=photoFile, stdout=siftTextFile))
File permissions are set correctly.
It looks like you installed only osm-bundler.zip and not osm-bundler-full.zip. The line with self.executable tries to execute sift, which is expected at /home/ubuntu/osm-bundler/software/sift-lowe/sift. This is most probably not installed (I tried it myself with osm-bundler.zip, and it was missing).
I'm trying to use the pytesser image_to_string function but it doesn't work:
The code is:
from PIL import ImageGrab
from pytesser import *
import Image
# Grab the screen region given by the box (0, 0, 900, 500) and save it as sss.jpg on the Desktop.
ImageGrab.grab((0, 0, 900, 500)).save("C:\Users\Emanuele\Desktop\sss" + '.jpg')
# Re-open the saved screenshot as a PIL image.
imm= Image.open("C:\Users\Emanuele\Desktop\sss.jpg")
# Python 2 print statement: OCR the image with pytesser and print the text.
# NOTE(review): the traceback below fails inside subprocess.Popen, called from
# pytesser's call_tesseract, so the file that cannot be found is presumably the
# tesseract executable rather than the image — confirm where pytesser expects it.
print image_to_string(imm)
It says:
Traceback (most recent call last):
File "C:\Users\Emanuele\Desktop\Project Z\Python\Imagesave\Imagesave\__init__.py", line 7, in <module>
text = image_file_to_string('C:\Users\Emanuele\Desktop\sss.jpg', graceful_errors=False)
File "C:\Python27\lib\site-packages\pytesser.py", line 44, in image_file_to_string
call_tesseract(filename, scratch_text_name_root)
File "C:\Python27\lib\site-packages\pytesser.py", line 21, in call_tesseract
proc = subprocess.Popen(args)
File "C:\Python27\lib\subprocess.py", line 711, in __init__
errread, errwrite)
File "C:\Python27\lib\subprocess.py", line 948, in _execute_child
startupinfo)
WindowsError: [Error 2] Impossibile trovare il file specificato {translation: The system cannot find the file specified}