Pytesseract doesn't accept pyautogui screenshot, Windows, Python 3.6 - python

What I'm trying to do is to make a screenshot of a number with pyautogui and tranform the number to a string with pytesseract. The code:
import pyautogui
import time
import PIL
from PIL import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = 'C://Program Files (x86)//Tesseract-OCR//tesseract'
# Create image
time.sleep(5)
image = pyautogui.screenshot('projects/output.png', region=(1608, 314, 57, 41))
# Resize image
basewidth = 2000
img = Image.open('projects/output.png')
wpercent = (basewidth/float(img.size[0]))
hsize = int((float(img.size[1])*float(wpercent)))
img = img.resize((basewidth,hsize), PIL.Image.ANTIALIAS)
img.save('projects/output.png')
col = Image.open('projects/output.png')
gray = col.convert('L')
bw = gray.point(lambda x: 0 if x<128 else 255, '1')
bw.save('projects/output.png')
# Image to string
screen = Image.open('projects/output.png')
print(pytesseract.image_to_string(screen, config='tessedit_char_whitelist=0123456789'))
Now it seems that pytesseract doesn't accept the screenshot pyautogui creates. The code runs fine without problems but prints an empty string. If I create an image in paint however, and save it as 'output.png' to the correct folder exactly like the screenshot otherwise made, it does work.
Image output after resize and adjustments
Anyone has an idea where I'm missing something?

Modify the path and try the following:
import numpy as np
from numpy import *
from PIL import Image
from PIL import *
import pytesseract
import cv2
src_path = "C:\\Users\\USERNAME\\Documents\\OCR\\"
def get_region(box):
#Grabs the region of the box coordinates
im = ImageGrab.grab(box)
#Change size of image to 200% of the original size
a, b, c, d = box
doubleX = (c - a) * 2
doubleY = (d - b) * 2
im.resize((doubleX, doubleY)).save(os.getcwd() + "\\test.png", 'PNG')
def get_string(img_path):
# Read image with opencv
img = cv2.imread(img_path)
# Convert to gray
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Apply dilation and erosion to remove some noise
kernel = np.ones((1, 1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
# Write image after removed noise
cv2.imwrite(src_path + "removed_noise.png", img)
# Apply threshold to get image with only black and white
#img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
# Write the image after apply opencv to do some ...
cv2.imwrite(src_path + "thres.png", img)
# Recognize text with tesseract for python
result = pytesseract.image_to_string(Image.open(src_path + "thres.png"))
return result
def main():
#Grab the region of the screenshot (box area)
region = (1354,630,1433,648)
get_region(region)
#Output results
print ("OCR Output: ")
print (get_string(src_path + "test.png"))

Convert it to a numpy array, pytesseract accepts those.
import numpy as np
import pyautogui
img = np.array(pyautogui.screenshot())
print(pytesseract.image_to_string(img, config='tessedit_char_whitelist=0123456789'))
Alternatively I would recommend 'mss' for screenshots as they are much faster.
import mss
with mss.mss() as sct:
img = np.array(sct.grab(sct.monitors[1]))
print(pytesseract.image_to_string(img, config='tessedit_char_whitelist=0123456789'))

Related

Why isn't pytesseract recognizing this image?

It can read other images fine, it just can not read this one
enter image description here
import numpy as np
from pytesseract import pytesseract
import cv2
import numpy as np
from PIL import Image
import os
path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
pytesseract.tesseract_cmd = path_to_tesseract
img = cv2.imread(r'C:\Users\Owner\Desktop\Coding\PNGs\tugteam project\tugteam2.png')
grayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
(thresh, img) = cv2.threshold(grayImage, 127, 255, cv2.THRESH_BINARY)
img = cv2.bitwise_not(img)
img = cv2.resize(img, (600, 400))
cv2.imshow('asd',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
text = pytesseract.image_to_string(img)
print(text)
I tried resizing the image, converting it to grayscale b/w w/b nothing will work.

is there a way to able to read text from such images using selenium python

https://imgur.com/a/zCmwUEf.jpg
this is the image from whom i am trying to extract text but unable to do so.
import contours
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Users\tan\tesseract\Tesseract-OCR\tesseract.exe'
# Opening the image & storing it in an image object
img = cv2.imread("C:/Users/tan/Desktop/my tppc bots/training challange - Copy/sample4.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
im2 = img.copy()
file = open("recognized.txt", "w+")
file.write("")
file.close()
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
cropped = im2[y:y + h, x:x + w]
file = open("recognized.txt", "a")
text = pytesseract.image_to_string(cropped)
file.write(text)
file.write("\n")
this is my script
when i run it, it execute fine but when i open the text file it doesnt show any texts there just empty.
am i doing something wrong?
if someone can help me that be great
thanks!
I have found easyocr lib promising here.
Import the libs
import numpy as np
import easyocr
import cv2
read the image file
reader = easyocr.Reader(['en'],gpu = False) # load once only in memory.
image_file_name='capImage.png' # this is the screen snap of your image
image = cv2.imread(image_file_name)
get the text from image
image_text=(reader.readtext(image,detail=0)[0]) # output came as D F7BE1
print(image_text.replace(" ","")) # removed the space and output is : DF7BE1
clean up options for image :
image = cv2.imread(image_file_name)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
sharpen = cv2.filter2D(gray, -1, sharpen_kernel)
thresh = cv2.threshold(sharpen, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
height = 100
dim = (800, 800)
resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
now utilize the images.
image_text=(reader.readtext(thresh,detail=0)[0])
print(image_text.replace(" ",""))
image_text=(reader.readtext(sharpen,detail=0)[0])
print(image_text.replace(" ",""))
output:

Transparent Captcha Image with Horizontal Line

def resolve(img_path):
image = Image.open(img_path)
new_image = Image.new("RGBA", image.size, "WHITE") # Create a white rgba background
new_image.paste(image, (0, 0), image) # Paste the image on the background.
new_image.convert('RGB').save(img_path, "JPEG") # Save as JPEG
enhancedImage = enhance(img_path)
return pytesseract.image_to_string(img_path)
def enhance(img_path):
image1 = cv2.imread(img_path)
#print(image1)
img = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
ret, thresh1 = cv2.threshold(img, 180, 255, cv2.THRESH_BINARY_INV)
#thresh = 50
#im_bw = cv2.threshold(thresh3, thresh, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 2))
erosion = cv2.erode(thresh1, kernel, iterations = 1)
return erosion
I'm trying to solve captcha for above images. Tried converting transparent bg to white and then enchancing the image but results are not correct.
Let me start with the potential problem with your code
def enhance(img_path):
image1 = cv2.imread(img_path)
Now, if you read it with imread the result will be:
You can't read it with pytesseract from the output image.
This is a known issue stated in this answer: cv2 imread transparency gone
As mentioned in the answer:
put a white image behind the transparent one an with that you solve the problem.
We will apply the same technique and now result will be:
As for the second image result will be:
We will be doing the following steps for efficiently reading from the output image:
Resize the image
Apply adaptive-threshold
For the first image the result will be:
For the second image the result will be:
Now when you read it with pytesseract with mode 6 (modes), result will be:
3daab
b42yb
Code:
import cv2
from PIL import Image
from pytesseract import image_to_string
def save_transparent_image(image_path, save_name):
image = Image.open(image_path).convert("RGBA")
canvas = Image.new(mode='RGBA',
size=image.size, color=(255, 255, 255, 255))
canvas.paste(image, mask=image)
canvas.save(save_name, format="PNG")
img_lst = ["o3upS.png", "kGpYk.png"]
for i, img_name in enumerate(img_lst):
save_image = "captcha" + str(i) + ".png"
save_transparent_image(img_name, save_image)
# Step 1: Resize the image
img = cv2.imread(save_image)
(h, w) = img.shape[:2]
img = cv2.resize(img, (w*2, h*2))
# Step 2: Apply adaptive-threshold
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY, 33, 79)
# Step 3: Read the threshold image
txt = image_to_string(thr, config="--psm 6")
txt = txt.split("\n")[0]
print(txt)
Question Will this code will works for the other captchas?
No, it won't. Unless the captchas are similar to the given example. You need to change the adaptive-threshold's block-size and C parameters, see if it works.

unable to select image corners in opencv

I am trying to select the image corner's and crop it and then perform perspective transform on it.
but when I run this code, the window with an image opens up, but on double click, nothing happens so that I with double click, I could select image corners.
here's my code
from transform import four_point_transform
import imutils
from skimage.filters import threshold_adaptive
import numpy as np
import cv2
image = cv2.imread("test1.jpg")
ratio = image.shape[0] / 500.0
orig = image.copy()
window_name = "Select corner points of object"
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
cv2.imshow(window_name, image)
pts_1 = []
def callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDBLCLK:
pts_1.append((x, y))
cv2.circle(image,(x,y),10,(0,255,0),5)
cv2.imshow(window_name, image)
if(len(pts_1)==4):
pts = np.array(pts_1,dtype="float32")
print(pts_1)
warped = four_point_transform(orig, pts)
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
warped = threshold_adaptive(warped, 251, offset = 10)
warped = warped.astype("uint8") * 255
cv2.imshow("Original", imutils.resize(orig, height = 650))
cv2.imshow("Scanned", imutils.resize(warped, height = 650))
cv2.waitKey(0)
cv2.setMouseCallback(window_name, callback)
key = cv2.waitKey(0)
Any help would be appreciated?

Python - resize image

I'm using the code below (which is a googling result) to detect faces:
import io
import picamera
import cv2
import numpy
import PIL
from PIL import Image
from resizeimage import resizeimage
#Load a cascade file for detecting faces
face_cascade = cv2.CascadeClassifier('/usr/share/opencv/haarcascades/haarcascade_frontalface_alt.xml')
#Create a memory stream so photos doesn't need to be saved in a file
stream = io.BytesIO()
#Get the picture (low resolution, so it should be quite fast)
#Here you can also specify other parameters (e.g.:rotate the image)
with picamera.PiCamera() as camera:
camera.resolution = (640, 480)
camera.vflip = False
camera.hflip = False
camera.brightness = 60
camera.capture(stream, format='jpeg')
#Convert the picture into a numpy array
buff = numpy.fromstring(stream.getvalue(), dtype=numpy.uint8)
#Now creates an OpenCV image
image = cv2.imdecode(buff, 1)
#Load a cascade file for detecting faces
#face_cascade = cv2.CascadeClassifier('/usr/share/opencv/haarcascades/haarcascade_frontalface_alt.xml')
#Convert to grayscale
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
#Look for faces in the image using the loaded cascade file
faces = face_cascade.detectMultiScale(gray, 1.1, 5)
print "Found "+str(len(faces))+" face(s)"
#Draw a rectangle around every found face
#Crop faces and save to separate files
id = 1
for (x,y,w,h) in faces:
cv2.rectangle(image,(x,y),(x+w,y+h),(255,0,0),2)
cropped = image[ y : y+h, x : x+w ]
#RESIZE IMAGE to 92x112
cropped = cv2.resize(cropped,None,92,112)
cv2.imwrite("../reco/test_faces/cropped_face" + str(id) + ".png", cropped)
id = id + 1
At the end I want to crop the faces into image files and resize them to 92x112. This is what I try with:
cropped = cv2.resize(cropped,None,92,112)
When I run this I get:
OpenCV Error: Assertion failed (dsize.area() || (inv_scale_x > 0 && inv_scale_y > 0)) in resize, file /build/opencv-ISmtkH/opencv-2.4.9.1+dfsg/modules/imgproc/src/imgwarp.cpp, line 1835
Traceback (most recent call last):
File "1track.py", line 48, in <module>
cropped = cv2.resize(cropped,None,92,112)
cv2.error: /build/opencv-ISmtkH/opencv-2.4.9.1+dfsg/modules/imgproc/src/imgwarp.cpp:1835: error: (-215) dsize.area() || (inv_scale_x > 0 && inv_scale_y > 0) in function resize
To resize the image to new dimensions, you need to know the ratio between the new dimensions and the current ones. So if you want to set (for example) a 640x480 image into a 92x112 image:
92/640=0.143
112/480=0.233
You use these ratios in the cv2.resize function:
cropped = cv2.resize(cropped, (0,0), fx=0.143, fy=0.233)

Categories

Resources