Why isn't pytesseract recognizing this image? - python

It can read other images fine; it just cannot read this one.
enter image description here
# OCR a PNG with Tesseract: load, binarize, invert, resize, preview, then read.
# Fix: the original imported numpy twice (duplicate `import numpy as np`).
import numpy as np
from pytesseract import pytesseract
import cv2
from PIL import Image
import os

# Point pytesseract at the local Tesseract binary (Windows install path).
path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
pytesseract.tesseract_cmd = path_to_tesseract

img = cv2.imread(r'C:\Users\Owner\Desktop\Coding\PNGs\tugteam project\tugteam2.png')
grayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Fixed-threshold binarization at the midpoint of the 8-bit range.
(thresh, img) = cv2.threshold(grayImage, 127, 255, cv2.THRESH_BINARY)
img = cv2.bitwise_not(img)  # invert so the text polarity flips
# NOTE(review): resizing to a fixed 600x400 ignores the aspect ratio and can
# distort the glyphs — consider scaling both axes by the same factor instead.
img = cv2.resize(img, (600, 400))

# Preview the preprocessed image before running OCR on it.
cv2.imshow('asd', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

text = pytesseract.image_to_string(img)
print(text)
I tried resizing the image and converting it to grayscale (both black-on-white and white-on-black), but nothing works.

Related

Color enhancement using PIL: How to compute color value?

I am using these functions to modify the values of brightness, contrast and sharpness. My default values are 128, 24, 4 respectively. I extracted these values. I don't know how to extract the value of the color to modify it in the same way.
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance
def brightness_enhancer(img, br):
    """Return a PIL image with brightness rescaled toward the 128 baseline.

    img: BGR ndarray as produced by cv2.imread.
    br:  the image's measured brightness; the factor 128/br pulls it toward
         the default value of 128 used by the author.
    """
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV is BGR; PIL expects RGB
    img = Image.fromarray(img)
    enhancer = ImageEnhance.Brightness(img)
    factor = 128 / br
    return enhancer.enhance(factor)
def contrast_enhancer(img, ct):
    """Return *img* with contrast rescaled toward the 24 baseline.

    img: a PIL image (the BGR->RGB conversion present in brightness_enhancer
         is intentionally disabled here — the caller supplies a PIL image).
    ct:  the image's measured contrast; factor 24/ct pulls it toward 24.
    """
    enhancer = ImageEnhance.Contrast(img)
    factor = 24 / ct
    return enhancer.enhance(factor)
def sharpness_enhancer(img, sh):
    """Return *img* (a PIL image) with sharpness rescaled toward the 4 baseline.

    sh: the image's measured sharpness; factor 4/sh pulls it toward 4.
    """
    enhancer = ImageEnhance.Sharpness(img)
    factor = 4 / sh
    return enhancer.enhance(factor)
def color_enhancer(img):
    """Return *img* (a PIL image) with color saturation halved.

    Unlike the other enhancers, the factor here is a hard-coded 0.5 because
    the author has no measured color value to normalize against.
    """
    enhancer = ImageEnhance.Color(img)
    factor = 0.5  # fixed enhancement factor for the image
    return enhancer.enhance(factor)

Transparent Captcha Image with Horizontal Line

def resolve(img_path):
    """Flatten a transparent captcha onto white, enhance it, and OCR it."""
    image = Image.open(img_path)
    # Transparent PNGs lose their background when read naively; paste onto a
    # solid white canvas first, using the image's own alpha as the mask.
    new_image = Image.new("RGBA", image.size, "WHITE")  # Create a white rgba background
    new_image.paste(image, (0, 0), image)  # Paste the image on the background.
    new_image.convert('RGB').save(img_path, "JPEG")  # Save as JPEG
    enhancedImage = enhance(img_path)
    # Bug fix: the original passed img_path to Tesseract, silently discarding
    # the enhanced image. OCR the preprocessed result instead.
    return pytesseract.image_to_string(enhancedImage)
def enhance(img_path):
    """Read the image at *img_path*, binarize it (inverted), and thin it slightly."""
    bgr = cv2.imread(img_path)
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    # Inverted binary threshold: pixels brighter than 180 become black.
    _, binary = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY_INV)
    # One pass of erosion with a 1x2 elliptical kernel thins the strokes.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 2))
    return cv2.erode(binary, ellipse, iterations=1)
I'm trying to solve the captcha for the above images. I tried converting the transparent background to white and then enhancing the image, but the results are not correct.
Let me start with the potential problem with your code
def enhance(img_path):
image1 = cv2.imread(img_path)
Now, if you read it with imread the result will be:
You can't read it with pytesseract from the output image.
This is a known issue stated in this answer: cv2 imread transparency gone
As mentioned in the answer:
put a white image behind the transparent one, and with that you solve the problem.
We will apply the same technique and now result will be:
As for the second image result will be:
We will be doing the following steps for efficiently reading from the output image:
Resize the image
Apply adaptive-threshold
For the first image the result will be:
For the second image the result will be:
Now when you read it with pytesseract with mode 6 (modes), result will be:
3daab
b42yb
Code:
import cv2
from PIL import Image
from pytesseract import image_to_string
def save_transparent_image(image_path, save_name):
    """Flatten the (possibly transparent) source image onto white and save as PNG."""
    src = Image.open(image_path).convert("RGBA")
    white = Image.new(mode='RGBA', size=src.size, color=(255, 255, 255, 255))
    # The source's own alpha channel acts as the paste mask.
    white.paste(src, mask=src)
    white.save(save_name, format="PNG")
# Flatten, resize, threshold, and OCR each captcha image in turn.
img_lst = ["o3upS.png", "kGpYk.png"]
for i, img_name in enumerate(img_lst):
    save_image = "captcha" + str(i) + ".png"
    save_transparent_image(img_name, save_image)
    # Step 1: Resize the image (doubling each dimension).
    img = cv2.imread(save_image)
    (h, w) = img.shape[:2]
    img = cv2.resize(img, (w * 2, h * 2))
    # Step 2: Apply adaptive-threshold (mean, block-size 33, C 79 — tuned for
    # these particular captchas; other images may need different parameters).
    gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                cv2.THRESH_BINARY, 33, 79)
    # Step 3: Read the threshold image with page-segmentation mode 6.
    txt = image_to_string(thr, config="--psm 6")
    txt = txt.split("\n")[0]  # keep only the first line of the OCR output
    print(txt)
Question: Will this code work for other captchas?
No, it won't. Unless the captchas are similar to the given example. You need to change the adaptive-threshold's block-size and C parameters, see if it works.

Pytesseract OCR not returning proper result on captcha image

i am using tesseract 4.0.0-beta.1
I have the following image
ocr image
i have converted this image using opencv
converted image
# Load directly as grayscale, upscale 5x with bilinear interpolation, smooth
# speckle noise with a large (9px) median filter, then binarize at 185.
img = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, None, fx=5, fy=5, interpolation=cv2.INTER_LINEAR)
img = cv2.medianBlur(img, 9)
_, img = cv2.threshold(img, 185, 255, cv2.THRESH_BINARY)
my tesseract commend
tesseract image.png stdout -l eng-best --oem 1 --psm 7
getting result: NVRG nk
but result should be : nvRGnk
Starting from your converted image, just need a little more filtering
nvRGnk
# Invert, close, and lightly dilate the glyphs so Tesseract reads them cleanly.
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Work on the inverted grayscale image so morphology thickens the characters.
inverted = 255 - cv2.imread('2.png', 0)
rect = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
closed = cv2.morphologyEx(inverted, cv2.MORPH_CLOSE, rect, iterations=2)
ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2))
thickened = cv2.dilate(closed, ellipse, iterations=1)
# Invert back to dark text on a light background before OCR.
result = 255 - thickened
data = pytesseract.image_to_string(result, lang='eng', config='--psm 13')
print(data)
cv2.imshow('result', result)
cv2.waitKey()

Pytesseract doesn't accept pyautogui screenshot, Windows, Python 3.6

What I'm trying to do is take a screenshot of a number with pyautogui and transform the number to a string with pytesseract. The code:
# Screenshot a screen region, enlarge and binarize it, then OCR the digits.
import pyautogui
import time
import PIL
from PIL import Image
import pytesseract

pytesseract.pytesseract.tesseract_cmd = 'C://Program Files (x86)//Tesseract-OCR//tesseract'

# Create image: grab a small fixed region after a 5-second delay.
time.sleep(5)
image = pyautogui.screenshot('projects/output.png', region=(1608, 314, 57, 41))

# Resize image: scale the width to 2000 px, preserving the aspect ratio.
basewidth = 2000
img = Image.open('projects/output.png')
wpercent = basewidth / float(img.size[0])
hsize = int(float(img.size[1]) * wpercent)
# Fix: PIL.Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
# same resampling filter under its current name.
img = img.resize((basewidth, hsize), Image.LANCZOS)
img.save('projects/output.png')

# Binarize: convert to grayscale, then hard-threshold at 128.
col = Image.open('projects/output.png')
gray = col.convert('L')
bw = gray.point(lambda x: 0 if x < 128 else 255, '1')
bw.save('projects/output.png')

# Image to string. Fix: Tesseract config variables must be passed with the
# '-c' flag — without it the character whitelist is silently ignored.
screen = Image.open('projects/output.png')
print(pytesseract.image_to_string(screen, config='-c tessedit_char_whitelist=0123456789'))
Now it seems that pytesseract doesn't accept the screenshot pyautogui creates. The code runs fine without problems but prints an empty string. If I create an image in paint however, and save it as 'output.png' to the correct folder exactly like the screenshot otherwise made, it does work.
Image output after resize and adjustments
Does anyone have an idea where I'm missing something?
Modify the path and try the following:
# Fix: os and ImageGrab are used by get_region() below but were never
# imported. NOTE(review): the wildcard imports are kept for compatibility
# but should be replaced with explicit names.
import os

import cv2
import numpy as np
from numpy import *
import pytesseract
from PIL import Image
from PIL import ImageGrab
from PIL import *

# Directory where intermediate OCR images are written.
src_path = "C:\\Users\\USERNAME\\Documents\\OCR\\"
def get_region(box):
    """Grab the screen region *box* (left, top, right, bottom), double its
    size, and save it as test.png in the current working directory."""
    # Grabs the region of the box coordinates.
    im = ImageGrab.grab(box)
    # Change size of image to 200% of the original size.
    a, b, c, d = box
    doubleX = (c - a) * 2
    doubleY = (d - b) * 2
    im.resize((doubleX, doubleY)).save(os.getcwd() + "\\test.png", 'PNG')
def get_string(img_path):
    """Lightly denoise the image at *img_path* and return its OCR text."""
    # Read with OpenCV and drop the color channels.
    grayscale = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY)
    # Apply dilation then erosion with a 1x1 kernel to remove some noise.
    one_px = np.ones((1, 1), np.uint8)
    denoised = cv2.erode(cv2.dilate(grayscale, one_px, iterations=1),
                         one_px, iterations=1)
    # Write image after removed noise.
    cv2.imwrite(src_path + "removed_noise.png", denoised)
    # Write the image that will actually be fed to the OCR step.
    cv2.imwrite(src_path + "thres.png", denoised)
    # Recognize text with tesseract for python.
    return pytesseract.image_to_string(Image.open(src_path + "thres.png"))
def main():
    """Capture a fixed screen region and print its OCR output."""
    # Grab the region of the screenshot (box area).
    get_region((1354, 630, 1433, 648))
    # Output results.
    print("OCR Output: ")
    print(get_string(src_path + "test.png"))
Convert it to a numpy array, pytesseract accepts those.
# pytesseract accepts numpy arrays directly, so convert the PIL screenshot.
import numpy as np
import pyautogui
img = np.array(pyautogui.screenshot())
# NOTE(review): Tesseract config variables normally need a '-c' prefix —
# confirm this whitelist actually takes effect as written.
print(pytesseract.image_to_string(img, config='tessedit_char_whitelist=0123456789'))
Alternatively I would recommend 'mss' for screenshots as they are much faster.
# mss is a faster screenshot alternative: grab the first monitor and OCR it.
import mss

with mss.mss() as sct:
    # sct.monitors[1] is the first physical monitor (index 0 is the union of all).
    img = np.array(sct.grab(sct.monitors[1]))
    print(pytesseract.image_to_string(img, config='tessedit_char_whitelist=0123456789'))

Adjusting RGB on PIL ImageGrab Module Python

I am trying to make a screencast script with ImageGrab, but I am getting weird blue-colored images.
# Record the screen to an AVI file at 8 FPS until Esc is pressed.
import cv2
import numpy as np
from PIL import ImageGrab

out = cv2.VideoWriter('record.avi', 2, 8.0, (1366, 768))
while (True):
    img = ImageGrab.grab(bbox=(0, 0, 1366, 768))  # x, y, w, h
    img_np = np.array(img)
    frame = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
    # NOTE(review): ImageGrab yields RGB-ordered pixels while VideoWriter
    # expects BGR — writing img_np as-is is what causes the blue tint;
    # convert with cv2.COLOR_RGB2BGR before writing to fix it.
    out.write(img_np)
    cv2.imshow("Recorder", frame)
    key = cv2.waitKey(1)
    if key == 27:  # Esc stops the recording loop
        break
cv2.waitKey(0)
cv2.destroyAllWindows()
# Swap the channel order to OpenCV's BGR before writing each frame.
# Fix: `vid` was undefined in the snippet — the VideoWriter in the script
# above is named `out`.
RGB_img = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
out.write(RGB_img)
This will work.

Categories

Resources