Hey I started working with Tesseract OCR but I'm having problems getting the text from really simple RGB images.
It works just fine with text2image images.
Here is my code:
from PIL import Image
import pytesseract
import argparse
import cv2
import os
import sys
class wordExtractor():
    """Extract text from an image file with Tesseract OCR.

    The image is converted to grayscale and binarised with an Otsu threshold
    before it is handed to pytesseract.
    """

    def __init__(self, image_path,
                 tesseract_cmd=r'/home/yarin/tesseract/bin/debug/tesseract'):
        """Store the image path and tell pytesseract which binary to run.

        image_path: path of the image to run OCR on.
        tesseract_cmd: path of the tesseract executable; the default is the
            location that used to be hard-coded here.
        """
        self.image_path = image_path
        pytesseract.pytesseract.tesseract_cmd = tesseract_cmd
        # resize_image() is intentionally not called automatically.

    def resize_image(self):
        """Rescale the image to a width of 800 px and store it as PNG.

        The aspect ratio is kept. The result is written next to the original
        with a ``.png`` extension and ``self.image_path`` is updated to point
        at it. The original file is removed unless it is that same file.
        """
        basewidth = 800
        # The context manager closes the source file before it may be deleted.
        with Image.open(self.image_path) as img:
            wpercent = basewidth / float(img.size[0])
            hsize = int(float(img.size[1]) * wpercent)
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            resized = img.resize((basewidth, hsize), Image.LANCZOS)
        # splitext copes with extensions of any length (".jpeg", ".tiff", ...).
        png_path = os.path.splitext(self.image_path)[0] + '.png'
        # Save first and delete afterwards so a failed save cannot lose the original.
        resized.save(png_path)
        if not os.path.samefile(png_path, self.image_path):
            os.remove(self.image_path)
        self.image_path = png_path

    def get_text(self, lang):
        """Return the text Tesseract recognises in the image.

        lang: Tesseract language code passed on to pytesseract, e.g. 'eng'.
        Raises FileNotFoundError when OpenCV cannot read the image.
        """
        # load the image and convert it to grayscale
        image = cv2.imread(self.image_path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(
                "cannot read image: {}".format(self.image_path))
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Otsu picks the threshold itself, so the 0 passed here is a placeholder.
        gray = cv2.threshold(gray, 0, 255,
                             cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        # pytesseract accepts the array directly, so no temporary file is
        # needed (and none can be left behind when OCR fails).
        return pytesseract.image_to_string(gray, lang=lang)
# show the output images
#cv2.imshow("Image", image)
#cv2.imshow("Output", gray)
#cv2.waitKey(0)
# Run OCR on the sample image '6.png' and print whatever text was recognised.
w = wordExtractor('6.png')
print(w.get_text('eng'))
Tesseract returns empty string for the following images:
Please show me how I can solve this. Thanks in advance!
After thresholding, you can use findContours to find contour for each shape. Then you can filter the contours and put every contour you are interested in into a blank white image. By then, you will get the letters and ready to process using tesseract. You can see the detail in the code below.
import cv2
import numpy as np
import pytesseract
# Isolate letter-like contours on a white canvas and run Tesseract on it.
# NOTE(review): the original indentation was lost; the area filter is assumed
# to be nested under the aspect-ratio filter.

# img = cv2.imread("dwLFQ.png", cv2.IMREAD_COLOR)
img = cv2.imread("NfwY4.png", cv2.IMREAD_COLOR)
# img = cv2.imread("xTH6s.png", cv2.IMREAD_COLOR)
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError("cannot read the input image")

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# findContours returns either 2 or 3 values; pick the contour list in both cases.
items = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours = items[0] if len(items) == 2 else items[1]

# Blank white canvas that receives the selected letters.
base = np.full(thresh.shape, 255, dtype=np.uint8)

max_area = 0
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    ratio = h / w
    area = cv2.contourArea(contour)
    # Outline every contour on the colour image for visual inspection.
    cv2.drawContours(img, [contour], 0, (255, 0, 0), 2)

    # Keep only shapes that are taller than wide.
    if not 1 < ratio < 3:
        continue
    max_area = max(area, max_area)
    print("area: " + str(area) + ", max area: " + str(max_area) + ", ratio: " + str(ratio))

    # if 1000 < area < max_area / 2:
    if not 1000 < area < 40000:
        continue

    # Mean of the binary image inside the contour tells whether the glyph is
    # drawn in white (high mean) or black (low mean).
    mask = np.zeros(thresh.shape, dtype=np.uint8)
    cv2.drawContours(mask, [contour], -1, color=255, thickness=-1)
    mean = cv2.mean(thresh, mask=mask)

    segment = thresh[y:y + h, x:x + w]
    if mean[0] > 150:
        # white, invert (bitwise_not returns a new array, thresh is untouched)
        segment = cv2.bitwise_not(segment)
    base[y:y + h, x:x + w] = segment

    # Step-by-step preview: press a key to continue with the next contour.
    cv2.imshow("base", base)
    cv2.waitKey(0)

custom_config = r'-l eng --oem 3 --psm 6 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ " '
text = pytesseract.image_to_string(base, config=custom_config)
print("detected: " + text)

cv2.imshow("img", img)
cv2.imshow("base", base)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result
detected: NO
ENTRY
Related
I have the following code to extract text from an image
# OCR the picture with the 'lets' language data, then keep only the digits.
img = cv2.imread('download.jpg')
text = pytesseract.image_to_string(img, lang='lets', config='--psm 6 ')
# Drop every character that is not one of the ASCII digits 0-9.
solution = ''.join(ch for ch in text if ch in '0123456789')
However using an image like below where it says 1981, the actual text that gets pulled back is 5139011
Any suggestions?
The most important part is cleaning the pepper noise. After some OpenCV operations (maybe not the best ones) I achieved this clean image:
To get expected text extraction i use this tesseract trained font:
Trained data
Here's the result:
And here's the code:
import cv2
import numpy as np
import pytesseract
# Clean the pepper noise from the picture, OCR the digits and draw the
# recognised words with their bounding boxes.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'

img = cv2.imread('a.jpg')
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError("cannot read a.jpg")

grayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
(_, blackWhiteImage) = cv2.threshold(grayImage, 127, 255, cv2.THRESH_BINARY)

# Add a 50 px white frame around the image.
blackWhiteImage = cv2.copyMakeBorder(src=blackWhiteImage, top=50, bottom=50, left=50, right=50, borderType=cv2.BORDER_CONSTANT, value=(255,255,255))

# Morphological clean-up of the pepper noise.
blackWhiteImage = cv2.dilate(blackWhiteImage, cv2.getStructuringElement(cv2.MORPH_RECT, (1,4)))
blackWhiteImage = cv2.dilate(blackWhiteImage, cv2.getStructuringElement(cv2.MORPH_RECT, (4,1)))
blackWhiteImage = cv2.morphologyEx(blackWhiteImage, cv2.MORPH_CLOSE, np.ones((3,3),np.uint8))
blackWhiteImage = cv2.erode(blackWhiteImage, np.ones((7,7),np.uint8))

# Ask for a dict of columns instead of raw TSV text: splitting the TSV on
# whitespace breaks as soon as a recognised string contains a space.
data = pytesseract.image_to_data(
    blackWhiteImage,
    lang="lets",
    config="-c tessedit_char_whitelist=0123456789 --psm 7",
    output_type=pytesseract.Output.DICT,
)

originalImage = cv2.cvtColor(blackWhiteImage, cv2.COLOR_GRAY2BGR)
text = []
boxes = zip(data["text"], data["left"], data["top"], data["width"], data["height"])
for word, x, y, w, h in boxes:
    # Rows without text describe pages, blocks and lines, not words.
    if not word.strip():
        continue
    cv2.rectangle(originalImage, (x, y), (x + w, y + h), (0, 255, 0), 1)
    cv2.putText(originalImage, word, (x, y - 2), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255), 1)
    text.append(word)

print("Text result: \n", text)
cv2.imshow('Image result', originalImage)
cv2.waitKey(0)
https://imgur.com/a/zCmwUEf.jpg
This is the image from which I am trying to extract text, but I am unable to do so.
import contours
import cv2
import pytesseract
# Find text regions with a dilated Otsu mask, OCR each region and write the
# recognised text to recognized.txt (one region per line group).
pytesseract.pytesseract.tesseract_cmd = r'C:\Users\tan\tesseract\Tesseract-OCR\tesseract.exe'

# Opening the image & storing it in an image object
img = cv2.imread("C:/Users/tan/Desktop/my tppc bots/training challange - Copy/sample4.jpg")
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError("cannot read the input image")

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)

# A large kernel merges neighbouring characters into one blob per text region.
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)

# [-2:] keeps the unpacking valid whether findContours returns 2 or 3 values.
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]

# im2 only receives the green boxes for visualisation.
im2 = img.copy()

# Mode "w" empties the file; a single handle is reused for all regions and is
# closed (and therefore flushed) when the block ends.
with open("recognized.txt", "w", encoding="utf-8") as file:
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Crop from the untouched image: im2 has the box outlines painted on
        # it, and those lines would end up inside the crop handed to Tesseract.
        cropped = img[y:y + h, x:x + w]
        text = pytesseract.image_to_string(cropped)
        file.write(text)
        file.write("\n")
this is my script
When I run it, it executes fine, but when I open the text file it doesn't show any text; it is just empty.
Am I doing something wrong?
If someone can help me, that would be great.
thanks!
I have found easyocr lib promising here.
Import the libs
import numpy as np
import easyocr
import cv2
read the image file
# Build the EasyOCR reader for English with the GPU disabled.
reader = easyocr.Reader(['en'],gpu = False) # load once only in memory.
image_file_name='capImage.png' # this is the screen snap of your image
# Load the screenshot with OpenCV.
image = cv2.imread(image_file_name)
get the text from image
# detail=0 asks EasyOCR for the recognised strings only.
detected = reader.readtext(image, detail=0)
# Nothing recognised gives an empty list; fall back to "" instead of failing
# with an IndexError.
image_text = detected[0] if detected else ""  # output came as D F7BE1
print(image_text.replace(" ", ""))  # removed the space and output is : DF7BE1
clean up options for image :
# Optional pre-processing variants of the screenshot: grayscale, sharpened,
# inverse Otsu-thresholded and resized.
image = cv2.imread(image_file_name)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# 3x3 sharpening kernel: centre weight 9, all eight neighbours -1.
sharpen_kernel = np.array([
    [-1, -1, -1],
    [-1, 9, -1],
    [-1, -1, -1],
])
sharpen = cv2.filter2D(gray, -1, sharpen_kernel)

otsu_inverted = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
thresh = cv2.threshold(sharpen, 0, 255, otsu_inverted)[1]

height = 100  # not used by the steps shown here
dim = (800, 800)
# Resizes the colour image to exactly 800x800 pixels.
resized = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
now utilize the images.
# OCR both pre-processed variants and print the first hit of each without spaces.
for candidate in (thresh, sharpen):
    detected = reader.readtext(candidate, detail=0)
    # Nothing recognised gives an empty list; print an empty line instead of
    # failing with an IndexError.
    image_text = detected[0] if detected else ""
    print(image_text.replace(" ", ""))
output:
I used the code below to count the cigarettes in the image below using OpenCV in Python, but it did not work. The code only finds some of them. I don't know what the issue is. Please help me.
import numpy as np
import cv2
from PIL import Image
import sys
# Load the picture, black out its near-white pixels, erode it and save the
# intermediate result as Test112.png.
# Raw strings keep the backslashes of the Windows paths literal.
Path = r'D:\Artificial intelligence\Phyton'
filename = 'Test.png'

img = cv2.imread(r'D:\Artificial intelligence\Phyton\Test.png')
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError("cannot read Test.png")
# Untouched copy used later for drawing the detections; copying avoids
# decoding the same file a second time.
img1 = img.copy()

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
# Pixels brighter than 240 are set to black in the colour image.
img[thresh == 255] = 0

kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
erosion = cv2.erode(img, kernel, iterations=1)
cv2.imwrite(r'D:\Artificial intelligence\Phyton\Test112.png', erosion)
def findcircles(img, contours):
    """Mark every small contour on the global ``img1`` and save the picture.

    img: not used; kept so existing calls keep working.
    contours: contours as returned by cv2.findContours.

    Returns the number of contours that were marked, i.e. the object count.
    """
    marked = 0
    for contour in contours:
        (x, y), radius = cv2.minEnclosingCircle(contour)
        # Contours whose enclosing circle has a radius above 5 px are skipped.
        if int(radius) > 5:
            continue
        cv2.circle(img1, (int(x), int(y)), 1, (191, 255, 0), 2)
        marked += 1
    # NOTE(review): the original indentation was lost; the picture is written
    # once after all contours have been processed.
    cv2.imwrite(r'D:\Artificial intelligence\Phyton\Test11234.png', img1)
    return marked
# Build a mask of candidate objects from the eroded picture, saving every
# intermediate image, then hand the contours to findcircles.
# Raw strings are required for these paths: in a normal string literal the
# "\t" of "\threshold.png", the "\b" of "\blur.png" and the "\r" of
# "\result.png" turn into tab, backspace and carriage return.
img = cv2.imread(r"D:\Artificial intelligence\Phyton\Test112.png")
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError("cannot read Test112.png")
cv2.imwrite(r'D:\Artificial intelligence\Phyton\org.png', img)

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, threshold = cv2.threshold(gray, 199, 255, cv2.THRESH_BINARY_INV)
cv2.imwrite(r'D:\Artificial intelligence\Phyton\threshold.png', threshold)

blur = cv2.medianBlur(gray, 7)
cv2.imwrite(r'D:\Artificial intelligence\Phyton\blur.png', blur)

laplacian = cv2.Laplacian(blur, -1, ksize=5, delta=-50)
cv2.imwrite(r'D:\Artificial intelligence\Phyton\laplacian.png', laplacian)

kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
dilation = cv2.dilate(laplacian, kernel, iterations=1)
cv2.imwrite(r'D:\Artificial intelligence\Phyton\dilation.png', dilation)

# Remove the dilated edges from the thresholded mask.
result = cv2.subtract(threshold, dilation)
cv2.imwrite(r'D:\Artificial intelligence\Phyton\result.png', result)

# [-2:] keeps the unpacking valid whether findContours returns 2 or 3 values.
contours, hierarchy = cv2.findContours(result, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2:]
findcircles(gray, contours)
Image :
enter image description here
My result:
enter image description here
def resolve(img_path):
    """Flatten the image onto white, enhance it and return the OCR text.

    img_path: path of the captcha image. The file is overwritten with the
        flattened picture, saved as JPEG data.

    Returns the string recognised by pytesseract on the enhanced image.
    """
    # Converting to RGBA guarantees an alpha band, which paste() needs when
    # the image is used as its own mask.
    image = Image.open(img_path).convert("RGBA")
    new_image = Image.new("RGBA", image.size, "WHITE")  # Create a white rgba background
    new_image.paste(image, (0, 0), image)  # Paste the image on the background.
    new_image.convert('RGB').save(img_path, "JPEG")  # Save as JPEG
    enhancedImage = enhance(img_path)
    # OCR the enhanced picture; passing img_path here would ignore the
    # enhancement that was just computed.
    return pytesseract.image_to_string(enhancedImage)
def enhance(img_path):
    """Return an eroded, inverse-binarised version of the image at img_path."""
    grey = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY)
    # Inverse binary threshold at 180: brighter pixels become 0, the rest 255.
    _, inverted = cv2.threshold(grey, 180, 255, cv2.THRESH_BINARY_INV)
    structuring = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 2))
    return cv2.erode(inverted, structuring, iterations=1)
I'm trying to solve the captchas in the images above. I tried converting the transparent background to white and then enhancing the image, but the results are not correct.
Let me start with the potential problem with your code
def enhance(img_path):
image1 = cv2.imread(img_path)
Now, if you read it with imread the result will be:
You can't read it with pytesseract from the output image.
This is a known issue stated in this answer: cv2 imread transparency gone
As mentioned in the answer:
put a white image behind the transparent one an with that you solve the problem.
We will apply the same technique and now result will be:
As for the second image result will be:
We will be doing the following steps for efficiently reading from the output image:
Resize the image
Apply adaptive-threshold
For the first image the result will be:
For the second image the result will be:
Now when you read it with pytesseract with mode 6 (modes), result will be:
3daab
b42yb
Code:
import cv2
from PIL import Image
from pytesseract import image_to_string
def save_transparent_image(image_path, save_name):
    """Composite the image onto an opaque white canvas and save it as PNG."""
    source = Image.open(image_path).convert("RGBA")
    opaque_white = (255, 255, 255, 255)
    background = Image.new(mode='RGBA', size=source.size, color=opaque_white)
    # The image's own alpha band serves as the paste mask.
    background.paste(source, mask=source)
    background.save(save_name, format="PNG")
# Flatten each captcha onto white, enlarge it, binarise it and print the
# first line Tesseract reads from it.
img_lst = ["o3upS.png", "kGpYk.png"]

for i, img_name in enumerate(img_lst):
    save_image = "captcha{}.png".format(i)
    save_transparent_image(img_name, save_image)

    # Step 1: Resize the image to twice its width and height
    img = cv2.imread(save_image)
    h, w = img.shape[:2]
    img = cv2.resize(img, (w * 2, h * 2))

    # Step 2: Apply adaptive-threshold (block size 33, constant C 79)
    gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thr = cv2.adaptiveThreshold(
        gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 33, 79
    )

    # Step 3: Read the threshold image and keep the text before the first newline
    txt = image_to_string(thr, config="--psm 6")
    txt = txt.partition("\n")[0]
    print(txt)
Question: Will this code work for other captchas?
No, it won't. Unless the captchas are similar to the given example. You need to change the adaptive-threshold's block-size and C parameters, see if it works.
I am a beginner to opencv and I have tried to crop a single face from a picture for my project but couldn't crop all the faces from the picture.
What can be done to detect all the faces and crop them to move to a folder?
taking images from the input folder and posting the cropped image to the output folder.
import numpy as np
import cv2
import os, os.path
#multiple cascades: https://github.com/Itseez/opencv/tree/master/data/haarcascades
#https://github.com/Itseez/opencv/blob/master/data/haarcascades/haarcascade_frontalface_default.xml
# Detect faces in input/1.jpg .. input/N.jpg and save a crop per picture to
# the output folder.
face_cascade = cv2.CascadeClassifier('faces.xml')
#https://github.com/Itseez/opencv/blob/master/data/haarcascades/haarcascade_eye.xml
eye_cascade = cv2.CascadeClassifier('eye.xml')

DIR = 'input'
# Number of regular files in the input folder; the pictures are expected to
# be named 1.jpg, 2.jpg, ...
numPics = len([name for name in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, name))])

for pic in range(1, (numPics + 1)):
    img = cv2.imread('input/' + str(pic) + '.jpg')
    if img is None:
        # cv2.imread returns None for a missing or unreadable file.
        print("Image" + str(pic) + " could not be read, skipping")
        continue

    height = img.shape[0]
    width = img.shape[1]
    size = height * width

    # '^' is bitwise XOR in Python (500 ^ 2 == 502); '**' is the power operator.
    if size > (500 ** 2):
        # Scale down to a width of 500 px, keeping the aspect ratio.
        r = 500.0 / img.shape[1]
        dim = (500, int(img.shape[0] * r))
        img2 = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
        img = img2

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    # NOTE: eyesn is counted per picture and every face is written to the same
    # file name, so only the last accepted face of a picture survives.
    eyesn = 0
    for (x, y, w, h) in faces:
        imgCrop = img[y:y + h, x:x + w]
        #cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
        roi_gray = gray[y:y + h, x:x + w]
        roi_color = img[y:y + h, x:x + w]
        eyes = eye_cascade.detectMultiScale(roi_gray)
        # One count per detected eye rectangle.
        eyesn = eyesn + len(eyes)
        if eyesn >= 2:
            cv2.imwrite("output/crop" + str(pic) + ".jpg", imgCrop)
            #cv2.imshow('img',imgCrop)

    print("Image" + str(pic) + " has been processed and cropped")
    # ESC (key code 27) aborts the batch.
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

print("All images have been processed!!!")
cv2.destroyAllWindows()
How to crop all faces from a picture to save in a folder?
# Save every detected face that shows at least two eyes to its own file.
#### the counter
cnt = 0
for pic in range(1, (numPics + 1)):
    img = cv2.imread('input/' + str(pic) + '.jpg')
    if img is None:
        # cv2.imread returns None for a missing or unreadable file.
        print("Image" + str(pic) + " could not be read, skipping")
        continue

    height = img.shape[0]
    width = img.shape[1]
    size = height * width

    # '^' is bitwise XOR in Python (500 ^ 2 == 502); '**' is the power operator.
    if size > (500 ** 2):
        # Scale down to a width of 500 px, keeping the aspect ratio.
        r = 500.0 / img.shape[1]
        dim = (500, int(img.shape[0] * r))
        img2 = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
        img = img2

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    for (x, y, w, h) in faces:
        imgCrop = img[y:y + h, x:x + w]
        #cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
        roi_gray = gray[y:y + h, x:x + w]
        roi_color = img[y:y + h, x:x + w]
        eyes = eye_cascade.detectMultiScale(roi_gray)
        # The eye count starts from zero for every face.
        eyesn = len(eyes)
        if eyesn >= 2:
            #### increase the counter and save
            cnt += 1
            # The running counter gives every crop a unique file name.
            cv2.imwrite("output/crop{}_{}.jpg".format(pic, cnt), imgCrop)
            #cv2.imshow('img',imgCrop)

    print("Image" + str(pic) + " has been processed and cropped")
    # ESC (key code 27) aborts the batch.
    k = cv2.waitKey(100) & 0xff
    if k == 27:
        break

print("All images have been processed!!!")
cv2.destroyAllWindows()
You said you tried it for a single face. Did you succeed in that? What error do you get after executing the given code?