Crop all characters from an image - python

I put together some code to extract all characters from an image. I sort the characters from left to right and try to crop each character into a separate image. Not all characters are cropped properly; some of them end up with a size of zero.
The only characters whose crops do not have a zero dimension are B, C, D, E and F. Here is an image of the output.
import cv2
import numpy as np

def crop_minAreaRect(img, rect):
    # https://stackoverflow.com/questions/37177811/crop-rectangle-returned-by-minarearect-opencv-python
    # rotate img
    center = rect[0]
    size = rect[1]
    print("size[0]: " + str(int(size[0])) + ", size[1]: " + str(int(size[1])))
    angle = rect[2]
    print("angle: " + str(angle))
    rows, cols = img.shape[0], img.shape[1]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    img_rot = cv2.warpAffine(img, M, (cols, rows))
    # rotate bounding box
    rect0 = (rect[0], rect[1], angle)
    box = cv2.boxPoints(rect0)
    pts = np.int0(cv2.transform(np.array([box]), M))[0]
    pts[pts < 0] = 0
    # crop
    img_crop = img_rot[pts[1][1]:pts[0][1], pts[1][0]:pts[2][0]]
    w, h = img_crop.shape[0], img_crop.shape[1]
    print("w_cropped: " + str(w) + ", h_cropped: " + str(h))
    return img_crop

def sort_contours(cnts, method="left-to-right"):
    # from https://pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
    reverse = False
    i = 0
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes), key=lambda b: b[1][i], reverse=reverse))
    return (cnts, boundingBoxes)

im_name = 'letters.png'
im = cv2.imread(im_name)
im_copy = im.copy()
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# cv2.drawContours(im_copy, contours, -1, (0, 255, 0), 2)
# cv2.imshow("contours", im_copy)
print("num contours: " + str(len(contours)))
i = 0
sorted_cnts, bounding_boxes = sort_contours(contours, method="left-to-right")
for cnt in sorted_cnts:
    size = cv2.contourArea(cnt)
    x, y, w, h = cv2.boundingRect(cnt)
    rect = cv2.minAreaRect(cnt)
    # print(str(rect))
    # if rect[1][0] > 0 and rect[1][1] > 0:
    im_cropped = crop_minAreaRect(im, rect)
    h, w = im_cropped.shape[0], im_cropped.shape[1]
    if w > h:
        im_cropped = cv2.rotate(im_cropped, cv2.ROTATE_90_CLOCKWISE)
    print("w: " + str(w) + ", h: " + str(h))
    if w > 0 and h > 0:
        cv2.imshow("cropped" + str(i), im_cropped)
        i += 1
        # cv2.waitKey(0)
cv2.waitKey(0)

There appears to be an error in your crop_minAreaRect function: the crop img_rot[pts[1][1]:pts[0][1], pts[1][0]:pts[2][0]] assumes a fixed ordering of the rotated box points, which does not hold for every angle returned by cv2.minAreaRect, so some slices come out empty.
I haven't debugged your code any further than the return of crop_minAreaRect, so the letters may or may not be correctly rotated when following your approach, but this change fixes the underlying problem.
The proposed function is taken from the following question and modified: How to straighten a rotated rectangle area of an image using OpenCV in Python?
import cv2
import numpy as np

def subimage(image, center, theta, width, height):
    '''
    Rotates OpenCV image around center with angle theta (in deg)
    then crops the image according to width and height.
    '''
    width = int(width)
    height = int(height)
    # Uncomment for theta in radians
    # theta *= 180/np.pi
    shape = (image.shape[1], image.shape[0])  # cv2.warpAffine expects shape in (length, height)
    matrix = cv2.getRotationMatrix2D(center=center, angle=theta, scale=1)
    image = cv2.warpAffine(src=image, M=matrix, dsize=shape)
    x = int(center[0] - width / 2)
    y = int(center[1] - height / 2)
    image = image[y:y + height, x:x + width]
    return image

def sort_contours(cnts, method="left-to-right"):
    # from https://pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
    reverse = False
    i = 0
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes), key=lambda b: b[1][i], reverse=reverse))
    return (cnts, boundingBoxes)

im_name = 'letters.png'
im = cv2.imread(im_name)
im_copy = im.copy()
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(im_copy, contours, -1, (0, 255, 0), 2)
cv2.imshow("contours", im_copy)
# print("num contours: " + str(len(contours)))
i = 0
sorted_cnts, bounding_boxes = sort_contours(contours, method="left-to-right")
for cnt in sorted_cnts:
    size = cv2.contourArea(cnt)
    x, y, w, h = cv2.boundingRect(cnt)
    rect = cv2.minAreaRect(cnt)
    im_cropped = subimage(im, center=rect[0], theta=rect[2], width=rect[1][0], height=rect[1][1])
    h, w = im_cropped.shape[0], im_cropped.shape[1]
    if w > h:
        im_cropped = cv2.rotate(im_cropped, cv2.ROTATE_90_CLOCKWISE)
    # print("w: " + str(w) + ", h: " + str(h))
    if w > 0 and h > 0:
        cv2.imshow("cropped" + str(i), im_cropped)
        i += 1
        # cv2.waitKey(0)
cv2.waitKey(0)
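Not part of the original answer, but since the question started from crops with a zero dimension, it may also help to guard against degenerate rectangles and empty crops before rotating or displaying them. A minimal sketch, reusing subimage, im and sorted_cnts from the snippet above (the output filename is only illustrative):
i = 0
for cnt in sorted_cnts:
    rect = cv2.minAreaRect(cnt)
    (cx, cy), (rw, rh), angle = rect
    if rw < 1 or rh < 1:
        # degenerate rectangle (e.g. a hairline contour), nothing sensible to crop
        continue
    im_cropped = subimage(im, center=(cx, cy), theta=angle, width=rw, height=rh)
    if im_cropped.size == 0:
        # the crop window fell outside the image after rotation
        continue
    if im_cropped.shape[1] > im_cropped.shape[0]:
        im_cropped = cv2.rotate(im_cropped, cv2.ROTATE_90_CLOCKWISE)
    cv2.imwrite("letter_" + str(i) + ".png", im_cropped)  # illustrative output name
    i += 1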

Related

Binarize low contrast images

I have a bunch of image snippets with low contrast that I'd like to binarize using Python.
I tried various thresholding methods like Otsu and Huang, but none seems to work for all my image snippets.
Following instructions like this one, I pieced together the code below:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import math
import glob
import os.path
import os

def permissions(targetfile):
    os.chmod(targetfile, mode=0o755)
    os.chown(targetfile, 1000, 1000)

# resize snippet
def resize(image):
    image_resized = cv2.resize(image, None, fx=12, fy=12)
    return image_resized

# apply CLAHE
def clahe(image):
    # CLAHE parameters
    cl1 = 6
    cl2 = 9
    cl3 = 9
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=cl1, tileGridSize=(cl2, cl3))
    cv_gray_clahe = clahe.apply(image_gray)
    return cv_gray_clahe

# binarize image using Huang's method (https://github.com/dnhkng/Huang-Thresholding)
def binarize(image):
    # image = np.array(image)  # image needs to be of class 'numpy.ndarray'
    histogram, bin_edges = np.histogram(image, bins=range(257))
    huang_threshold = Huang(histogram)
    threshold = np.where(image > huang_threshold, 1, 0)
    threshold = threshold.astype(np.uint8)
    return threshold

def Huang(data):
    """Implements Huang's fuzzy thresholding method
    Uses Shannon's entropy function (one can also use Yager's entropy function)
    Huang L.-K. and Wang M.-J.J. (1995) "Image Thresholding by Minimizing
    the Measures of Fuzziness" Pattern Recognition, 28(1): 41-51"""
    threshold = -1
    first_bin = 0
    for ih in range(254):
        if data[ih] != 0:
            first_bin = ih
            break
    last_bin = 254
    for ih in range(254, -1, -1):
        if data[ih] != 0:
            last_bin = ih
            break
    term = 1.0 / (last_bin - first_bin)
    # print(first_bin, last_bin, term)
    mu_0 = np.zeros(shape=(254, 1))
    num_pix = 0.0
    sum_pix = 0.0
    for ih in range(first_bin, 254):
        sum_pix = sum_pix + (ih * data[ih])
        num_pix = num_pix + data[ih]
        mu_0[ih] = sum_pix / num_pix  # NUM_PIX cannot be zero !
    mu_1 = np.zeros(shape=(254, 1))
    num_pix = 0.0
    sum_pix = 0.0
    for ih in range(last_bin, 1, -1):
        sum_pix = sum_pix + (ih * data[ih])
        num_pix = num_pix + data[ih]
        mu_1[ih - 1] = sum_pix / num_pix  # NUM_PIX cannot be zero !
    min_ent = float("inf")
    for it in range(254):
        ent = 0.0
        for ih in range(it):
            # Equation (4) in Reference
            mu_x = 1.0 / (1.0 + term * math.fabs(ih - mu_0[it]))
            if not ((mu_x < 1e-06) or (mu_x > 0.999999)):
                # Equation (6) & (8) in Reference
                ent = ent + data[ih] * (-mu_x * math.log(mu_x) - (1.0 - mu_x) * math.log(1.0 - mu_x))
        for ih in range(it + 1, 254):
            # Equation (4) in Reference
            mu_x = 1.0 / (1.0 + term * math.fabs(ih - mu_1[it]))
            if not ((mu_x < 1e-06) or (mu_x > 0.999999)):
                # Equation (6) & (8) in Reference
                ent = ent + data[ih] * (-mu_x * math.log(mu_x) - (1.0 - mu_x) * math.log(1.0 - mu_x))
        if ent < min_ent:
            min_ent = ent
            threshold = it
    # print("min_ent, threshold ", min_ent, threshold)
    return threshold

# input files
path = glob.glob("./" + "*.JPG")
path.extend(glob.glob("./" + "*.jpg"))

# output directory
targetdir = "./output/"
os.makedirs(targetdir, exist_ok=True)
permissions(targetdir)

for img in path:
    poststring = ""
    targetfile = targetdir + os.path.basename(img).split('.')[0] + poststring + \
                 os.path.splitext(img)[1]
    # Change filename of targetfile
    if not os.path.exists(targetfile):
        print("Processing targetfile: ", targetfile)
        # read image and resize
        image = cv2.imread(img)
        resized_image = resize(image)
        # CLAHE
        clahe_image = clahe(resized_image)
        denoised_image = cv2.fastNlMeansDenoising(clahe_image, h=21, templateWindowSize=9, searchWindowSize=21)
        # Huang thresholding
        binarized_image = binarize(denoised_image)
        binarized_image *= 255
        # dilate
        kernel = np.ones((12, 12), np.uint8)
        dilate = cv2.dilate(binarized_image, kernel, iterations=3)
        # flood fill from the borders
        h, w = image.shape[:2]
        for row in range(h):
            if dilate[row, 0] == 255:
                cv2.floodFill(dilate, None, (0, row), 0)
            if dilate[row, w - 1] == 255:
                cv2.floodFill(dilate, None, (w - 1, row), 0)
        for col in range(w):
            if dilate[0, col] == 255:
                cv2.floodFill(dilate, None, (col, 0), 0)
            if dilate[h - 1, col] == 255:
                cv2.floodFill(dilate, None, (col, h - 1), 0)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
        foreground = cv2.morphologyEx(dilate, cv2.MORPH_OPEN, kernel)
        foreground = cv2.morphologyEx(foreground, cv2.MORPH_CLOSE, kernel)
        # creating background
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (17, 17))
        background = cv2.dilate(foreground, kernel, iterations=3)
        cv2.imwrite(targetfile, background)
        permissions(targetfile)
    else:
        print("Skipping, because already existing: ", targetfile)
        permissions(targetfile)
print('')
The result is still not satisfying:
Could you please advise on how to lose the noise, keep the desired features, and obtain straight/ellipse-like contour lines?
Adding the original snippets here for testing purposes: download snippets
Here is one approach that stretches the contrast first. It may need tuning for other images.
Read the input
Stretch the contrast
Apply a Gaussian blur
Convert to gray
Adaptive threshold
Get contours and filter on area larger than some threshold
Draw white filled contours on a black background for the contours that pass the filter
Save the results
Input:
import cv2
import numpy as np
import skimage.exposure

# load image
img = cv2.imread('low_contrast.png')

# stretch contrast
stretch = skimage.exposure.rescale_intensity(img, in_range=(95, 115), out_range=(0, 255)).astype(np.uint8)

# Gaussian blur
blur = cv2.GaussianBlur(stretch, (0, 0), sigmaX=5, sigmaY=5)

# convert to gray
gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)

# adaptive threshold
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 101, -9)

# get contours and filter on area
contour_img = img.copy()
result = np.zeros_like(thresh)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
    area = cv2.contourArea(cntr)
    if area > 2000:
        cv2.drawContours(contour_img, [cntr], 0, (255, 255, 255), 1)
        cv2.drawContours(result, [cntr], 0, (255), -1)

# save results
cv2.imwrite('low_contrast_stretched.png', stretch)
cv2.imwrite('low_contrast_blur.png', blur)
cv2.imwrite('low_contrast_gray.png', gray)
cv2.imwrite('low_contrast_thresh.png', thresh)
cv2.imwrite('low_contrast_contours.png', contour_img)
cv2.imwrite('low_contrast_contours_filled.png', result)

# show results
cv2.imshow('stretch', stretch)
cv2.imshow('blur', blur)
cv2.imshow('gray', gray)
cv2.imshow('thresh', thresh)
cv2.imshow('contours', contour_img)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Contrast stretched image:
Blurred image:
Grayscale image:
Adaptive threshold image:
Filtered Contour Image:
Final Binary image:
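The fixed in_range=(95, 115) above was picked for this particular image. As a possible generalisation (my assumption, not part of the answer above), the stretch limits can be derived from intensity percentiles instead of being hard-coded:
import cv2
import numpy as np
import skimage.exposure

img = cv2.imread('low_contrast.png')

# derive the stretch limits from the 2nd and 98th intensity percentiles;
# the percentile values themselves are just a starting point to tune
lo, hi = np.percentile(img, (2, 98))
stretch = skimage.exposure.rescale_intensity(img, in_range=(lo, hi), out_range=(0, 255)).astype(np.uint8)

cv2.imwrite('low_contrast_stretched_auto.png', stretch)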

opencv show rendered image with bounding boxes and labels

The code below is able to detect objects without issue. However, towards the end there is the line cv2.imshow("demo", img).
I would expect this window to show the image with the generated bounding boxes and labels, but all I get is a blank window. I originally got this code from some examples on the internet, so I'm a bit lost as to how to position that line, or why it's not rendering the image.
import cv2
import numpy as np

def take_pic(output_filename):
    import os
    capture_img = "ffmpeg -y -rtsp_transport udp -i rtsp://mycamera:apassword#172.16.66.106/live -vframes 1 " + output_filename

net = cv2.dnn.readNet("yolov3.weights", "./darknet/cfg/yolov3.cfg")
classes = []
with open("./darknet/data/coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

output_filename = "/tmp/camera.jpeg"
cap = cv2.imread(output_filename)
j = 0
if j == 0:
    cv2.namedWindow("demo", cv2.WINDOW_AUTOSIZE)
    while True:
        take_pic(output_filename)
        cap = cv2.imread(source)
        j = j + 1
        print("j= " + str(j))
        img = cap
        img = cv2.resize(img, None, fx=0.4, fy=0.4)
        height, width, channels = img.shape
        blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Object detected
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    print(str(center_x) + " " + str(center_y))
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    # Rectangle coordinates
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        font = cv2.FONT_HERSHEY_PLAIN
        for i in range(len(boxes)):
            if i in indexes:
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                print("label :" + str(label) + "x: " + str(x) + " y: " + str(y))
                color = colors[i]
                cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
                cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
        cv2.imshow("demo", img)
else:
    print("camera open failed")
cv2.destroyAllWindows()
With OpenCV, an imshow call has to be accompanied by a waitKey call in order to actually display the image.
Paste something like this towards the end of your loop, after you call cv2.imshow:
if cv2.waitKey(0) == ord('q'):
    print('exitting loop')
    break
If the image still shows up blank in imshow, you might need to multiply the pixels by 255. For instance, in MATLAB images are normalized between 0 and 1.
Try:
cv2.imshow("demo", img * 255)
cv2.waitKey(0)
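One more note, since the detection runs inside a while True loop: cv2.waitKey(0) blocks on every iteration until a key is pressed. If you want the window to keep refreshing on its own, the usual pattern is a short wait such as waitKey(1). A minimal, self-contained sketch of that pattern (the placeholder frame stands in for your rendered detection image):
import cv2
import numpy as np

frame = np.zeros((240, 320, 3), dtype=np.uint8)  # placeholder for the rendered detection image
while True:
    cv2.imshow("demo", frame)
    key = cv2.waitKey(1) & 0xFF  # 1 ms wait lets the window redraw each iteration
    if key == ord('q'):          # press 'q' to leave the loop
        break
cv2.destroyAllWindows()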

OCR for digit recognition in Python using OpenCV and Pytesseract

Hello, I am trying to identify the odometer reading from the attached image using OpenCV and the EAST model along with Pytesseract.
Following is my code:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pytesseract                                         # needed by image_to_string below
from imutils.object_detection import non_max_suppression  # needed by the box suppression below

# assuming you have the result image stored in median
median = cv2.imread("odo_4.jpg", 0)
image_gray = median
binary = cv2.bitwise_not(image_gray)
blur = cv2.GaussianBlur(image_gray, (5, 5), 0)
ret2, th2 = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
edged = cv2.Canny(th2, 50, 80, 255)
# threshold = cv2.adaptiveThreshold(edged, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
close = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel, iterations=1)

contours = cv2.findContours(close, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
rect_cnts = []
for cnt in contours:
    peri = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
    (x, y, w, h) = cv2.boundingRect(cnt)
    ar = w / float(h)
    if (len(approx) == 4) & (ar >= 0.95 and ar <= 1.05):  # shape filtering condition
        pass
    else:
        rect_cnts.append(cnt)

max_area = 0
football_square = None
for cnt in rect_cnts:
    (x, y, w, h) = cv2.boundingRect(cnt)
    if max_area < w * h:
        max_area = w * h
        football_square = cnt

image = cv2.cvtColor(image_gray, cv2.COLOR_GRAY2RGB)
(x, y, w, h) = cv2.boundingRect(football_square)
new_image = image[y:y + h, x:x + w]
new = new_image

import cv2 as cv
orig = new.copy()
(origH, origW) = new.shape[:2]
rW = origW / 320.0
rH = origH / 320.0

# resize the original image to new dimensions
new = cv.resize(new, (320, 320))
(H, W) = new.shape[:2]

# construct a blob from the image to forward pass it to the EAST model
blob = cv.dnn.blobFromImage(new, 1.0, (W, H),
                            (123.68, 116.78, 103.94), swapRB=True, crop=False)
net = cv.dnn.readNet('frozen_east_text_detection.pb')
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)

def predictions(prob_score, geo):
    (numR, numC) = prob_score.shape[2:4]
    boxes = []
    confidence_val = []
    # loop over rows
    for y in range(0, numR):
        scoresData = prob_score[0, 0, y]
        x0 = geo[0, 0, y]
        x1 = geo[0, 1, y]
        x2 = geo[0, 2, y]
        x3 = geo[0, 3, y]
        anglesData = geo[0, 4, y]
        # loop over the number of columns
        for i in range(0, numC):
            if scoresData[i] < 0.5:
                continue
            (offX, offY) = (i * 4.0, y * 4.0)
            # extract the rotation angle for the prediction and compute the sine and cosine
            angle = anglesData[i]
            cos = np.cos(angle)
            sin = np.sin(angle)
            # use the geo volume to get the dimensions of the bounding box
            h = x0[i] + x2[i]
            w = x1[i] + x3[i]
            # compute start and end for the text pred bbox
            endX = int(offX + (cos * x1[i]) + (sin * x2[i]))
            endY = int(offY - (sin * x1[i]) + (cos * x2[i]))
            startX = int(endX - w)
            startY = int(endY - h)
            boxes.append((startX, startY, endX, endY))
            confidence_val.append(scoresData[i])
    # return bounding boxes and associated confidence_val
    return (boxes, confidence_val)

(boxes, confidence_val) = predictions(scores, geometry)
boxes = non_max_suppression(np.array(boxes), probs=confidence_val)

# initialize the list of results
results = []
# loop over the bounding boxes to find the coordinates of the bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the coordinates based on the respective ratios in order to reflect the bounding box on the original image
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)
    # extract the region of interest
    r = orig[startY:endY, startX:endX]
    plt.imshow(r)
    # configuration setting to convert image to string
    configuration = ("-l eng --oem 1 --psm 7")
    # this will recognize the text from the image of the bounding box
    text = pytesseract.image_to_string(r, config=configuration)
    # append bbox coordinates and associated text to the list of results
    results.append(((startX, startY, endX, endY), text))
The results are bad, but my EAST model does identify the contour (area) where the digits are present. Can you please help me? I have tried different psm values in the config for image_to_string.
Use cv.inRange() for the selection. See the example:
import cv2 as cv
low_H = 80
low_S = 160
low_V = 200
high_H = 100
high_S = 255
high_V = 255
frame = cv.imread('OAPgE.jpg')
frame_HSV = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
frame_threshold = cv.inRange(frame_HSV, (low_H, low_S, low_V), (high_H, high_S, high_V))
frame_threshold=cv.bitwise_not(frame_threshold)
cv.imwrite('out_36.png', frame_threshold)
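The snippet above only isolates the digits; to actually read them you would still pass the thresholded image to Pytesseract. A short sketch of that step (the whitelist and psm value are my suggestions, not part of the answer above):
import cv2 as cv
import pytesseract

frame_threshold = cv.imread('out_36.png', cv.IMREAD_GRAYSCALE)  # mask written above

# treat the crop as a single text line and restrict recognition to digits
config = "--oem 1 --psm 7 -c tessedit_char_whitelist=0123456789."
text = pytesseract.image_to_string(frame_threshold, config=config)
print(text.strip())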

Bad character recognition with Pytesseract OCR for images with table structure

I use some code to locate text boxes and draw a rectangle around each of them. This allows me to rebuild the grid around the table structure in the image.
However, even though the text box detection works very well, when I try to read the characters present in each rectangle, pytesseract does not identify them well and does not recover the original text.
Here is my Python code:
import os
import cv2
import imutils
import argparse
import numpy as np
import pytesseract

# This only works if there's only one table on a page
# Important parameters:
# - morph_size
# - min_text_height_limit
# - max_text_height_limit
# - cell_threshold
# - min_columns

def pre_process_image(img, save_in_file, morph_size=(8, 8)):
    # get rid of the color
    pre = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    def img_estim(img, threshold=127):
        is_dark = np.mean(img) < threshold
        return True if is_dark else False

    # Negative
    if img_estim(pre):
        print("non")
        pre = cv2.bitwise_not(pre)

    # Contrast & brightness control
    contrast = 2.0   # 0 to 3
    brightness = 0   # 0 to 100
    for y in range(pre.shape[0]):
        for x in range(pre.shape[1]):
            pre[y, x] = np.clip(contrast * pre[y, x] + brightness, 0, 255)

    # Otsu threshold
    pre = cv2.threshold(pre, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # dilate the text to make it a solid spot
    cpy = pre.copy()
    struct = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    cpy = cv2.dilate(~cpy, struct, anchor=(-1, -1), iterations=1)
    pre = ~cpy

    if save_in_file is not None:
        cv2.imwrite(save_in_file, pre)
    return pre

def find_text_boxes(pre, min_text_height_limit=15, max_text_height_limit=40):
    # Looking for the text spot contours
    # OpenCV 3
    # img, contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 4
    contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # Getting the text bounding boxes based on the text size assumptions
    boxes = []
    for contour in contours:
        box = cv2.boundingRect(contour)
        h = box[3]
        if min_text_height_limit < h < max_text_height_limit:
            boxes.append(box)
    return boxes

def find_table_in_boxes(boxes, cell_threshold=10, min_columns=2):
    rows = {}
    cols = {}
    # Clustering the bounding boxes by their positions
    for box in boxes:
        (x, y, w, h) = box
        col_key = x // cell_threshold
        row_key = y // cell_threshold
        cols[row_key] = [box] if col_key not in cols else cols[col_key] + [box]
        rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]
    # Filtering out the clusters having less than 2 cols
    table_cells = list(filter(lambda r: len(r) >= min_columns, rows.values()))
    # Sorting the row cells by x coord
    table_cells = [list(sorted(tb)) for tb in table_cells]
    # Sorting rows by the y coord
    table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))
    return table_cells

def build_lines(table_cells):
    if table_cells is None or len(table_cells) <= 0:
        return [], []
    max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
    max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]
    max_last_row_height_box = max(table_cells[-1], key=lambda b: b[3])
    max_y = max_last_row_height_box[1] + max_last_row_height_box[3]
    hor_lines = []
    ver_lines = []
    for box in table_cells:
        x = box[0][0]
        y = box[0][1]
        hor_lines.append((x, y, max_x, y))
    for box in table_cells[0]:
        x = box[0]
        y = box[1]
        ver_lines.append((x, y, x, max_y))
    (x, y, w, h) = table_cells[0][-1]
    ver_lines.append((max_x, y, max_x, max_y))
    (x, y, w, h) = table_cells[0][0]
    hor_lines.append((x, max_y, max_x, max_y))
    return hor_lines, ver_lines

if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True,
                    help="path to input image to be OCR'd")
    # ap.add_argument("-east", "--east", type=str,
    #                 help="path to input EAST text detector")
    args = vars(ap.parse_args())

    in_file = os.path.join("images", args["image"])
    pre_file = os.path.join("images", "pre.png")
    out_file = os.path.join("images", "out.png")

    img = cv2.imread(os.path.join(in_file))
    top, bottom, left, right = [25] * 4
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_REPLICATE)

    orig = img.copy()
    pre_processed = pre_process_image(img, pre_file)
    text_boxes = find_text_boxes(pre_processed)
    cells = find_table_in_boxes(text_boxes)
    hor_lines, ver_lines = build_lines(cells)

    # (H, W) = img.shape[:2]
    # net = cv2.dnn.readNet(args["east"])
    # blob = cv2.dnn.blobFromImage(img, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
    # net.setInput(blob)

    # Visualize the result
    vis = img.copy()
    results = []
    for box in text_boxes:
        (x, y, w, h) = box
        startX = x - 2
        startY = y - 2
        endX = x + w
        endY = y + h
        cv2.rectangle(vis, (startX, startY), (endX, endY), (0, 255, 0), 1)
        roi = orig[startX:endX, startY:endY]
        config = ("-l eng --psm 6")
        pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
        text = pytesseract.image_to_string(roi, config=config)
        results.append(((startX, startY, endX, endY), text))

    results = sorted(results, key=lambda r: r[0][1])
    output = orig.copy()
    for ((startX, startY, endX, endY), text) in results:
        print("{}\n".format(text))
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
        cv2.rectangle(output, (startX, startY), (endX, endY), (0, 0, 255), 1)
        cv2.putText(output, text, (startX, startY - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)

    # for line in hor_lines:
    #     [x1, y1, x2, y2] = line
    #     cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    # for line in ver_lines:
    #     [x1, y1, x2, y2] = line
    #     cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)

    cv2.imwrite(out_file, vis)
    cv2.imshow("Text Detection", output)
    cv2.waitKey(0)
Initial image:
Preprocessed image with detection of text outlines to define the dimensions of the rectangles:
Final image:
Result obtained by OCR:
"
a
ra
at
12
1
"
Thank you in advance for your help, hope my description is clear enough.
When performing OCR, it is extremely important to preprocess the image so the foreground text is black and the background is white. In addition, enlarging the image can help improve the detection results. I've also found that adding a slight Gaussian blur before throwing it into Pytesseract improves accuracy. Here are the results with --psm 6 to treat the image as a single block of text. Look here for more configuration options.
Preprocessed enlarged, thresholded, and slightly blurred image
Results from Pytesseract OCR
Series Type Scan Range CTDIvol DLP Phantom
(mm) (mGy) — (mGy-cm) cm
1 Scout - - - -
1 Scout - - - -
2 Axial = 113.554-1272.929 11.22 269.35 Body 32
Total Exam DLP: = 269.35
1/1
Code
import cv2
import pytesseract
import imutils
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
image = cv2.imread('1.jpg')
image = imutils.resize(image, width=700)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
thresh = cv2.GaussianBlur(thresh, (3,3), 0)
data = pytesseract.image_to_string(thresh, lang='eng', config='--psm 6')
print(data)
cv2.imshow('thresh', thresh)
cv2.imwrite('thresh.png', thresh)
cv2.waitKey()
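Since the original goal was to read the table cell by cell rather than as one block, pytesseract.image_to_data may also be worth a look: it returns a bounding box and confidence per recognized word, which you could map back onto your detected rectangles. A small sketch under that assumption (the confidence cut-off is arbitrary):
import cv2
import pytesseract
from pytesseract import Output

thresh = cv2.imread('thresh.png', cv2.IMREAD_GRAYSCALE)  # preprocessed image saved above

# one entry per recognized word, each with its bounding box and confidence
data = pytesseract.image_to_data(thresh, lang='eng', config='--psm 6', output_type=Output.DICT)
for i, word in enumerate(data['text']):
    if word.strip() and float(data['conf'][i]) > 0:  # keep non-empty, confident words
        x, y, w, h = data['left'][i], data['top'][i], data['width'][i], data['height'][i]
        print(word, (x, y, w, h))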

How to tune tesseract for identifying number plate of a car more accurately?

I have some code to detect and identify a car's number plate and convert the image into text using tesseract.
I am using OpenCV to localise the number plate.
The problem I am facing is that tesseract is not accurately identifying the number. Is there any way I can improve the tesseract performance?
My code (which I downloaded from the Internet) is:
import numpy as np
import cv2
# from copy import deepcopy
from PIL import Image
import pytesseract as tess

# plate = 0

def preprocess(img):
    # print('preprocessing image')
    # cv2.imshow("Input", img)
    imgBlurred = cv2.GaussianBlur(img, (5, 5), 0)
    gray = cv2.cvtColor(imgBlurred, cv2.COLOR_BGR2GRAY)
    sobelx = cv2.Sobel(gray, cv2.CV_8U, 1, 0, ksize=3)
    cv2.imshow("Sobel", sobelx)
    cv2.waitKey(0)
    ret2, threshold_img = cv2.threshold(sobelx, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    cv2.imshow("Threshold", threshold_img)
    cv2.waitKey(0)
    return threshold_img

def cleanPlate(plate):
    # print("CLEANING PLATE. . .")
    gray = cv2.cvtColor(plate, cv2.COLOR_BGR2GRAY)
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    thresh = cv2.dilate(gray, kernel, iterations=1)
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
    im1, contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    if contours:
        areas = [cv2.contourArea(c) for c in contours]
        max_index = np.argmax(areas)
        max_cnt = contours[max_index]
        max_cntArea = areas[max_index]
        x, y, w, h = cv2.boundingRect(max_cnt)
        if not ratioCheck(max_cntArea, w, h):
            return plate, None
        cleaned_final = thresh[y:y + h, x:x + w]
        # cv2.imshow("Function Test", cleaned_final)
        return cleaned_final, [x, y, w, h]
    else:
        return plate, None

def extract_contours(threshold_img):
    # print('extracting contours')
    element = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(17, 3))
    morph_img_threshold = threshold_img.copy()
    cv2.morphologyEx(src=threshold_img, op=cv2.MORPH_CLOSE, kernel=element, dst=morph_img_threshold)
    cv2.imshow("Morphed", morph_img_threshold)
    cv2.waitKey(0)
    im2, contours, hierarchy = cv2.findContours(morph_img_threshold, mode=cv2.RETR_EXTERNAL,
                                                method=cv2.CHAIN_APPROX_NONE)
    return contours

def ratioCheck(area, width, height):
    # print('checking ratio')
    ratio = float(width) / float(height)
    if ratio < 1:
        ratio = 1 / ratio
    aspect = 4.7272
    min = 15 * aspect * 15    # minimum area
    max = 125 * aspect * 125  # maximum area
    rmin = 3
    rmax = 6
    if (area < min or area > max) or (ratio < rmin or ratio > rmax):
        return False
    return True

def isMaxWhite(plate):
    # print('is Max white')
    avg = np.mean(plate)
    if avg >= 115:
        return True
    else:
        return False

def validateRotationAndRatio(rect):
    # print('validate the rotation and ratio')
    (x, y), (width, height), rect_angle = rect
    if width > height:
        angle = -rect_angle
    else:
        angle = 90 + rect_angle
    if angle > 15:
        return False
    if height == 0 or width == 0:
        return False
    area = height * width
    if not ratioCheck(area, width, height):
        return False
    else:
        return True

def cleanAndRead(img, contours):
    # print('clean and read')
    # count = 0
    for i, cnt in enumerate(contours):
        min_rect = cv2.minAreaRect(cnt)
        if validateRotationAndRatio(min_rect):
            x, y, w, h = cv2.boundingRect(cnt)
            plate_img = img[y:y + h, x:x + w]
            if isMaxWhite(plate_img):
                # count += 1
                clean_plate, rect = cleanPlate(plate_img)
                if rect:
                    x1, y1, w1, h1 = rect
                    x, y, w, h = x + x1, y + y1, w1, h1
                    cv2.imshow("Cleaned Plate", clean_plate)
                    cv2.waitKey(0)
                    plate_im = Image.fromarray(clean_plate)
                    plate_im.save('donald1.png')
                    text = tess.image_to_string(plate_im, lang='eng')
                    # print text
                    img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    cv2.imshow("Detected Plate", img)
                    cv2.waitKey(0)
                    return text

numberplate = 0
img = cv2.imread("car_number_plate.jpg")
threshold_img = preprocess(img)
contours = extract_contours(threshold_img)
# if len(contours) != 0:
#     print(len(contours))  # Test
#     cv2.drawContours(img, contours, -1, (0, 255, 0), 1)
#     cv2.imshow("Contours", img)
#     cv2.waitKey(0)
plate = cleanAndRead(img, contours)
print('plate information: ', plate)
If my number plate is MH01AV8866, it will be recognised as MH01AY8866.
Any suggestions will be appreciated. Let me know if any other information is required too.
You are using tesseract as a general-purpose model. You can fine-tune it for your problem. To do that, you need to generate synthetic data for your number plates, for example with
https://github.com/Belval/TextRecognitionDataGenerator
and then fine-tune the model using the steps provided here:
https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00---Finetune
https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00
I've tuned tesseract on synthetic data and it works like a charm. I tried both CNN models and tesseract, and tesseract trains better with less data and gives better performance.
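To make the synthetic-data idea concrete without tying it to a particular generator, here is a rough sketch that renders random plate-like strings with Pillow. The plate pattern, font path, image size and the .gt.txt ground-truth convention are all assumptions you would adapt to your plates and to whatever layout your Tesseract training pipeline expects:
import os
import random
import string
from PIL import Image, ImageDraw, ImageFont

OUT_DIR = "synthetic_plates"       # hypothetical output directory
FONT_PATH = "DejaVuSans-Bold.ttf"  # replace with a font that resembles your plates
os.makedirs(OUT_DIR, exist_ok=True)

def random_plate():
    # assumed pattern like MH01AV8866: 2 letters, 2 digits, 2 letters, 4 digits
    return ("".join(random.choices(string.ascii_uppercase, k=2))
            + "".join(random.choices(string.digits, k=2))
            + "".join(random.choices(string.ascii_uppercase, k=2))
            + "".join(random.choices(string.digits, k=4)))

font = ImageFont.truetype(FONT_PATH, 48)
for i in range(100):
    text = random_plate()
    img = Image.new("L", (360, 80), color=255)    # white background
    draw = ImageDraw.Draw(img)
    draw.text((10, 10), text, fill=0, font=font)  # black text
    img.save(os.path.join(OUT_DIR, "{}_{}.png".format(text, i)))
    # ground-truth transcription next to the image (one common convention is a .gt.txt file)
    with open(os.path.join(OUT_DIR, "{}_{}.gt.txt".format(text, i)), "w") as f:
        f.write(text)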
