Hello I am trying to identify the odometer reading from the image attached using open CV and EAST model along with Pyteserract.
Following is my code :
import cv2
import numpy as np
import matplotlib.pyplot as plt
# assuming you have the result image store in median
median = cv2.imread("odo_4.jpg", 0)
image_gray = median
binary = cv2.bitwise_not(image_gray)
blur = cv2.GaussianBlur(image_gray,(5,5),0)
ret2,th2 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
edged = cv2.Canny(th2, 50, 80, 255)
#threshold = cv2.adaptiveThreshold(edged,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel, iterations=1)
contours = cv2.findContours(close, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
rect_cnts = []
for cnt in contours:
peri = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
(x, y, w, h) = cv2.boundingRect(cnt)
ar = w / float(h)
if (len(approx) == 4) & (ar >= 0.95 and ar <= 1.05) : # shape filtering condition
pass
else :
rect_cnts.append(cnt)
max_area = 0
football_square = None
for cnt in rect_cnts:
(x, y, w, h) = cv2.boundingRect(cnt)
if max_area < w*h:
max_area = w*h
football_square = cnt
image = cv2.cvtColor(image_gray, cv2.COLOR_GRAY2RGB)
(x, y, w, h) = cv2.boundingRect(football_square)
new_image = image[y:y+h, x:x+w]
new = new_image
import cv2 as cv
orig = new.copy()
(origH, origW) = new.shape[:2]
rW = origW / 320.0
rH = origH / 320.0
# resize the original image to new dimensions
new = cv.resize(new, (320, 320))
(H, W) = new.shape[:2]
# construct a blob from the image to forward pass it to EAST model
blob = cv.dnn.blobFromImage(new, 1.0, (W, H),
(123.68, 116.78, 103.94), swapRB=True, crop=False)
net = cv.dnn.readNet('frozen_east_text_detection.pb')
layerNames = [
"feature_fusion/Conv_7/Sigmoid",
"feature_fusion/concat_3"]
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
def predictions(prob_score, geo):
(numR, numC) = prob_score.shape[2:4]
boxes = []
confidence_val = []
# loop over rows
for y in range(0, numR):
scoresData = prob_score[0, 0, y]
x0 = geo[0, 0, y]
x1 = geo[0, 1, y]
x2 = geo[0, 2, y]
x3 = geo[0, 3, y]
anglesData = geo[0, 4, y]
# loop over the number of columns
for i in range(0, numC):
if scoresData[i] < 0.5:
continue
(offX, offY) = (i * 4.0, y * 4.0)
# extracting the rotation angle for the prediction and computing the sine and cosine
angle = anglesData[i]
cos = np.cos(angle)
sin = np.sin(angle)
# using the geo volume to get the dimensions of the bounding box
h = x0[i] + x2[i]
w = x1[i] + x3[i]
# compute start and end for the text pred bbox
endX = int(offX + (cos * x1[i]) + (sin * x2[i]))
endY = int(offY - (sin * x1[i]) + (cos * x2[i]))
startX = int(endX - w)
startY = int(endY - h)
boxes.append((startX, startY, endX, endY))
confidence_val.append(scoresData[i])
# return bounding boxes and associated confidence_val
return (boxes, confidence_val)
(boxes, confidence_val) = predictions(scores, geometry)
boxes = non_max_suppression(np.array(boxes), probs=confidence_val)
# initialize the list of results
results = []
# loop over the bounding boxes to find the coordinate of bounding boxes
for (startX, startY, endX, endY) in boxes:
# scale the coordinates based on the respective ratios in order to reflect bounding box on the original image
startX = int(startX * rW)
startY = int(startY * rH)
endX = int(endX * rW)
endY = int(endY * rH)
#extract the region of interest
r = orig[startY:endY, startX:endX]
plt.imshow(r)
#configuration setting to convert image to string.
configuration = ("-l eng --oem 1 --psm 7")
##This will recognize the text from the image of bounding box
text = pytesseract.image_to_string(r, config=configuration)
# append bbox coordinate and associated text to the list of results
results.append(((startX, startY, endX, endY), text))
The results are bad - but my EAST model is identify the contour ( area) where the digits are present. Can you please help me ? I have tried different psm values in config for image_to_string.
Use InRange() for selection. See example:
import cv2 as cv
low_H = 80
low_S = 160
low_V = 200
high_H = 100
high_S = 255
high_V = 255
frame = cv.imread('OAPgE.jpg')
frame_HSV = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
frame_threshold = cv.inRange(frame_HSV, (low_H, low_S, low_V), (high_H, high_S, high_V))
frame_threshold=cv.bitwise_not(frame_threshold)
cv.imwrite('out_36.png', frame_threshold)
Related
i wrote a program to capture the position of license plate with my webcam feed using YOLOv4. The result of the detection is then passed to easyOCR to do character identification. Right now, im calling the OCR function in the while loop everytime a detection occured. Is there a way to call the OCR function outside the loop without stopping the webcam feed ? some people suggested me to use queue or sub process but im not quite familiar with the concept. Any help would be very appreciated
#detection
while 1:
#_, pre_img = cap.read()
#pre_img= cv2.resize(pre_img, (640, 480))
_, img = cap.read()
#img = cv2.flip(pre_img,1)
hight, width, _ = img.shape
blob = cv2.dnn.blobFromImage(img, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
net.setInput(blob)
output_layers_name = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_name)
boxes = []
confidences = []
class_ids = []
for output in layerOutputs:
for detection in output:
score = detection[5:]
class_id = np.argmax(score)
confidence = score[class_id]
if confidence > 0.7:
center_x = int(detection[0] * width)
center_y = int(detection[1] * hight)
w = int(detection[2] * width)
h = int(detection[3] * hight)
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append((float(confidence)))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, .5, .4)
boxes = []
confidences = []
class_ids = []
for output in layerOutputs:
for detection in output:
score = detection[5:]
class_id = np.argmax(score)
confidence = score[class_id]
if confidence > 0.5:
center_x = int(detection[0] * width)
center_y = int(detection[1] * hight)
w = int(detection[2] * width)
h = int(detection[3] * hight)
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append((float(confidence)))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, .8, .4)
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(len(boxes), 3))
if len(indexes) > 0:
for i in indexes.flatten():
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
confidence = str(round(confidences[i], 2))
color = colors[i]
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
detected_image = img[y:y+h, x:x+w]
cv2.putText(img, label + " " + confidence, (x, y + 400), font, 2, color, 2)
#print(detected_image)
cv2.imshow('detection',detected_image)
result = OCR(detected_image)
print(result)
Function for OCR
def OCR(cropped_image):
result = reader.readtext(cropped_image)
text = ''
for result in result:
text += result[1] + ' '
spliced = (remove(text)).upper()
return spliced
You could run the OCR function on an other thread with the thread library like so:
import time # not necessary only to simulate work time
import _thread as thread # in python 3 the name has changed to _thread
def OCR(cropped_image):
result = reader.readtext(cropped_image)
text = ''
for result in result:
text += result[1] + ' '
spliced = (remove(text)).upper()
print(spliced) # you would have to print the result in the OCR function because you can't easily return stuff
while 1:
time.sleep(5) # simulating some work time
print("main")
detected_image = 1
thread.start_new_thread(OCR, (detected_image,)) # calling the OCR function on a new thread.
I hope it will help you...
I put together some code to extract all characters from an image. I sort the characters from left to right and I try to crop each character into a separate image. Not all characters are properly cropped, some of them end up having size zero.
The only characters that do not have one dimension zero are BCDEF. Here is an image of the output.
import cv2
import numpy as np
def crop_minAreaRect(img, rect):
# https://stackoverflow.com/questions/37177811/crop-rectangle-returned-by-minarearect-opencv-python
# rotate img
center = rect[0]
size = rect[1]
print("size[0]: " + str(int(size[0])) + ", size[1]: " + str(int(size[1])))
angle = rect[2]
print("angle: " + str(angle))
rows,cols = img.shape[0], img.shape[1]
M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)
img_rot = cv2.warpAffine(img,M,(cols,rows))
# rotate bounding box
rect0 = (rect[0], rect[1], angle)
box = cv2.boxPoints(rect0)
pts = np.int0(cv2.transform(np.array([box]), M))[0]
pts[pts < 0] = 0
# crop
img_crop = img_rot[pts[1][1]:pts[0][1], pts[1][0]:pts[2][0]]
w, h = img_crop.shape[0], img_crop.shape[1]
print("w_cropped: " + str(w) + ", h_cropped: " + str(h))
return img_crop
def sort_contours(cnts, method="left-to-right"):
# from https://pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
reverse = False
i = 0
if method == "right-to-left" or method == "bottom-to-top":
reverse = True
if method == "top-to-bottom" or method == "bottom-to-top":
i = 1
boundingBoxes = [cv2.boundingRect(c) for c in cnts]
(cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes), key=lambda b:b[1][i], reverse=reverse))
return (cnts, boundingBoxes)
im_name = 'letters.png'
im = cv2.imread(im_name)
im_copy = im.copy()
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#cv2.drawContours(im_copy, contours, -1, (0,255,0), 2)
#cv2.imshow("contours", im_copy)
print("num contours: " + str(len(contours)))
i = 0
sorted_cnts, bounding_boxes = sort_contours(contours, method="left-to-right")
for cnt in sorted_cnts:
size = cv2.contourArea(cnt)
x,y,w,h = cv2.boundingRect(cnt)
rect = cv2.minAreaRect(cnt)
# print(str(rect))
# if rect[1][0] > 0 and rect[1][1]>0:
im_cropped = crop_minAreaRect(im, rect)
h,w = im_cropped.shape[0], im_cropped.shape[1]
if w > h:
im_cropped = cv2.rotate(im_cropped, cv2.ROTATE_90_CLOCKWISE)
print("w: " + str(w) + ", h: " + str(h))
if w>0 and h>0:
cv2.imshow("cropped" + str(i), im_cropped)
i += 1
# cv2.waitKey(0)
cv2.waitKey(0)
There appears to be an error in your crop_minAreaRect function.
I haven't debugged your code any further than the return of crop_minAreaRect, so the letters may or may not be correctly rotated according following your approach, but this change fixes the underlying problem.
The proposed function is taken from the following question and modified: How to straighten a rotated rectangle area of an image using OpenCV in Python?
import cv2
import numpy as np
def subimage(image, center, theta, width, height):
'''
Rotates OpenCV image around center with angle theta (in deg)
then crops the image according to width and height.
'''
width = int(width)
height = int(height)
# Uncomment for theta in radians
# theta *= 180/np.pi
shape = (image.shape[1], image.shape[0]) # cv2.warpAffine expects shape in (length, height)
matrix = cv2.getRotationMatrix2D(center=center, angle=theta, scale=1)
image = cv2.warpAffine(src=image, M=matrix, dsize=shape)
x = int(center[0] - width / 2)
y = int(center[1] - height / 2)
image = image[y:y + height, x:x + width]
return image
def sort_contours(cnts, method="left-to-right"):
# from https://pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
reverse = False
i = 0
if method == "right-to-left" or method == "bottom-to-top":
reverse = True
if method == "top-to-bottom" or method == "bottom-to-top":
i = 1
boundingBoxes = [cv2.boundingRect(c) for c in cnts]
(cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes), key=lambda b: b[1][i], reverse=reverse))
return (cnts, boundingBoxes)
im_name = 'letters.png'
im = cv2.imread(im_name)
im_copy = im.copy()
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(im_copy, contours, -1, (0, 255, 0), 2)
cv2.imshow("contours", im_copy)
# print("num contours: " + str(len(contours)))
i = 0
sorted_cnts, bounding_boxes = sort_contours(contours, method="left-to-right")
for cnt in sorted_cnts:
size = cv2.contourArea(cnt)
x, y, w, h = cv2.boundingRect(cnt)
rect = cv2.minAreaRect(cnt)
im_cropped = subimage(im, center=rect[0], theta=rect[2], width=rect[1][0], height=rect[1][1])
h, w = im_cropped.shape[0], im_cropped.shape[1]
if w > h:
im_cropped = cv2.rotate(im_cropped, cv2.ROTATE_90_CLOCKWISE)
# print("w: " + str(w) + ", h: " + str(h))
if w > 0 and h > 0:
cv2.imshow("cropped" + str(i), im_cropped)
i += 1
# cv2.waitKey(0)
cv2.waitKey(0)
The code below is able to detect objects without issue, however, towards the end there is the line "cv2.imshow("demo", img)"
I would expect this window to show the image with the generated bounding boxes and labels, but all I get is a blank window. I got this code originally from some examples on the internet so I'm a bit lost as to how to position that line, or why it's not generating the image.
import cv2
import numpy as np
def take_pic(output_filename):
import os
capture_img="ffmpeg -y -rtsp_transport udp -i rtsp://mycamera:apassword#172.16.66.106/live -vframes 1 " + output_filename
net = cv2.dnn.readNet("yolov3.weights", "./darknet/cfg/yolov3.cfg")
classes = []
with open("./darknet/data/coco.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
output_filename = "/tmp/camera.jpeg"
cap = cv2.imread(output_filename)
j = 0
if j==0:
cv2.namedWindow("demo", cv2.WINDOW_AUTOSIZE)
while True:
take_pic(output_filename)
cap = cv2.imread(source)
j = j + 1
print("j= " + str(j))
img = cap
img = cv2.resize(img, None, fx=0.4, fy=0.4)
height, width, channels = img.shape
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5:
# Object detected
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
print(str(center_x)+" "+str(center_y))
w = int(detection[2] * width)
h = int(detection[3] * height)
# Rectangle coordinates
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
font = cv2.FONT_HERSHEY_PLAIN
for i in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
print("label :"+str(label)+"x: "+str(x)+" y: " + str(y))
color = colors[i]
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
cv2.imshow("demo", img)
else:
print("camera open failed")
cv2.destroyAllWindows()
With opencv, a imshow is required to be accompanied with a waitKey method in order to display an image.
Paste something similar to this towards the end of your loop, after you call cv2.imshow:
if cv2.waitKey(0) == ord('q'):
print('exitting loop')
break
If the image shows blank during imshow method, then you might need to multiply pixels with 255. For instance, in Matlab, the images are normalized between 0 - 1.
Try:
cv2.imshow("demo", img * 255)
cv2.waitKey(0)
I use a code to locate text boxes and create a rectangle around them. This allows me to rebuild the grid around the table structure in the image.
However, even if the text box detection works very well, if I try to define the characters present in each rectangle, pytesseract does not identify them well and does not allow to find the original text.
Here is my Python code :
import os
import cv2
import imutils
import argparse
import numpy as np
import pytesseract
# This only works if there's only one table on a page
# Important parameters:
# - morph_size
# - min_text_height_limit
# - max_text_height_limit
# - cell_threshold
# - min_columns
def pre_process_image(img, save_in_file, morph_size=(8, 8)):
# get rid of the color
pre = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
def img_estim(img, threshold=127):
is_dark = np.mean(img) < threshold
return True if is_dark else False
# Negative
if img_estim(pre):
print("non")
pre = cv2.bitwise_not(pre)
# Contrast & Brightness control
contrast = 2.0 #0 to 3
brightness = 0 #0 to 100
for y in range(pre.shape[0]):
for x in range(pre.shape[1]):
pre[y,x] = np.clip(contrast*pre[y,x] + brightness, 0, 255)
# Otsu threshold
pre = cv2.threshold(pre, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# dilate the text to make it solid spot
cpy = pre.copy()
struct = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
cpy = cv2.dilate(~cpy, struct, anchor=(-1, -1), iterations=1)
pre = ~cpy
if save_in_file is not None:
cv2.imwrite(save_in_file, pre)
return pre
def find_text_boxes(pre, min_text_height_limit=15, max_text_height_limit=40):
# Looking for the text spots contours
# OpenCV 3
# img, contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
# OpenCV 4
contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
# Getting the texts bounding boxes based on the text size assumptions
boxes = []
for contour in contours:
box = cv2.boundingRect(contour)
h = box[3]
if min_text_height_limit < h < max_text_height_limit:
boxes.append(box)
return boxes
def find_table_in_boxes(boxes, cell_threshold=10, min_columns=2):
rows = {}
cols = {}
# Clustering the bounding boxes by their positions
for box in boxes:
(x, y, w, h) = box
col_key = x // cell_threshold
row_key = y // cell_threshold
cols[row_key] = [box] if col_key not in cols else cols[col_key] + [box]
rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]
# Filtering out the clusters having less than 2 cols
table_cells = list(filter(lambda r: len(r) >= min_columns, rows.values()))
# Sorting the row cells by x coord
table_cells = [list(sorted(tb)) for tb in table_cells]
# Sorting rows by the y coord
table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))
return table_cells
def build_lines(table_cells):
if table_cells is None or len(table_cells) <= 0:
return [], []
max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]
max_last_row_height_box = max(table_cells[-1], key=lambda b: b[3])
max_y = max_last_row_height_box[1] + max_last_row_height_box[3]
hor_lines = []
ver_lines = []
for box in table_cells:
x = box[0][0]
y = box[0][1]
hor_lines.append((x, y, max_x, y))
for box in table_cells[0]:
x = box[0]
y = box[1]
ver_lines.append((x, y, x, max_y))
(x, y, w, h) = table_cells[0][-1]
ver_lines.append((max_x, y, max_x, max_y))
(x, y, w, h) = table_cells[0][0]
hor_lines.append((x, max_y, max_x, max_y))
return hor_lines, ver_lines
if __name__ == "__main__":
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to input image to be OCR'd")
# ap.add_argument("-east", "--east", type=str,
# help="path to input EAST text detector")
args = vars(ap.parse_args())
in_file = os.path.join("images", args["image"])
pre_file = os.path.join("images", "pre.png")
out_file = os.path.join("images", "out.png")
img = cv2.imread(os.path.join(in_file))
top, bottom, left, right = [25]*4
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_REPLICATE)
orig = img.copy()
pre_processed = pre_process_image(img, pre_file)
text_boxes = find_text_boxes(pre_processed)
cells = find_table_in_boxes(text_boxes)
hor_lines, ver_lines = build_lines(cells)
# (H, W) = img.shape[:2]
# net = cv2.dnn.readNet(args["east"])
# blob = cv2.dnn.blobFromImage(img, 1.0, (W, H),(123.68, 116.78, 103.94), swapRB=True, crop=False)
# net.setInput(blob)
# Visualize the result
vis = img.copy()
results = []
for box in text_boxes:
(x, y, w, h) = box
startX = x -2
startY = y -2
endX = x + w
endY = y + h
cv2.rectangle(vis, (startX, startY), (endX, endY), (0, 255, 0), 1)
roi=orig[startX:endX,startY:endY]
config = ("-l eng --psm 6")
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
text = pytesseract.image_to_string(roi,config=config )
results.append(((startX, startY, (endX), (endY)), text))
results = sorted(results, key=lambda r:r[0][1])
output = orig.copy()
for ((startX, startY, endX, endY), text) in results:
print("{}\n".format(text))
text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
cv2.rectangle(output, (startX, startY), (endX, endY),(0, 0, 255), 1)
cv2.putText(output, text, (startX, startY - 20),cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
# for line in hor_lines:
# [x1, y1, x2, y2] = line
# cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
# for line in ver_lines:
# [x1, y1, x2, y2] = line
# cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
cv2.imwrite(out_file, vis)
cv2.imshow("Text Detection", output)
cv2.waitKey(0)
Initial image :
Initial image
Preprocessed image with detection of text outlines to define the dimensions of rectangles :
Preprocessed image with detection of text outlines to define the dimensions of rectangles
Final image :
Final image
Résultat obtenu par OCR :
"
a
ra
at
12
1
"
Thank you in advance for your help, hope my description is clear enough.
When performing OCR, it is extrememly important to preprocess the image to get the foreground text in black with the background in white. In addition, enlarging the image can help improve the detection results. I've also found that adding a slight Gaussian blur improves accuracy before throwing it into Pytesseract. Here's the results with --psm 6 to treat the image as a single block of text. Look here for more configuration options.
Preprocessed enlarged, thresholded, and slightly blurred image
Results from Pytesseract OCR
Series Type Scan Range CTDIvol DLP Phantom
(mm) (mGy) — (mGy-cm) cm
1 Scout - - - -
1 Scout - - - -
2 Axial = 113.554-1272.929 11.22 269.35 Body 32
Total Exam DLP: = 269.35
1/1
Code
import cv2
import pytesseract
import imutils
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
image = cv2.imread('1.jpg')
image = imutils.resize(image, width=700)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
thresh = cv2.GaussianBlur(thresh, (3,3), 0)
data = pytesseract.image_to_string(thresh, lang='eng', config='--psm 6')
print(data)
cv2.imshow('thresh', thresh)
cv2.imwrite('thresh.png', thresh)
cv2.waitKey()
I have a code to detect and identify the car number plate and convert the image into text using tesseract.
I am using openCV to localise the number plate.
The problem that I am facing is that tesseract is not accurately identifying the number. Is there any way I can improve the tesseract performance?
My code (which I downloaded from Internet) is:
import numpy as np
import cv2
# from copy import deepcopy
from PIL import Image
import pytesseract as tess
# plate = 0
def preprocess(img):
# print ('preprocessing image')
# cv2.imshow("Input", img)
imgBlurred = cv2.GaussianBlur(img, (5, 5), 0)
gray = cv2.cvtColor(imgBlurred, cv2.COLOR_BGR2GRAY)
sobelx = cv2.Sobel(gray, cv2.CV_8U, 1, 0, ksize=3)
cv2.imshow("Sobel",sobelx)
cv2.waitKey(0)
ret2, threshold_img = cv2.threshold(sobelx, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
cv2.imshow("Threshold",threshold_img)
cv2.waitKey(0)
return threshold_img
def cleanPlate(plate):
# print ("CLEANING PLATE. . .")
gray = cv2.cvtColor(plate, cv2.COLOR_BGR2GRAY)
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
thresh= cv2.dilate(gray, kernel, iterations=1)
_, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
im1, contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
if contours:
areas = [cv2.contourArea(c) for c in contours]
max_index = np.argmax(areas)
max_cnt = contours[max_index]
max_cntArea = areas[max_index]
x, y, w, h = cv2.boundingRect(max_cnt)
if not ratioCheck(max_cntArea, w, h):
return plate, None
cleaned_final = thresh[y:y + h, x:x + w]
# cv2.imshow("Function Test",cleaned_final)
return cleaned_final, [x, y, w, h]
else:
return plate, None
def extract_contours(threshold_img):
# print ('extracting contours')
element = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(17, 3))
morph_img_threshold = threshold_img.copy()
cv2.morphologyEx(src=threshold_img, op=cv2.MORPH_CLOSE, kernel=element, dst=morph_img_threshold)
cv2.imshow("Morphed", morph_img_threshold)
cv2.waitKey(0)
im2, contours, hierarchy = cv2.findContours(morph_img_threshold, mode=cv2.RETR_EXTERNAL,
method=cv2.CHAIN_APPROX_NONE)
return contours
def ratioCheck(area, width, height):
# print ('checking ratio')
ratio = float(width) / float(height)
if ratio < 1:
ratio = 1 / ratio
aspect = 4.7272
min = 15 * aspect * 15 # minimum area
max = 125 * aspect * 125 # maximum area
rmin = 3
rmax = 6
if (area < min or area > max) or (ratio < rmin or ratio > rmax):
return False
return True
def isMaxWhite(plate):
# print ('is Max white')
avg = np.mean(plate)
if (avg >= 115):
return True
else:
return False
def validateRotationAndRatio(rect):
# print( 'validate the rotation and ratio')
(x, y), (width, height), rect_angle = rect
if (width > height):
angle = -rect_angle
else:
angle = 90 + rect_angle
if angle > 15:
return False
if height == 0 or width == 0:
return False
area = height * width
if not ratioCheck(area, width, height):
return False
else:
return True
def cleanAndRead(img, contours):
# print ('clean and read')
# count=0
for i, cnt in enumerate(contours):
min_rect = cv2.minAreaRect(cnt)
if validateRotationAndRatio(min_rect):
x, y, w, h = cv2.boundingRect(cnt)
plate_img = img[y:y + h, x:x + w]
if (isMaxWhite(plate_img)):
# count+=1
clean_plate, rect = cleanPlate(plate_img)
if rect:
x1, y1, w1, h1 = rect
x, y, w, h = x + x1, y + y1, w1, h1
cv2.imshow("Cleaned Plate", clean_plate)
cv2.waitKey(0)
plate_im = Image.fromarray(clean_plate)
plate_im.save('donald1.png')
text = tess.image_to_string(plate_im, lang='eng')
# print text
img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imshow("Detected Plate", img)
cv2.waitKey(0)
return text
numberplate = 0
img = cv2.imread("car_number_plate.jpg")
threshold_img = preprocess(img)
contours = extract_contours(threshold_img)
# if len(contours)!=0:
# print len(contours) #Test
# cv2.drawContours(img, contours, -1, (0,255,0), 1)
# cv2.imshow("Contours",img)
# cv2.waitKey(0)
plate = cleanAndRead(img, contours)
print ('plate information: ', plate)
If my number plate is: MH01AV8866
It will be recognised as MH01AY8866
Any suggestion will be appreciated. Let me know if any other information is required too.
You are using tesseract as a general model for your problem you can tune your model for that you need to generate synthetic data for your number plates with this
https://github.com/Belval/TextRecognitionDataGenerator
and then you can tune your model using the steps provided
https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00---Finetune
https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00
I've tuned the tesseract on synthetic data and it works like a charm, tried CNN models and tesseract both and tesseract trains better with lesser data and gives better performance.