Binarize low contrast images - python

I have a bunch of image snippets with low contrast which I'd like to binarize using Python.
I tried various thresholding methods such as Otsu and Huang, but none of them seems to work for all my image snippets.
Following instructions like this one, I pieced together the code below:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import math
import glob
import os.path
import os
def permissions(targetfile):
    os.chmod(targetfile, mode=0o755)
    os.chown(targetfile, 1000, 1000)

# Resize snippet
def resize(image):
    image_resized = cv2.resize(image, None, fx=12, fy=12)
    return image_resized

# Apply CLAHE
def clahe(image):
    # CLAHE parameters
    cl1 = 6
    cl2 = 9
    cl3 = 9
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=cl1, tileGridSize=(cl2, cl3))
    cv_gray_clahe = clahe.apply(image_gray)
    return cv_gray_clahe
# Binarize image using Huang's method (https://github.com/dnhkng/Huang-Thresholding)
def binarize(image):
    # image = np.array(image)  # image needs to be of class 'numpy.ndarray'
    histogram, bin_edges = np.histogram(image, bins=range(257))
    huang_threshold = Huang(histogram)
    threshold = np.where(image > huang_threshold, 1, 0)
    threshold = threshold.astype(np.uint8)
    return threshold
def Huang(data):
    """Implements Huang's fuzzy thresholding method.
    Uses Shannon's entropy function (one can also use Yager's entropy function).
    Huang L.-K. and Wang M.-J.J. (1995) "Image Thresholding by Minimizing
    the Measures of Fuzziness" Pattern Recognition, 28(1): 41-51"""
    threshold = -1
    first_bin = 0
    for ih in range(254):
        if data[ih] != 0:
            first_bin = ih
            break
    last_bin = 254
    for ih in range(254, -1, -1):
        if data[ih] != 0:
            last_bin = ih
            break
    term = 1.0 / (last_bin - first_bin)
    # print(first_bin, last_bin, term)
    mu_0 = np.zeros(shape=(254, 1))
    num_pix = 0.0
    sum_pix = 0.0
    for ih in range(first_bin, 254):
        sum_pix = sum_pix + (ih * data[ih])
        num_pix = num_pix + data[ih]
        mu_0[ih] = sum_pix / num_pix  # NUM_PIX cannot be zero!
    mu_1 = np.zeros(shape=(254, 1))
    num_pix = 0.0
    sum_pix = 0.0
    for ih in range(last_bin, 1, -1):
        sum_pix = sum_pix + (ih * data[ih])
        num_pix = num_pix + data[ih]
        mu_1[ih - 1] = sum_pix / num_pix  # NUM_PIX cannot be zero!
    min_ent = float("inf")
    for it in range(254):
        ent = 0.0
        for ih in range(it):
            # Equation (4) in Reference
            mu_x = 1.0 / (1.0 + term * math.fabs(ih - mu_0[it]))
            if not ((mu_x < 1e-06) or (mu_x > 0.999999)):
                # Equation (6) & (8) in Reference
                ent = ent + data[ih] * (-mu_x * math.log(mu_x) - (1.0 - mu_x) * math.log(1.0 - mu_x))
        for ih in range(it + 1, 254):
            # Equation (4) in Reference
            mu_x = 1.0 / (1.0 + term * math.fabs(ih - mu_1[it]))
            if not ((mu_x < 1e-06) or (mu_x > 0.999999)):
                # Equation (6) & (8) in Reference
                ent = ent + data[ih] * (-mu_x * math.log(mu_x) - (1.0 - mu_x) * math.log(1.0 - mu_x))
        if ent < min_ent:
            min_ent = ent
            threshold = it
    # print("min_ent, threshold ", min_ent, threshold)
    return threshold
# Input files:
path = glob.glob("./" + "*.JPG")
path.extend(glob.glob("./" + "*.jpg"))

# Output directory
targetdir = "./output/"
os.makedirs(targetdir, exist_ok=True)
permissions(targetdir)

for img in path:
    poststring = ""
    targetfile = targetdir + os.path.basename(img).split('.')[0] + poststring + \
                 os.path.splitext(img)[1]
    # Change filename of targetfile
    if not os.path.exists(targetfile):
        print("Processing targetfile: ", targetfile)
        # read image and resize
        image = cv2.imread(img)
        resized_image = resize(image)
        # CLAHE
        clahe_image = clahe(resized_image)
        denoised_image = cv2.fastNlMeansDenoising(clahe_image, h=21, templateWindowSize=9, searchWindowSize=21)
        # Huang thresholding
        binarized_image = binarize(denoised_image)
        binarized_image *= 255
        # dilate
        kernel = np.ones((12, 12), np.uint8)
        dilate = cv2.dilate(binarized_image, kernel, iterations=3)
        # Flood fill from the borders to remove blobs touching the edges.
        # Use the dimensions of the resized/dilated image, not the original image.
        h, w = dilate.shape[:2]
        for row in range(h):
            if dilate[row, 0] == 255:
                cv2.floodFill(dilate, None, (0, row), 0)
            if dilate[row, w - 1] == 255:
                cv2.floodFill(dilate, None, (w - 1, row), 0)
        for col in range(w):
            if dilate[0, col] == 255:
                cv2.floodFill(dilate, None, (col, 0), 0)
            if dilate[h - 1, col] == 255:
                cv2.floodFill(dilate, None, (col, h - 1), 0)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
        foreground = cv2.morphologyEx(dilate, cv2.MORPH_OPEN, kernel)
        foreground = cv2.morphologyEx(foreground, cv2.MORPH_CLOSE, kernel)
        # Creating background
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (17, 17))
        background = cv2.dilate(foreground, kernel, iterations=3)
        cv2.imwrite(targetfile, background)
        permissions(targetfile)
    else:
        print("Skipping, because already existing: ", targetfile)
        permissions(targetfile)
    print('')
The result is still not satisfying:
Could you please advise on how to get rid of the noise, keep the desired features, and obtain straight/ellipse-like contour lines?
Adding the original snippets here for testing purposes: download snippets

Here is one approach, stretching the contrast first. It may need tuning for other images.
Read the input
Stretch the contrast
Apply a Gaussian blur
Convert to gray
Adaptive threshold
Get contours and filter on area larger than some threshold
Draw white filled contours on a black background for those contours that pass the filter
Save the results
Input:
import cv2
import numpy as np
import skimage.exposure
# load image
img = cv2.imread('low_contrast.png')
# stretch contrast
stretch = skimage.exposure.rescale_intensity(img, in_range=(95,115), out_range=(0,255)).astype(np.uint8)
# Gaussian blur
blur = cv2.GaussianBlur(stretch, (0,0), sigmaX=5, sigmaY=5)
# convert to gray
gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
# threshold
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 101, -9)
# get contours and filter on area
contour_img = img.copy()
result = np.zeros_like(thresh)
contours = cv2.findContours(thresh , cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
    area = cv2.contourArea(cntr)
    if area > 2000:
        cv2.drawContours(contour_img, [cntr], 0, (255,255,255), 1)
        cv2.drawContours(result, [cntr], 0, (255), -1)
cv2.imwrite('low_contrast_stretched.png', stretch)
cv2.imwrite('low_contrast_blur.png', blur)
cv2.imwrite('low_contrast_gray.png', gray)
cv2.imwrite('low_contrast_thresh.png', thresh)
cv2.imwrite('low_contrast_contours.png', contour_img)
cv2.imwrite('low_contrast_contours_filled.png', result)
cv2.imshow('stretch', stretch)
cv2.imshow('blur', blur)
cv2.imshow('gray', gray)
cv2.imshow('thresh', thresh)
cv2.imshow('contours', contour_img)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Contrast stretched image:
Blurred image:
Grayscale image:
Adaptive threshold image:
Filtered Contour Image:
Final Binary image:
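The question also asks for straight/ellipse-like contour lines. As an optional follow-up (a rough sketch, not tested on the posted snippets), each contour that passes the area filter can be replaced by its fitted ellipse, which regularizes ragged outlines. The area threshold of 2000 and the saved threshold image are taken from the answer above; cv2.fitEllipse needs at least 5 contour points.
import cv2
import numpy as np
# Load the adaptive-threshold result saved above and redraw each kept contour as a filled ellipse
thresh = cv2.imread('low_contrast_thresh.png', cv2.IMREAD_GRAYSCALE)
smooth = np.zeros_like(thresh)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
    # fitEllipse needs >= 5 points; keep the same area filter as above
    if cv2.contourArea(cntr) > 2000 and len(cntr) >= 5:
        ellipse = cv2.fitEllipse(cntr)          # ((cx, cy), (major, minor), angle)
        cv2.ellipse(smooth, ellipse, 255, -1)   # draw as a filled, ellipse-like region
cv2.imwrite('low_contrast_ellipses.png', smooth)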

Related

Detect drops of water using OpenCV

I am trying to detect drops inside the water. At first I detect the edges, but there are light spots in the image which are also detected as drops.
Note that the drops are white, surrounded by a dark layer.
My code:
import cv2
import numpy as np

def unsharp_mask(img, blur_size=(5, 5), imgWeight=1.5, gaussianWeight=-0.5):
    gaussian = cv2.GaussianBlur(img, (5, 5), 0)
    return cv2.addWeighted(img, imgWeight, gaussian, gaussianWeight, 0)

def clahe(img, clip_limit=2.0):
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(5, 5))
    return clahe.apply(img)

def get_sobel(img, size=-1):
    sobelx64f = cv2.Sobel(img, cv2.CV_64F, 2, 0, size)
    abs_sobel64f = np.absolute(sobelx64f)
    return np.uint8(abs_sobel64f)

img = cv2.imread("img_brightened.jpg")
# save color copy for visualizing
imgc = img.copy()
# resize image to make the analysis easier (a form of filtering)
resize_times = 1.5
img = cv2.resize(img, None, img, fx=1 / resize_times, fy=1 / resize_times)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow("Input", img)
# use the Sobel operator to evaluate high frequencies
sobel = get_sobel(img)
# experimentally calculated function - needs refining
clip_limit = (-2.556) * np.sum(sobel) / (img.shape[0] * img.shape[1]) + 26.557
# don't apply CLAHE if there is enough high frequency to find blobs
if clip_limit < 1.0:
    clip_limit = 0.1
# limit CLAHE if there is not enough detail - needs more tests
if clip_limit > 8.0:
    clip_limit = 8
# apply CLAHE and unsharp mask to improve high frequencies as much as possible
img = clahe(img, clip_limit)
img = unsharp_mask(img)
# blur the image to ensure edge continuity and perform Canny
img_blurred = cv2.GaussianBlur(img, (2 * 2 + 1, 2 * 2 + 1), 0)
canny = cv2.Canny(img_blurred, 100, 255)
cv2.imshow("Output", canny)
cv2.waitKey(0)
Result
I used code from https://github.com/kavyamusty/Shading-removal-of-images/blob/master/Article%20submission.ipynb, which removes the shadows first, and then cv2.HoughCircles to find the circles.
The code is below:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def max_filtering(N, I_temp):
    wall = np.full((I_temp.shape[0] + (N//2)*2, I_temp.shape[1] + (N//2)*2), -1)
    wall[(N//2):wall.shape[0]-(N//2), (N//2):wall.shape[1]-(N//2)] = I_temp.copy()
    temp = np.full((I_temp.shape[0] + (N//2)*2, I_temp.shape[1] + (N//2)*2), -1)
    for y in range(0, wall.shape[0]):
        for x in range(0, wall.shape[1]):
            if wall[y, x] != -1:
                window = wall[y-(N//2):y+(N//2)+1, x-(N//2):x+(N//2)+1]
                num = np.amax(window)
                temp[y, x] = num
    A = temp[(N//2):wall.shape[0]-(N//2), (N//2):wall.shape[1]-(N//2)].copy()
    return A

def min_filtering(N, A):
    wall_min = np.full((A.shape[0] + (N//2)*2, A.shape[1] + (N//2)*2), 300)
    wall_min[(N//2):wall_min.shape[0]-(N//2), (N//2):wall_min.shape[1]-(N//2)] = A.copy()
    temp_min = np.full((A.shape[0] + (N//2)*2, A.shape[1] + (N//2)*2), 300)
    for y in range(0, wall_min.shape[0]):
        for x in range(0, wall_min.shape[1]):
            if wall_min[y, x] != 300:
                window_min = wall_min[y-(N//2):y+(N//2)+1, x-(N//2):x+(N//2)+1]
                num_min = np.amin(window_min)
                temp_min[y, x] = num_min
    B = temp_min[(N//2):wall_min.shape[0]-(N//2), (N//2):wall_min.shape[1]-(N//2)].copy()
    return B

def background_subtraction(I, B):
    O = I - B
    norm_img = cv2.normalize(O, None, 0, 255, norm_type=cv2.NORM_MINMAX)
    return norm_img

def min_max_filtering(M, N, I):
    if M == 0:
        # max_filtering
        A = max_filtering(N, I)
        # min_filtering
        B = min_filtering(N, A)
        # subtraction
        normalised_img = background_subtraction(I, B)
    elif M == 1:
        # min_filtering
        A = min_filtering(N, I)
        # max_filtering
        B = max_filtering(N, A)
        # subtraction
        normalised_img = background_subtraction(I, B)
    return normalised_img

# Read image
img = cv2.imread(r"D:/Image.jpg")
# Copy original image
cimg = img.copy()
# Initialize array of uint8
img_remove_shadow = np.zeros(np.shape(img), dtype="uint8")
for i in range(np.shape(img)[2]):
    img_remove_shadow[:, :, i] = np.array(min_max_filtering(M=0, N=20, I=img[:, :, i]))
# Apply median blur
img = cv2.medianBlur(img_remove_shadow, 5)
# Convert to grayscale
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow("Removing Shadow", img)
# Find circles
circles = cv2.HoughCircles(img, cv2.HOUGH_GRADIENT, 1, 5, np.array([]), 40, 23, 5, 20)
circles = np.uint16(np.around(circles))
for i in circles[0, :]:
    # draw the outer circle
    cv2.circle(cimg, (i[0], i[1]), i[2], (0, 255, 0), 2)
    # draw the center of the circle
    cv2.circle(cimg, (i[0], i[1]), 2, (0, 0, 255), 3)
cv2.imshow('detected circles', cimg)
cv2.waitKey(0)
cv2.destroyAllWindows()
The result is as below:
PS: the code takes 11.74 s to run; I would appreciate it if someone could optimize it.
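Regarding the running time: the nested Python loops in max_filtering and min_filtering are almost certainly the bottleneck. A local maximum over an N x N window is exactly what cv2.dilate computes with a rectangular kernel, and the local minimum is cv2.erode, so the background estimate can be produced in optimized C++ instead. A rough sketch of a drop-in replacement (border handling differs slightly from the padded-loop version, so pixels near the edges may not match exactly):
import cv2
import numpy as np

def min_max_filtering_fast(M, N, I):
    # N x N rectangular kernel: dilate = local max, erode = local min
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (N, N))
    if M == 0:
        A = cv2.dilate(I, kernel)   # max filtering
        B = cv2.erode(A, kernel)    # min filtering -> background estimate
    else:
        A = cv2.erode(I, kernel)    # min filtering
        B = cv2.dilate(A, kernel)   # max filtering -> background estimate
    # subtract in a signed type to avoid uint8 wrap-around, then rescale to 0-255
    O = I.astype(np.int16) - B.astype(np.int16)
    return cv2.normalize(O, None, 0, 255, norm_type=cv2.NORM_MINMAX).astype(np.uint8)
The per-channel loop in the posted code can call min_max_filtering_fast unchanged; on typical image sizes this reduces the shadow-removal step from seconds to milliseconds.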

Crop all characters from an image

I put together some code to extract all characters from an image. I sort the characters from left to right and try to crop each character into a separate image. Not all characters are properly cropped; some of them end up with size zero.
The only characters that do not have one dimension equal to zero are BCDEF. Here is an image of the output.
import cv2
import numpy as np

def crop_minAreaRect(img, rect):
    # https://stackoverflow.com/questions/37177811/crop-rectangle-returned-by-minarearect-opencv-python
    # rotate img
    center = rect[0]
    size = rect[1]
    print("size[0]: " + str(int(size[0])) + ", size[1]: " + str(int(size[1])))
    angle = rect[2]
    print("angle: " + str(angle))
    rows, cols = img.shape[0], img.shape[1]
    M = cv2.getRotationMatrix2D((cols/2, rows/2), angle, 1)
    img_rot = cv2.warpAffine(img, M, (cols, rows))
    # rotate bounding box
    rect0 = (rect[0], rect[1], angle)
    box = cv2.boxPoints(rect0)
    pts = np.int0(cv2.transform(np.array([box]), M))[0]
    pts[pts < 0] = 0
    # crop
    img_crop = img_rot[pts[1][1]:pts[0][1], pts[1][0]:pts[2][0]]
    w, h = img_crop.shape[0], img_crop.shape[1]
    print("w_cropped: " + str(w) + ", h_cropped: " + str(h))
    return img_crop

def sort_contours(cnts, method="left-to-right"):
    # from https://pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
    reverse = False
    i = 0
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes), key=lambda b: b[1][i], reverse=reverse))
    return (cnts, boundingBoxes)

im_name = 'letters.png'
im = cv2.imread(im_name)
im_copy = im.copy()
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#cv2.drawContours(im_copy, contours, -1, (0,255,0), 2)
#cv2.imshow("contours", im_copy)
print("num contours: " + str(len(contours)))
i = 0
sorted_cnts, bounding_boxes = sort_contours(contours, method="left-to-right")
for cnt in sorted_cnts:
    size = cv2.contourArea(cnt)
    x, y, w, h = cv2.boundingRect(cnt)
    rect = cv2.minAreaRect(cnt)
    # print(str(rect))
    # if rect[1][0] > 0 and rect[1][1] > 0:
    im_cropped = crop_minAreaRect(im, rect)
    h, w = im_cropped.shape[0], im_cropped.shape[1]
    if w > h:
        im_cropped = cv2.rotate(im_cropped, cv2.ROTATE_90_CLOCKWISE)
    print("w: " + str(w) + ", h: " + str(h))
    if w > 0 and h > 0:
        cv2.imshow("cropped" + str(i), im_cropped)
        i += 1
    # cv2.waitKey(0)
cv2.waitKey(0)
There appears to be an error in your crop_minAreaRect function.
I haven't debugged your code any further than the return of crop_minAreaRect, so the letters may or may not be correctly rotated following your approach, but this change fixes the underlying problem.
The proposed function is taken from the following question and modified: How to straighten a rotated rectangle area of an image using OpenCV in Python?
import cv2
import numpy as np

def subimage(image, center, theta, width, height):
    '''
    Rotates OpenCV image around center with angle theta (in deg)
    then crops the image according to width and height.
    '''
    width = int(width)
    height = int(height)
    # Uncomment for theta in radians
    # theta *= 180/np.pi
    shape = (image.shape[1], image.shape[0])  # cv2.warpAffine expects shape in (length, height)
    matrix = cv2.getRotationMatrix2D(center=center, angle=theta, scale=1)
    image = cv2.warpAffine(src=image, M=matrix, dsize=shape)
    x = int(center[0] - width / 2)
    y = int(center[1] - height / 2)
    image = image[y:y + height, x:x + width]
    return image

def sort_contours(cnts, method="left-to-right"):
    # from https://pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
    reverse = False
    i = 0
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes), key=lambda b: b[1][i], reverse=reverse))
    return (cnts, boundingBoxes)

im_name = 'letters.png'
im = cv2.imread(im_name)
im_copy = im.copy()
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(im_copy, contours, -1, (0, 255, 0), 2)
cv2.imshow("contours", im_copy)
# print("num contours: " + str(len(contours)))
i = 0
sorted_cnts, bounding_boxes = sort_contours(contours, method="left-to-right")
for cnt in sorted_cnts:
    size = cv2.contourArea(cnt)
    x, y, w, h = cv2.boundingRect(cnt)
    rect = cv2.minAreaRect(cnt)
    im_cropped = subimage(im, center=rect[0], theta=rect[2], width=rect[1][0], height=rect[1][1])
    h, w = im_cropped.shape[0], im_cropped.shape[1]
    if w > h:
        im_cropped = cv2.rotate(im_cropped, cv2.ROTATE_90_CLOCKWISE)
    # print("w: " + str(w) + ", h: " + str(h))
    if w > 0 and h > 0:
        cv2.imshow("cropped" + str(i), im_cropped)
        i += 1
    # cv2.waitKey(0)
cv2.waitKey(0)

Why is the video output so slow (frame by frame) through OpenCV (Python)?

I'm building a neural network model, and detection.py is really slow: the output video advances literally frame by frame.
I tried adjusting a few things, but I still get the same result, a slow video. What could be the problem? Thank you.
import numpy as np
import random as rnd
import cv2
from utils import *
from model import *

seed = 11
rnd.seed(seed)
np.random.seed(seed)

videofile = "files/cardriving.mp4"
cap = cv2.VideoCapture(videofile)

model = make_model()
model.load_weights("weights/weights_best.h5")

lower = np.array([0, 0, 0])
upper = np.array([100, 100, 100])
stepSize = 30

while True:
    ret, frame = cap.read()
    if ret == False:
        print("Done")
        break
    # convert image to HSV from BGR
    img_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # find the pixels that correspond to the road
    img_out = cv2.inRange(img_hsv, lower, upper)
    # clean from noisy pixels and keep the largest connected segment
    img_out = post_process(img_out)
    image_masked = frame.copy()
    # get masked image
    image_masked[img_out == 0] = (0, 0, 0)
    s = 0.25
    # resize images for computational efficiency
    frame = cv2.resize(frame, None, fx=s, fy=s)
    image_masked = cv2.resize(image_masked, None, fx=s, fy=s)
    # run the sliding window detection process
    bbox_list, totalWindows, correct, score = detectionProcess(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), model, winH=50, winW=50, depth=3, nb_images=1, scale=1, stepSize=stepSize, thres_score=0.05)
    # draw the detections
    drawBoxes(frame, bbox_list)
    # show detections and road masks
    cv2.imshow("video", sidebyside(frame, image_masked))
    k = cv2.waitKey(3)
    if k & 0xFF == ord('q'):
        cv2.destroyWindow("video")
        break

cap.release()
cv2.destroyAllWindows()
Here is the utils.py file where I wrote post_process and detectionProcess:
import numpy as np
import cv2

def scale_to_image(x, a=0, b=255):
    # min-max scaling for grayscale images
    ma = (np.max(x))
    if ma == 0:
        return x.astype(np.uint8)
    mi = (np.min(x))
    normalized_data = ((x.astype(np.float) - float(mi)) / float(ma))  # normalize 0-1
    normalized_data = (normalized_data * b + a * (1 - normalized_data))  # scale values
    return normalized_data.astype(np.uint8)

def nothing(x):
    pass

def channels3(x):
    # stack grayscale images together to increase the color channels to 3
    return np.dstack((x, x, x))

def sidebyside(x, y):
    # concatenate images side by side (horizontally)
    return np.concatenate((x, y), axis=1)

def updown(x, y):
    # concatenate images up and down (vertically)
    return np.concatenate((x, y), axis=0)

def extractLargerSegment(maskROAD):
    contours, hierarchy = cv2.findContours(maskROAD.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE, contours=None, hierarchy=None)
    maxA = 0
    maskTemp = np.zeros_like(maskROAD)
    if len(contours) > 0:
        for h, cnt in enumerate(contours):
            if cv2.contourArea(cnt) > maxA:
                cntMax = cnt
                maxA = cv2.contourArea(cnt)
        mask = np.zeros(maskROAD.shape, np.uint8)
        cv2.drawContours(maskTemp, [cntMax], 0, 255, -1)
        maskROAD = cv2.bitwise_and(maskROAD, maskTemp)
    return maskROAD

def post_process(img):
    kernel = np.ones((5, 5), np.uint8)
    img_out = cv2.erode(img, kernel, iterations=3)
    kernel = np.ones((20, 20), np.uint8)
    img_out = cv2.dilate(img_out, kernel, iterations=5)
    img_out = extractLargerSegment(img_out)
    return img_out

def display(img_init, img_hsv, img_out2, img_out):
    mask = scale_to_image(np.dstack((img_out, np.zeros_like(img_out), np.zeros_like(img_out))))
    cv2.imshow("Output", updown(sidebyside(cv2.addWeighted(img_init, 1, mask, 0.3, 0), img_hsv), sidebyside(channels3(img_out), channels3(img_out2))))

def detectionProcess(frame, model, winH=32, winW=32, depth=1, nb_images=2, scale=1.2, stepSize=10, thres_score=0):
    index = 0
    totalWindows = 0
    correct = 0
    bbox_list = []
    score = []
    for resized in pyramid(frame, scale=scale, minSize=(winH, winW), nb_images=nb_images):
        scale = frame.shape[0] / resized.shape[0]
        for (x, y, window) in sliding_window(resized, stepSize=stepSize, windowSize=(winH, winW)):
            if window.shape[0] != winH or window.shape[1] != winW:
                continue
            if depth == 1:
                window = cv2.cvtColor(window, cv2.COLOR_BGR2GRAY)
                window = np.expand_dims(window, 3)
            window = window[None, :, :, :]
            totalWindows += 1
            class_out = model.predict((window.astype(np.float32)) / 255., batch_size=1)[0]
            if class_out < thres_score:
                bbox_list.append(((int(x * scale)), int(y * scale), int((x + winW) * scale), int((y + winH) * scale)))
                score.append(class_out)
                correct += 1
        index += 1
    return bbox_list, totalWindows, correct, score

def sliding_window(image, stepSize, windowSize):
    # slide a window across the image
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

def pyramid(image, scale=1.5, minSize=(30, 30), nb_images=3):
    yield image
    count = 0
    # keep looping over the pyramid
    while True:
        # compute new dimensions of the image and resize it
        w = int(image.shape[1] / scale)
        h = int(image.shape[0] / scale)
        image = cv2.resize(image, (w, h))
        count += 1
        scale = np.power((1 / scale), count)
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0] or (count == nb_images):
            break
        yield image

def drawBoxes(frame, bbox_list):
    for i in range(len(bbox_list)):
        box = bbox_list[i]
        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 255), 2)
    return frame
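No answer is attached here, but one likely cause of the slowness is that detectionProcess calls model.predict with batch_size=1 once per sliding window, so a single frame triggers hundreds of separate forward passes. Below is a hedged sketch of batching all windows of a frame into one predict call; it assumes a Keras-style model that outputs one score per window and, for brevity, skips the image pyramid (the posted code uses nb_images=1 anyway).
import cv2
import numpy as np

def detectionProcess_batched(frame, model, winH=50, winW=50, depth=3, stepSize=30, thres_score=0.05):
    # Collect every window first, then run a single batched forward pass.
    windows, coords = [], []
    for y in range(0, frame.shape[0] - winH + 1, stepSize):
        for x in range(0, frame.shape[1] - winW + 1, stepSize):
            win = frame[y:y + winH, x:x + winW]
            if depth == 1:
                win = cv2.cvtColor(win, cv2.COLOR_BGR2GRAY)[..., None]
            windows.append(win)
            coords.append((x, y))
    if not windows:
        return [], 0, 0, []
    batch = np.asarray(windows, dtype=np.float32) / 255.0
    scores = model.predict(batch, batch_size=64)[:, 0]   # one call instead of one per window
    bbox_list, score = [], []
    for (x, y), s in zip(coords, scores):
        if s < thres_score:
            bbox_list.append((x, y, x + winW, y + winH))
            score.append(s)
    return bbox_list, len(coords), len(bbox_list), score
Swapping this in for detectionProcess in the main loop (it keeps the same return signature) should give the largest speed-up; increasing stepSize or processing every other frame also helps, but batching the predictions is usually the decisive change.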

Tesseract not detecting any text on RGB images on Python

Hey, I started working with Tesseract OCR, but I'm having problems getting the text from really simple RGB images.
It works just fine with text2image images.
Here is my code:
from PIL import Image
import pytesseract
import argparse
import cv2
import os
import sys

class wordExtractor():
    def __init__(self, image_path):
        self.image_path = image_path
        pytesseract.pytesseract.tesseract_cmd = r'/home/yarin/tesseract/bin/debug/tesseract'
        #self.resize_image()

    def resize_image(self):
        basewidth = 800
        img = Image.open(self.image_path)
        wpercent = (basewidth / float(img.size[0]))
        hsize = int((float(img.size[1]) * float(wpercent)))
        img = img.resize((basewidth, hsize), Image.ANTIALIAS)
        os.remove(self.image_path)
        img.save(self.image_path[:-4] + '.png')
        self.image_path = self.image_path[:-4] + '.png'

    def get_text(self, lang):
        # load the example image and convert it to grayscale
        image = cv2.imread(self.image_path)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # check to see if we should apply thresholding to preprocess the image
        #if args["preprocess"] == "thresh":
        gray = cv2.threshold(gray, 0, 255,
                             cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        # make a check to see if median blurring should be done to remove noise
        #elif args["preprocess"] == "blur":
        #    gray = cv2.medianBlur(gray, 3)
        # write the grayscale image to disk as a temporary file so we can apply OCR to it
        filename = "{}.png".format(os.getpid())
        cv2.imwrite(filename, gray)
        # load the image as a PIL/Pillow image, apply OCR, and then delete the temporary file
        text = pytesseract.image_to_string(Image.open(filename), lang='eng')
        os.remove(filename)
        return text
        # show the output images
        #cv2.imshow("Image", image)
        #cv2.imshow("Output", gray)
        #cv2.waitKey(0)

w = wordExtractor('6.png')
print(w.get_text('eng'))
Tesseract returns an empty string for the following images:
Please show me how I can solve this. Thanks in advance!
After thresholding, you can use findContours to find a contour for each shape. Then you can filter the contours and put every contour you are interested in onto a blank white image. The letters will then be ready to process with tesseract. You can see the details in the code below.
import cv2
import numpy as np
import pytesseract

# img = cv2.imread("dwLFQ.png", cv2.IMREAD_COLOR)
img = cv2.imread("NfwY4.png", cv2.IMREAD_COLOR)
# img = cv2.imread("xTH6s.png", cv2.IMREAD_COLOR)

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

items = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours = items[0] if len(items) == 2 else items[1]

base = np.zeros(thresh.shape, dtype=np.uint8)
base = cv2.bitwise_not(base)

max_area = 0
for i in range(len(contours)):
    x, y, w, h = cv2.boundingRect(contours[i])
    ratio = h / w
    area = cv2.contourArea(contours[i])
    cv2.drawContours(img, [contours[i]], 0, (255, 0, 0), 2)
    if 1 < ratio < 3:
        max_area = max(area, max_area)
        print("area: " + str(area) + ", max area: " + str(max_area) + ", ratio: " + str(ratio))
        # if 1000 < area < max_area / 2:
        if 1000 < area < 40000:
            mask = np.zeros(thresh.shape, dtype=np.uint8)
            cv2.drawContours(mask, [contours[i]], -1, color=255, thickness=-1)
            mean = cv2.mean(thresh, mask=mask)
            segment = np.zeros((h, w), dtype=np.uint8)
            segment[:h, :w] = thresh[y:y + h, x:x + w]
            if mean[0] > 150:
                # white, invert
                segment = cv2.bitwise_not(segment)
            base[y:y + h, x:x + w] = segment[:h, :w]
            cv2.imshow("base", base)
            cv2.drawContours(img, [contours[i]], 0, (255, 0, 0), 2)
            cv2.waitKey(0)

custom_config = r'-l eng --oem 3 --psm 6 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ " '
text = pytesseract.image_to_string(base, config=custom_config)
print("detected: " + text)

cv2.imshow("img", img)
cv2.imshow("base", base)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result
detected: NO
ENTRY

How to tune tesseract for identifying number plate of a car more accurately?

I have code to detect and identify a car's number plate and convert the image into text using tesseract.
I am using OpenCV to localise the number plate.
The problem I am facing is that tesseract does not identify the number accurately. Is there any way I can improve the tesseract performance?
My code (which I downloaded from the Internet) is:
import numpy as np
import cv2
# from copy import deepcopy
from PIL import Image
import pytesseract as tess

# plate = 0

def preprocess(img):
    # print('preprocessing image')
    # cv2.imshow("Input", img)
    imgBlurred = cv2.GaussianBlur(img, (5, 5), 0)
    gray = cv2.cvtColor(imgBlurred, cv2.COLOR_BGR2GRAY)
    sobelx = cv2.Sobel(gray, cv2.CV_8U, 1, 0, ksize=3)
    cv2.imshow("Sobel", sobelx)
    cv2.waitKey(0)
    ret2, threshold_img = cv2.threshold(sobelx, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    cv2.imshow("Threshold", threshold_img)
    cv2.waitKey(0)
    return threshold_img

def cleanPlate(plate):
    # print("CLEANING PLATE. . .")
    gray = cv2.cvtColor(plate, cv2.COLOR_BGR2GRAY)
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    thresh = cv2.dilate(gray, kernel, iterations=1)
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
    im1, contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    if contours:
        areas = [cv2.contourArea(c) for c in contours]
        max_index = np.argmax(areas)
        max_cnt = contours[max_index]
        max_cntArea = areas[max_index]
        x, y, w, h = cv2.boundingRect(max_cnt)
        if not ratioCheck(max_cntArea, w, h):
            return plate, None
        cleaned_final = thresh[y:y + h, x:x + w]
        # cv2.imshow("Function Test", cleaned_final)
        return cleaned_final, [x, y, w, h]
    else:
        return plate, None

def extract_contours(threshold_img):
    # print('extracting contours')
    element = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(17, 3))
    morph_img_threshold = threshold_img.copy()
    cv2.morphologyEx(src=threshold_img, op=cv2.MORPH_CLOSE, kernel=element, dst=morph_img_threshold)
    cv2.imshow("Morphed", morph_img_threshold)
    cv2.waitKey(0)
    im2, contours, hierarchy = cv2.findContours(morph_img_threshold, mode=cv2.RETR_EXTERNAL,
                                                method=cv2.CHAIN_APPROX_NONE)
    return contours

def ratioCheck(area, width, height):
    # print('checking ratio')
    ratio = float(width) / float(height)
    if ratio < 1:
        ratio = 1 / ratio
    aspect = 4.7272
    min = 15 * aspect * 15  # minimum area
    max = 125 * aspect * 125  # maximum area
    rmin = 3
    rmax = 6
    if (area < min or area > max) or (ratio < rmin or ratio > rmax):
        return False
    return True

def isMaxWhite(plate):
    # print('is max white')
    avg = np.mean(plate)
    if avg >= 115:
        return True
    else:
        return False

def validateRotationAndRatio(rect):
    # print('validate the rotation and ratio')
    (x, y), (width, height), rect_angle = rect
    if width > height:
        angle = -rect_angle
    else:
        angle = 90 + rect_angle
    if angle > 15:
        return False
    if height == 0 or width == 0:
        return False
    area = height * width
    if not ratioCheck(area, width, height):
        return False
    else:
        return True

def cleanAndRead(img, contours):
    # print('clean and read')
    # count = 0
    for i, cnt in enumerate(contours):
        min_rect = cv2.minAreaRect(cnt)
        if validateRotationAndRatio(min_rect):
            x, y, w, h = cv2.boundingRect(cnt)
            plate_img = img[y:y + h, x:x + w]
            if isMaxWhite(plate_img):
                # count += 1
                clean_plate, rect = cleanPlate(plate_img)
                if rect:
                    x1, y1, w1, h1 = rect
                    x, y, w, h = x + x1, y + y1, w1, h1
                    cv2.imshow("Cleaned Plate", clean_plate)
                    cv2.waitKey(0)
                    plate_im = Image.fromarray(clean_plate)
                    plate_im.save('donald1.png')
                    text = tess.image_to_string(plate_im, lang='eng')
                    # print text
                    img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    cv2.imshow("Detected Plate", img)
                    cv2.waitKey(0)
                    return text

numberplate = 0
img = cv2.imread("car_number_plate.jpg")
threshold_img = preprocess(img)
contours = extract_contours(threshold_img)
# if len(contours) != 0:
#     print len(contours)  # Test
#     cv2.drawContours(img, contours, -1, (0, 255, 0), 1)
#     cv2.imshow("Contours", img)
#     cv2.waitKey(0)
plate = cleanAndRead(img, contours)
print('plate information: ', plate)
If my number plate is MH01AV8866, it will be recognised as MH01AY8866.
Any suggestion will be appreciated. Let me know if any other information is required too.
You are using tesseract as a general-purpose model. You can fine-tune it for your problem: generate synthetic data for your number plates with this tool
https://github.com/Belval/TextRecognitionDataGenerator
and then tune the model using the steps described here:
https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00---Finetune
https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00
I've tuned tesseract on synthetic data and it works like a charm. I tried both CNN models and tesseract; tesseract trains better with less data and gives better performance.
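For example, plate-like training strings can be generated and rendered roughly like this. This is only a sketch: GeneratorFromStrings and its arguments are assumed from the TextRecognitionDataGenerator README, and the plate pattern below just mimics MH01AV8866, so check the repo for the exact API and adjust the pattern to your plates.
import os
import random
import string
from trdg.generators import GeneratorFromStrings  # assumed API; see the repo's README

def random_plate():
    # 2 letters, 2 digits, 2 letters, 4 digits (e.g. MH01AV8866)
    return (''.join(random.choices(string.ascii_uppercase, k=2))
            + ''.join(random.choices(string.digits, k=2))
            + ''.join(random.choices(string.ascii_uppercase, k=2))
            + ''.join(random.choices(string.digits, k=4)))

os.makedirs("plates", exist_ok=True)
plates = [random_plate() for _ in range(1000)]
generator = GeneratorFromStrings(plates, count=1000, blur=1, random_blur=True)
for i, (image, label) in enumerate(generator):
    image.save("plates/{}_{}.png".format(label, i))  # image is a PIL Image
The resulting image/label pairs can then be fed into the fine-tuning workflow from the Tesseract training docs linked above.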
