I am trying to detect drops inside water. My first step is edge detection, but there are light spots in the image that are also detected as drops. Note that the drops are white, each surrounded by a dark ring.
My code:
import cv2
import numpy as np

def unsharp_mask(img, blur_size=(5, 5), imgWeight=1.5, gaussianWeight=-0.5):
    # sharpen by subtracting a weighted Gaussian blur from the image
    gaussian = cv2.GaussianBlur(img, blur_size, 0)
    return cv2.addWeighted(img, imgWeight, gaussian, gaussianWeight, 0)

def clahe(img, clip_limit=2.0):
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(5, 5))
    return clahe.apply(img)

def get_sobel(img, size=3):
    # second derivative in x; note that ksize must be passed by keyword,
    # otherwise it lands in the dst slot and is silently ignored
    sobelx64f = cv2.Sobel(img, cv2.CV_64F, 2, 0, ksize=size)
    abs_sobel64f = np.absolute(sobelx64f)
    return np.uint8(abs_sobel64f)
img = cv2.imread("img_brightened.jpg")
# save color copy for visualizing
imgc = img.copy()
# resize image to make the analytics easier (a form of filtering)
resize_times = 1.5
img = cv2.resize(img, None, fx=1 / resize_times, fy=1 / resize_times)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow("Input", img)
# use sobel operator to evaluate high frequencies
sobel = get_sobel(img)
# experimentally calculated function - needs refining
clip_limit = (-2.556) * np.sum(sobel) / (img.shape[0] * img.shape[1]) + 26.557
# don't apply clahe if there is enough high freq to find blobs
if clip_limit < 1.0:
    clip_limit = 0.1
# limit clahe if there's not enough detail - needs more tests
if clip_limit > 8.0:
    clip_limit = 8.0
# apply clahe and unsharp mask to improve high frequencies as much as possible
img = clahe(img, clip_limit)
img = unsharp_mask(img)
# blur the image to ensure edge continuity, then perform Canny
img_blurred = cv2.GaussianBlur(img, (2 * 2 + 1, 2 * 2 + 1), 0)
canny = cv2.Canny(img_blurred, 100, 255)
cv2.imshow("Output", canny)
cv2.waitKey(0)
Result
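Since the drops are described as bright regions with a dark surround, one possible alternative worth sketching is OpenCV's SimpleBlobDetector configured for bright blobs instead of raw edge detection. This is only a sketch: the filename, area limits, and circularity threshold are placeholders that would need tuning on the real images.

import cv2

# load the preprocessed grayscale image (placeholder filename)
gray = cv2.imread("img_brightened.jpg", cv2.IMREAD_GRAYSCALE)

params = cv2.SimpleBlobDetector_Params()
params.filterByColor = True
params.blobColor = 255         # look for bright blobs on a darker background
params.filterByArea = True
params.minArea = 10            # placeholder: tune to the expected drop size
params.maxArea = 500
params.filterByCircularity = True
params.minCircularity = 0.6    # drops should be roughly round

detector = cv2.SimpleBlobDetector_create(params)
keypoints = detector.detect(gray)

# draw detected blobs as circles sized by blob diameter
vis = cv2.drawKeypoints(gray, keypoints, None, (0, 0, 255),
                        cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv2.imshow("blobs", vis)
cv2.waitKey(0)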
I then used the code from https://github.com/kavyamusty/Shading-removal-of-images/blob/master/Article%20submission.ipynb, which removes shadows first, followed by cv2.HoughCircles to find the circles.
The code is below:
import cv2
import numpy as np
import matplotlib.pyplot as plt
def max_filtering(N, I_temp):
    # pad the image with -1 so border pixels are skipped
    wall = np.full((I_temp.shape[0] + (N // 2) * 2, I_temp.shape[1] + (N // 2) * 2), -1)
    wall[(N // 2):wall.shape[0] - (N // 2), (N // 2):wall.shape[1] - (N // 2)] = I_temp.copy()
    temp = np.full((I_temp.shape[0] + (N // 2) * 2, I_temp.shape[1] + (N // 2) * 2), -1)
    for y in range(0, wall.shape[0]):
        for x in range(0, wall.shape[1]):
            if wall[y, x] != -1:
                window = wall[y - (N // 2):y + (N // 2) + 1, x - (N // 2):x + (N // 2) + 1]
                num = np.amax(window)
                temp[y, x] = num
    A = temp[(N // 2):wall.shape[0] - (N // 2), (N // 2):wall.shape[1] - (N // 2)].copy()
    return A
def min_filtering(N, A):
    # pad with 300 (above any uint8 value) so border pixels are skipped
    wall_min = np.full((A.shape[0] + (N // 2) * 2, A.shape[1] + (N // 2) * 2), 300)
    wall_min[(N // 2):wall_min.shape[0] - (N // 2), (N // 2):wall_min.shape[1] - (N // 2)] = A.copy()
    temp_min = np.full((A.shape[0] + (N // 2) * 2, A.shape[1] + (N // 2) * 2), 300)
    for y in range(0, wall_min.shape[0]):
        for x in range(0, wall_min.shape[1]):
            if wall_min[y, x] != 300:
                window_min = wall_min[y - (N // 2):y + (N // 2) + 1, x - (N // 2):x + (N // 2) + 1]
                num_min = np.amin(window_min)
                temp_min[y, x] = num_min
    B = temp_min[(N // 2):wall_min.shape[0] - (N // 2), (N // 2):wall_min.shape[1] - (N // 2)].copy()
    return B
def background_subtraction(I, B):
    O = I - B
    norm_img = cv2.normalize(O, None, 0, 255, norm_type=cv2.NORM_MINMAX)
    return norm_img

def min_max_filtering(M, N, I):
    if M == 0:
        # max filtering, then min filtering, then subtraction
        A = max_filtering(N, I)
        B = min_filtering(N, A)
        normalised_img = background_subtraction(I, B)
    elif M == 1:
        # min filtering, then max filtering, then subtraction
        A = min_filtering(N, I)
        B = max_filtering(N, A)
        normalised_img = background_subtraction(I, B)
    return normalised_img
# Read image
img = cv2.imread(r"D:/Image.jpg")
# Copy the original image
cimg = img.copy()
# Initialize an array of uint8
img_remove_shadow = np.zeros(np.shape(img), dtype="uint8")
for i in range(np.shape(img)[2]):
    img_remove_shadow[:, :, i] = np.array(min_max_filtering(M=0, N=20, I=img[:, :, i]))
# Apply a median blur
img = cv2.medianBlur(img_remove_shadow, 5)
# Convert to grayscale
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow("Removing Shadow", img)
# Find circles
circles = cv2.HoughCircles(img, cv2.HOUGH_GRADIENT, 1, 5, np.array([]), 40, 23, 5, 20)
circles = np.uint16(np.around(circles))
for i in circles[0, :]:
    # draw the outer circle
    cv2.circle(cimg, (i[0], i[1]), i[2], (0, 255, 0), 2)
    # draw the center of the circle
    cv2.circle(cimg, (i[0], i[1]), 2, (0, 0, 255), 3)
cv2.imshow('detected circles', cimg)
cv2.waitKey(0)
cv2.destroyAllWindows()
The result is as below:
PS: The code takes 11.74 s to run; I would appreciate it if someone could optimize it.
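Regarding the running time: almost all of it is likely spent in the pure-Python pixel loops of max_filtering and min_filtering. A morphological dilation with a flat (all-ones) kernel is exactly a local-maximum filter, and erosion a local-minimum filter, so the two functions can probably be replaced by cv2.dilate and cv2.erode. A minimal sketch of a drop-in replacement for min_max_filtering (border handling differs slightly from the original padding logic, so results near the image edges may not match exactly):

import cv2
import numpy as np

def min_max_filtering_fast(M, N, I):
    # a flat NxN kernel makes dilate a local-max filter and erode a local-min filter
    kernel = np.ones((N, N), np.uint8)
    if M == 0:
        A = cv2.dilate(I, kernel)  # max filtering
        B = cv2.erode(A, kernel)   # min filtering
    else:
        A = cv2.erode(I, kernel)   # min filtering
        B = cv2.dilate(A, kernel)  # max filtering
    # subtract in a signed type, since the original numpy subtraction can go negative
    O = I.astype(np.int16) - B.astype(np.int16)
    return cv2.normalize(O, None, 0, 255, norm_type=cv2.NORM_MINMAX)

With N = 20 this should run in milliseconds per channel rather than seconds, since the filtering happens in optimized C++ instead of nested Python loops.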
I have a bunch of image snippets with low contrast that I'd like to binarize using Python.
I tried various thresholding methods such as Otsu and Huang, but none seems to work for all my image snippets.
Following instructions like this one, I puzzled together the code below:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import math
import glob
import os.path
import os

def permissions(targetfile):
    os.chmod(targetfile, mode=0o755)
    os.chown(targetfile, 1000, 1000)

# resize snippet
def resize(image):
    image_resized = cv2.resize(image, None, fx=12, fy=12)
    return image_resized

# apply CLAHE
def clahe(image):
    # CLAHE parameters
    cl1 = 6
    cl2 = 9
    cl3 = 9
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=cl1, tileGridSize=(cl2, cl3))
    cv_gray_clahe = clahe.apply(image_gray)
    return cv_gray_clahe

# binarize image using Huang's method (https://github.com/dnhkng/Huang-Thresholding)
def binarize(image):
    histogram, bin_edges = np.histogram(image, bins=range(257))
    huang_threshold = Huang(histogram)
    threshold = np.where(image > huang_threshold, 1, 0)
    threshold = threshold.astype(np.uint8)
    return threshold
def Huang(data):
    """Implements Huang's fuzzy thresholding method.
    Uses Shannon's entropy function (one can also use Yager's entropy function).
    Huang L.-K. and Wang M.-J.J. (1995) "Image Thresholding by Minimizing
    the Measures of Fuzziness" Pattern Recognition, 28(1): 41-51"""
    threshold = -1
    first_bin = 0
    for ih in range(254):
        if data[ih] != 0:
            first_bin = ih
            break
    last_bin = 254
    for ih in range(254, -1, -1):
        if data[ih] != 0:
            last_bin = ih
            break
    term = 1.0 / (last_bin - first_bin)
    mu_0 = np.zeros(shape=(254, 1))
    num_pix = 0.0
    sum_pix = 0.0
    for ih in range(first_bin, 254):
        sum_pix = sum_pix + (ih * data[ih])
        num_pix = num_pix + data[ih]
        mu_0[ih] = sum_pix / num_pix  # num_pix cannot be zero!
    mu_1 = np.zeros(shape=(254, 1))
    num_pix = 0.0
    sum_pix = 0.0
    for ih in range(last_bin, 1, -1):
        sum_pix = sum_pix + (ih * data[ih])
        num_pix = num_pix + data[ih]
        mu_1[ih - 1] = sum_pix / num_pix  # num_pix cannot be zero!
    min_ent = float("inf")
    for it in range(254):
        ent = 0.0
        for ih in range(it):
            # Equation (4) in the reference
            mu_x = 1.0 / (1.0 + term * math.fabs(ih - mu_0[it]))
            if not ((mu_x < 1e-06) or (mu_x > 0.999999)):
                # Equations (6) & (8) in the reference
                ent = ent + data[ih] * (-mu_x * math.log(mu_x) - (1.0 - mu_x) * math.log(1.0 - mu_x))
        for ih in range(it + 1, 254):
            # Equation (4) in the reference
            mu_x = 1.0 / (1.0 + term * math.fabs(ih - mu_1[it]))
            if not ((mu_x < 1e-06) or (mu_x > 0.999999)):
                # Equations (6) & (8) in the reference
                ent = ent + data[ih] * (-mu_x * math.log(mu_x) - (1.0 - mu_x) * math.log(1.0 - mu_x))
        if ent < min_ent:
            min_ent = ent
            threshold = it
    return threshold
# Input files
path = glob.glob("./" + "*.JPG")
path.extend(glob.glob("./" + "*.jpg"))
# Output directory
targetdir = "./output/"
os.makedirs(targetdir, exist_ok=True)
permissions(targetdir)

for img in path:
    poststring = ""
    targetfile = targetdir + os.path.basename(img).split('.')[0] + poststring + \
                 os.path.splitext(img)[1]
    if not os.path.exists(targetfile):
        print("Processing targetfile: ", targetfile)
        # read image and resize
        image = cv2.imread(img)
        resized_image = resize(image)
        # CLAHE
        clahe_image = clahe(resized_image)
        denoised_image = cv2.fastNlMeansDenoising(clahe_image, h=21, templateWindowSize=9, searchWindowSize=21)
        # Huang thresholding
        binarized_image = binarize(denoised_image)
        binarized_image *= 255
        # dilate
        kernel = np.ones((12, 12), np.uint8)
        dilate = cv2.dilate(binarized_image, kernel, iterations=3)
        # flood fill from the borders to remove blobs touching the edges;
        # note: use the dimensions of the (resized) mask, not the original image
        h, w = dilate.shape[:2]
        for row in range(h):
            if dilate[row, 0] == 255:
                cv2.floodFill(dilate, None, (0, row), 0)
            if dilate[row, w - 1] == 255:
                cv2.floodFill(dilate, None, (w - 1, row), 0)
        for col in range(w):
            if dilate[0, col] == 255:
                cv2.floodFill(dilate, None, (col, 0), 0)
            if dilate[h - 1, col] == 255:
                cv2.floodFill(dilate, None, (col, h - 1), 0)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
        foreground = cv2.morphologyEx(dilate, cv2.MORPH_OPEN, kernel)
        foreground = cv2.morphologyEx(foreground, cv2.MORPH_CLOSE, kernel)
        # create background
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (17, 17))
        background = cv2.dilate(foreground, kernel, iterations=3)
        cv2.imwrite(targetfile, background)
        permissions(targetfile)
    else:
        print("Skipping, because already existing: ", targetfile)
        permissions(targetfile)
    print('')
The result is still not satisfying:
Could you please advise on how to reduce the noise, keep the desired features, and obtain straight/ellipse-like contour lines?
Adding the original snippets here for testing purposes: download snippets
Here is one approach, which stretches the contrast first. It may need tuning for other images.
Read the input
Stretch the contrast
Convert to gray
Adaptive threshold
Get contours and filter on area larger than some threshold
Draw white filled contours on black background for those contours that pass the filter
Save the results
Input:
import cv2
import numpy as np
import skimage.exposure
# load image
img = cv2.imread('low_contrast.png')
# stretch contrast
stretch = skimage.exposure.rescale_intensity(img, in_range=(95,115), out_range=(0,255)).astype(np.uint8)
# Gaussian blur
blur = cv2.GaussianBlur(stretch, (0,0), sigmaX=5, sigmaY=5)
# convert to gray
gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
# threshold
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 101, -9)
# get contours and filter on area
contour_img = img.copy()
result = np.zeros_like(thresh)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
    area = cv2.contourArea(cntr)
    if area > 2000:
        cv2.drawContours(contour_img, [cntr], 0, (255, 255, 255), 1)
        cv2.drawContours(result, [cntr], 0, 255, -1)
cv2.imwrite('low_contrast_stretched.png', stretch)
cv2.imwrite('low_contrast_blur.png', blur)
cv2.imwrite('low_contrast_gray.png', gray)
cv2.imwrite('low_contrast_thresh.png', thresh)
cv2.imwrite('low_contrast_contours.png', contour_img)
cv2.imwrite('low_contrast_contours_filled.png', result)
cv2.imshow('stretch', stretch)
cv2.imshow('blur', blur)
cv2.imshow('gray', gray)
cv2.imshow('thresh', thresh)
cv2.imshow('contours', contour_img)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Contrast stretched image:
Blurred image:
Grayscale image:
Adaptive threshold image:
Filtered Contour Image:
Final Binary image:
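If genuinely ellipse-like outlines are wanted rather than the raw contour shapes, one possible refinement (a sketch only, building on the contours and img variables from the code above) is to fit an ellipse to each kept contour with cv2.fitEllipse and draw that instead. fitEllipse needs at least five contour points:

# sketch: replace the raw contours with fitted ellipses
ellipse_img = img.copy()
for cntr in contours:
    if cv2.contourArea(cntr) > 2000 and len(cntr) >= 5:
        ellipse = cv2.fitEllipse(cntr)  # ((cx, cy), (w, h), angle)
        cv2.ellipse(ellipse_img, ellipse, (0, 255, 0), 2)
cv2.imwrite('low_contrast_ellipses.png', ellipse_img)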
I'm building a neural network model, and my detection.py file is really slow: the output video advances literally frame by frame.
I tried adjusting a few things, but the result is still the same, a slow video. What can be the problem? Thank you.
import numpy as np
import random as rnd
import cv2
from utils import *
from model import *

seed = 11
rnd.seed(seed)
np.random.seed(seed)

videofile = "files/cardriving.mp4"
cap = cv2.VideoCapture(videofile)

model = make_model()
model.load_weights("weights/weights_best.h5")

lower = np.array([0, 0, 0])
upper = np.array([100, 100, 100])
stepSize = 30

while True:
    ret, frame = cap.read()
    if not ret:
        print("Done")
        break
    # convert image from BGR to HSV
    img_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # find the pixels that correspond to the road
    img_out = cv2.inRange(img_hsv, lower, upper)
    # clean up noisy pixels and keep the largest connected segment
    img_out = post_process(img_out)
    image_masked = frame.copy()
    # get masked image
    image_masked[img_out == 0] = (0, 0, 0)
    s = 0.25
    # resize images for computational efficiency
    frame = cv2.resize(frame, None, fx=s, fy=s)
    image_masked = cv2.resize(image_masked, None, fx=s, fy=s)
    # run the sliding window detection process
    bbox_list, totalWindows, correct, score = detectionProcess(
        cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), model, winH=50, winW=50,
        depth=3, nb_images=1, scale=1, stepSize=stepSize, thres_score=0.05)
    # draw the detections
    drawBoxes(frame, bbox_list)
    # show detections and road mask side by side
    cv2.imshow("video", sidebyside(frame, image_masked))
    k = cv2.waitKey(3)
    if k & 0xFF == ord('q'):
        cv2.destroyWindow("video")
        break

cap.release()
cv2.destroyAllWindows()
Here is the utils.py file, where I wrote post_process and detectionProcess:
import numpy as np
import cv2

def scale_to_image(x, a=0, b=255):
    # min-max scaling for grayscale images
    ma = np.max(x)
    if ma == 0:
        return x.astype(np.uint8)
    mi = np.min(x)
    normalized_data = (x.astype(float) - float(mi)) / float(ma)  # normalize to 0-1
    normalized_data = normalized_data * b + a * (1 - normalized_data)  # scale values
    return normalized_data.astype(np.uint8)

def nothing(x):
    pass

def channels3(x):
    # stack a grayscale image three times to get 3 color channels
    return np.dstack((x, x, x))

def sidebyside(x, y):
    # concatenate images side by side (horizontally)
    return np.concatenate((x, y), axis=1)

def updown(x, y):
    # concatenate images up and down (vertically)
    return np.concatenate((x, y), axis=0)

def extractLargerSegment(maskROAD):
    contours, hierarchy = cv2.findContours(maskROAD.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    maxA = 0
    maskTemp = np.zeros_like(maskROAD)
    if len(contours) > 0:
        for h, cnt in enumerate(contours):
            if cv2.contourArea(cnt) > maxA:
                cntMax = cnt
                maxA = cv2.contourArea(cnt)
        cv2.drawContours(maskTemp, [cntMax], 0, 255, -1)
        maskROAD = cv2.bitwise_and(maskROAD, maskTemp)
    return maskROAD

def post_process(img):
    kernel = np.ones((5, 5), np.uint8)
    img_out = cv2.erode(img, kernel, iterations=3)
    kernel = np.ones((20, 20), np.uint8)
    img_out = cv2.dilate(img_out, kernel, iterations=5)
    img_out = extractLargerSegment(img_out)
    return img_out

def display(img_init, img_hsv, img_out2, img_out):
    mask = scale_to_image(np.dstack((img_out, np.zeros_like(img_out), np.zeros_like(img_out))))
    cv2.imshow("Output", updown(sidebyside(cv2.addWeighted(img_init, 1, mask, 0.3, 0), img_hsv),
                                sidebyside(channels3(img_out), channels3(img_out2))))

def detectionProcess(frame, model, winH=32, winW=32, depth=1, nb_images=2, scale=1.2, stepSize=10, thres_score=0):
    index = 0
    totalWindows = 0
    correct = 0
    bbox_list = []
    score = []
    for resized in pyramid(frame, scale=scale, minSize=(winH, winW), nb_images=nb_images):
        scale = frame.shape[0] / resized.shape[0]
        for (x, y, window) in sliding_window(resized, stepSize=stepSize, windowSize=(winH, winW)):
            if window.shape[0] != winH or window.shape[1] != winW:
                continue
            if depth == 1:
                window = cv2.cvtColor(window, cv2.COLOR_BGR2GRAY)
                window = np.expand_dims(window, 3)
            window = window[None, :, :, :]
            totalWindows += 1
            class_out = model.predict((window.astype(np.float32)) / 255., batch_size=1)[0]
            if class_out < thres_score:
                bbox_list.append((int(x * scale), int(y * scale), int((x + winW) * scale), int((y + winH) * scale)))
                score.append(class_out)
                correct += 1
        index += 1
    return bbox_list, totalWindows, correct, score

def sliding_window(image, stepSize, windowSize):
    # slide a window across the image
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

def pyramid(image, scale=1.5, minSize=(30, 30), nb_images=3):
    yield image
    count = 0
    # keep looping over the pyramid
    while True:
        # compute the new dimensions of the image and resize it
        w = int(image.shape[1] / scale)
        h = int(image.shape[0] / scale)
        image = cv2.resize(image, (w, h))
        count += 1
        scale = np.power((1 / scale), count)
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0] or (count == nb_images):
            break
        yield image

def drawBoxes(frame, bbox_list):
    for i in range(len(bbox_list)):
        box = bbox_list[i]
        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 255), 2)
    return frame
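As for the slowness: the usual bottleneck in this kind of sliding-window loop is calling model.predict once per window with batch_size=1, which pays the full framework overhead for every 50x50 crop. A hedged sketch of how detectionProcess could collect all windows first and classify them in one batched call (variable names follow the code above; it assumes the model outputs one score per window, as the existing batch_size=1 call implies, and omits the depth == 1 grayscale branch since the caller uses depth=3):

import numpy as np

def detectionProcess_batched(frame, model, winH=32, winW=32, nb_images=2,
                             scale=1.2, stepSize=10, thres_score=0):
    windows, coords = [], []
    for resized in pyramid(frame, scale=scale, minSize=(winH, winW), nb_images=nb_images):
        sc = frame.shape[0] / resized.shape[0]
        for (x, y, window) in sliding_window(resized, stepSize=stepSize, windowSize=(winH, winW)):
            if window.shape[0] != winH or window.shape[1] != winW:
                continue
            windows.append(window)
            coords.append((int(x * sc), int(y * sc), int((x + winW) * sc), int((y + winH) * sc)))
    if not windows:
        return [], 0, 0, []
    # one forward pass over all windows instead of one predict() call per window
    batch = np.stack(windows).astype(np.float32) / 255.
    class_out = model.predict(batch, batch_size=64)[:, 0]
    keep = class_out < thres_score
    bbox_list = [c for c, k in zip(coords, keep) if k]
    return bbox_list, len(windows), int(keep.sum()), list(class_out[keep])

Batching typically speeds up the per-frame prediction by one to two orders of magnitude; increasing stepSize or lowering the input resolution further would trade accuracy for speed.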
I have a piece of code that calculates and draws dense optical flow:
import cv2 as cv
import numpy as np

cap = cv.VideoCapture(0)

def getFrame():
    ret, img = cap.read()
    img = cv.resize(img, (640, 480))
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    return gray

def draw_flow(img, flow, step=16):
    h, w = img.shape[:2]
    y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2, -1).astype(int)
    fx, fy = flow[y, x].T
    lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
    lines = np.int32(lines + 0.5)
    vis = cv.cvtColor(img, cv.COLOR_GRAY2BGR)
    cv.polylines(vis, lines, 0, (0, 255, 0))
    for (x1, y1), (_x2, _y2) in lines:
        cv.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
    return vis

def main():
    prevgray = getFrame()
    while True:
        gray = getFrame()
        flow = cv.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 10, 1.2, 0)
        prevgray = gray
        cv.imshow('flow', draw_flow(gray, flow))
        cv.waitKey(1)

if __name__ == '__main__':
    main()
Since this takes too long, I had the idea of computing SIFT features, matching them, and then calculating the optical flow only for those features.
So far I have this:
import cv2 as cv
import numpy as np

cap = cv.VideoCapture(0)
sift = cv.xfeatures2d.SIFT_create()
bf = cv.BFMatcher()

def getFrame():
    ret, img = cap.read()
    img = cv.resize(img, (640, 480))
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    return gray

def main():
    prev_gray = getFrame()
    prev_kp, prev_des = sift.detectAndCompute(prev_gray, None)
    while True:
        next_gray = getFrame()
        next_kp, next_des = sift.detectAndCompute(next_gray, None)
        matches = bf.knnMatch(prev_des, next_des, k=2)
        # apply the ratio test
        good = []
        for m, n in matches:
            if m.distance < 0.75 * n.distance:
                good.append([m])
        # calc optical flow
        prev_kp = next_kp
        prev_des = next_des
        prev_gray = next_gray
        cv.waitKey(1)

if __name__ == '__main__':
    main()
How can I calculate the optical flow of the SIFT features and draw it with the draw_flow function from the first code?
I also have this code:
import numpy as np
import cv2 as cv

lk_params = dict(winSize=(15, 15),
                 maxLevel=2,
                 criteria=(cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))

feature_params = dict(maxCorners=500,
                      qualityLevel=0.3,
                      minDistance=7,
                      blockSize=7)

track_len = 5
tracks = []
cam = cv.VideoCapture(0)

def getFrame():
    _ret, bgr = cam.read()
    bgr = cv.resize(bgr, (640, 480))
    gray = cv.cvtColor(bgr, cv.COLOR_BGR2GRAY)
    return gray, bgr

while True:
    gray, vis = getFrame()
    if len(tracks) > 0:
        img0, img1 = prev_gray, gray
        p0 = np.float32([tr[-1] for tr in tracks]).reshape(-1, 1, 2)
        p1, _st, _err = cv.calcOpticalFlowPyrLK(img0, img1, p0, None, **lk_params)
        p0r, _st, _err = cv.calcOpticalFlowPyrLK(img1, img0, p1, None, **lk_params)
        d = abs(p0 - p0r).reshape(-1, 2).max(-1)
        good = d < 1
        new_tracks = []
        for tr, (x, y), good_flag in zip(tracks, p1.reshape(-1, 2), good):
            if not good_flag:
                continue
            tr.append((x, y))
            if len(tr) > track_len:
                del tr[0]
            new_tracks.append(tr)
            cv.circle(vis, (x, y), 2, (0, 255, 0), -1)
        tracks = new_tracks
        cv.polylines(vis, [np.int32(tr) for tr in tracks], False, (0, 255, 0))
    mask = np.zeros_like(gray)
    mask[:] = 255
    for x, y in [np.int32(tr[-1]) for tr in tracks]:
        cv.circle(mask, (x, y), 5, 0, -1)
    p = cv.goodFeaturesToTrack(gray, mask=mask, **feature_params)
    if p is not None:
        for x, y in np.float32(p).reshape(-1, 2):
            tracks.append([(x, y)])
    prev_gray = gray
    cv.imshow('lk_track', vis)
    ch = cv.waitKey(1)
But this code does not use the SIFT features, and it also doesn't draw the flow the way the first code does...
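A sketch of the missing piece (assuming the getFrame, sift, bf, and ratio-test setup from the second code): each good match already gives a displacement vector from a keypoint in the previous frame to its match in the current frame, so the match list can be turned into line segments and drawn exactly the way draw_flow draws its sampled grid:

import numpy as np
import cv2 as cv

def draw_sift_flow(gray, good, prev_kp, next_kp):
    # build one (start, end) line segment per good match, like draw_flow's lines
    lines = []
    for m in good:
        x1, y1 = prev_kp[m[0].queryIdx].pt   # feature position in the previous frame
        x2, y2 = next_kp[m[0].trainIdx].pt   # matched position in the current frame
        lines.append([(x1, y1), (x2, y2)])
    if not lines:
        return cv.cvtColor(gray, cv.COLOR_GRAY2BGR)
    lines = np.int32(np.array(lines) + 0.5)
    vis = cv.cvtColor(gray, cv.COLOR_GRAY2BGR)
    cv.polylines(vis, lines, 0, (0, 255, 0))
    for (x1, y1), (_x2, _y2) in lines:
        cv.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
    return vis

Inside the while loop of the second code, after the ratio test, calling cv.imshow('sift flow', draw_sift_flow(next_gray, good, prev_kp, next_kp)) should then show the per-feature flow. Note that queryIdx indexes the previous frame's keypoints and trainIdx the current frame's, matching the knnMatch(prev_des, next_des) argument order.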
I have code to detect and identify a car number plate and convert the image into text using tesseract.
I am using OpenCV to localise the number plate.
The problem I am facing is that tesseract does not identify the number accurately. Is there any way I can improve tesseract's performance?
My code (which I downloaded from the Internet) is:
import numpy as np
import cv2
from PIL import Image
import pytesseract as tess

def preprocess(img):
    imgBlurred = cv2.GaussianBlur(img, (5, 5), 0)
    gray = cv2.cvtColor(imgBlurred, cv2.COLOR_BGR2GRAY)
    sobelx = cv2.Sobel(gray, cv2.CV_8U, 1, 0, ksize=3)
    cv2.imshow("Sobel", sobelx)
    cv2.waitKey(0)
    ret2, threshold_img = cv2.threshold(sobelx, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    cv2.imshow("Threshold", threshold_img)
    cv2.waitKey(0)
    return threshold_img

def cleanPlate(plate):
    gray = cv2.cvtColor(plate, cv2.COLOR_BGR2GRAY)
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    thresh = cv2.dilate(gray, kernel, iterations=1)
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
    im1, contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    if contours:
        areas = [cv2.contourArea(c) for c in contours]
        max_index = np.argmax(areas)
        max_cnt = contours[max_index]
        max_cntArea = areas[max_index]
        x, y, w, h = cv2.boundingRect(max_cnt)
        if not ratioCheck(max_cntArea, w, h):
            return plate, None
        cleaned_final = thresh[y:y + h, x:x + w]
        return cleaned_final, [x, y, w, h]
    else:
        return plate, None
def extract_contours(threshold_img):
    element = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(17, 3))
    morph_img_threshold = threshold_img.copy()
    cv2.morphologyEx(src=threshold_img, op=cv2.MORPH_CLOSE, kernel=element, dst=morph_img_threshold)
    cv2.imshow("Morphed", morph_img_threshold)
    cv2.waitKey(0)
    im2, contours, hierarchy = cv2.findContours(morph_img_threshold, mode=cv2.RETR_EXTERNAL,
                                                method=cv2.CHAIN_APPROX_NONE)
    return contours

def ratioCheck(area, width, height):
    ratio = float(width) / float(height)
    if ratio < 1:
        ratio = 1 / ratio
    aspect = 4.7272
    min = 15 * aspect * 15  # minimum area
    max = 125 * aspect * 125  # maximum area
    rmin = 3
    rmax = 6
    if (area < min or area > max) or (ratio < rmin or ratio > rmax):
        return False
    return True

def isMaxWhite(plate):
    avg = np.mean(plate)
    if avg >= 115:
        return True
    else:
        return False

def validateRotationAndRatio(rect):
    (x, y), (width, height), rect_angle = rect
    if width > height:
        angle = -rect_angle
    else:
        angle = 90 + rect_angle
    if angle > 15:
        return False
    if height == 0 or width == 0:
        return False
    area = height * width
    if not ratioCheck(area, width, height):
        return False
    else:
        return True
def cleanAndRead(img, contours):
    for i, cnt in enumerate(contours):
        min_rect = cv2.minAreaRect(cnt)
        if validateRotationAndRatio(min_rect):
            x, y, w, h = cv2.boundingRect(cnt)
            plate_img = img[y:y + h, x:x + w]
            if isMaxWhite(plate_img):
                clean_plate, rect = cleanPlate(plate_img)
                if rect:
                    x1, y1, w1, h1 = rect
                    x, y, w, h = x + x1, y + y1, w1, h1
                    cv2.imshow("Cleaned Plate", clean_plate)
                    cv2.waitKey(0)
                    plate_im = Image.fromarray(clean_plate)
                    plate_im.save('donald1.png')
                    text = tess.image_to_string(plate_im, lang='eng')
                    img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    cv2.imshow("Detected Plate", img)
                    cv2.waitKey(0)
                    return text

numberplate = 0
img = cv2.imread("car_number_plate.jpg")
threshold_img = preprocess(img)
contours = extract_contours(threshold_img)
plate = cleanAndRead(img, contours)
print('plate information: ', plate)
If my number plate is MH01AV8866, it is recognised as MH01AY8866.
Any suggestion will be appreciated. Let me know if any other information is required too.
You are using tesseract as a general-purpose model. For your problem you can fine-tune it: first generate synthetic data for your number plates with
https://github.com/Belval/TextRecognitionDataGenerator
and then tune the model using the steps provided here:
https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00---Finetune
https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00
I've tuned tesseract on synthetic data and it works like a charm. I tried both CNN models and tesseract; tesseract trains better with less data and gives better performance.
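Independently of fine-tuning, a quick experiment worth trying is constraining tesseract's decoding: a number plate is a single line drawn from a fixed character set, which can be expressed through a config string. A sketch only, reusing the cleaned plate image saved by the code above; the page-segmentation mode is a reasonable starting point, not a tested recipe, and character whitelisting is known to be unreliable in some tesseract 4 LSTM versions:

import pytesseract as tess
from PIL import Image

plate_im = Image.open('donald1.png')  # the cleaned plate saved by the code above

# --psm 7: treat the image as a single text line;
# the whitelist restricts output to characters a plate can contain
config = "--psm 7 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
text = tess.image_to_string(plate_im, lang='eng', config=config)
print(text)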
First of all, I am new to programming, though I would like to learn, especially Python. My background is in animation and CGI.
I have Python 2.7 and OpenCV x64 installed on Windows. I tested the optical flow example they provide (opt_flow.py, the one with the green arrows). I like it, but I am trying to understand how I can get the data out as values. I am not interested in seeing the camera output or the green arrows; I just want the data out to use later. Is there a way to do that?
For example: the value of x, y and the length of the green arrows.
Thank you all
You can get the optical flow vectors (green arrows) in the draw_flow function of opt_flow.py. Here is how I would do it:
#!/usr/bin/env python
'''
example to show optical flow
USAGE: opt_flow.py [<video_source>]
Keys:
 1 - toggle HSV flow visualization
 2 - toggle glitch
 ESC - exit
'''
# Python 2/3 compatibility
from __future__ import print_function
import numpy as np
import math
import cv2
import video

def draw_flow(img, flow, step=16):
    global arrows
    h, w = img.shape[:2]
    y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2, -1).astype(int)
    fx, fy = flow[y, x].T
    lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
    lines = np.int32(lines + 0.5)
    vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    cv2.polylines(vis, lines, 0, (0, 255, 0))
    for (x1, y1), (x2, y2) in lines:
        # store start point and arrow length for later use
        arrows.append([x1, y1, math.sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1))])
        cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
    return vis
def draw_hsv(flow):
    h, w = flow.shape[:2]
    fx, fy = flow[:, :, 0], flow[:, :, 1]
    ang = np.arctan2(fy, fx) + np.pi
    v = np.sqrt(fx * fx + fy * fy)
    hsv = np.zeros((h, w, 3), np.uint8)
    hsv[..., 0] = ang * (180 / np.pi / 2)
    hsv[..., 1] = 255
    hsv[..., 2] = np.minimum(v * 4, 255)
    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    return bgr

def warp_flow(img, flow):
    h, w = flow.shape[:2]
    flow = -flow
    flow[:, :, 0] += np.arange(w)
    flow[:, :, 1] += np.arange(h)[:, np.newaxis]
    res = cv2.remap(img, flow, None, cv2.INTER_LINEAR)
    return res
if __name__ == '__main__':
    import sys
    print(__doc__)
    try:
        fn = sys.argv[1]
    except IndexError:
        fn = 0
    arrows = []
    cam = video.create_capture(fn)
    ret, prev = cam.read()
    prevgray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
    show_hsv = False
    show_glitch = False
    cur_glitch = prev.copy()
    while True:
        ret, img = cam.read()
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        flow = cv2.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        prevgray = gray
        del arrows[:]  # empty the list; works on Python 2.7 too (list.clear() is Python 3 only)
        finalImg = draw_flow(gray, flow)
        print(arrows)
        cv2.imshow('flow', finalImg)
        if show_hsv:
            cv2.imshow('flow HSV', draw_hsv(flow))
        if show_glitch:
            cur_glitch = warp_flow(cur_glitch, flow)
            cv2.imshow('glitch', cur_glitch)
        ch = cv2.waitKey(5)
        if ch == 27:
            break
        if ch == ord('1'):
            show_hsv = not show_hsv
            print('HSV flow visualization is', ['off', 'on'][show_hsv])
        if ch == ord('2'):
            show_glitch = not show_glitch
            if show_glitch:
                cur_glitch = img.copy()
            print('glitch is', ['off', 'on'][show_glitch])
    cv2.destroyAllWindows()
In the code above, I'm saving the optical flow vectors (start point coordinates and vector length) in the global variable arrows, like so:
arrows.append([x1, y1, math.sqrt((x2-x1)*(x2-x1) + (y2-y1)*(y2-y1))])
with (x1, y1) the arrow's start point and (x2, y2) the arrow's end point.
Hope it helps.
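If the per-arrow loop ever becomes a bottleneck, or you want the full dense field rather than the sampled arrows, the same values can also be read directly from the flow array. A small sketch using cv2.cartToPolar, applied to the flow returned by calcOpticalFlowFarneback above:

import cv2
import numpy as np

# flow has shape (h, w, 2): per-pixel displacement (dx, dy);
# magnitude is the arrow length, ang the direction in radians
mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])

# the same sampling as draw_flow's grid, i.e. every 16th pixel
step = 16
sampled_lengths = mag[step // 2::step, step // 2::step]
print(sampled_lengths.shape, sampled_lengths.mean())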