I want to detect crop rows using aerial images(CRBD). I have done the necessary image processing like converting to grayscale, edge detection, skeletonization, Hough Transform(to identify and draw the lines), and I also set the accumulator angle to math.pi*4.0/180, which I varied time after time.
The algorithm works well at detection approximately 4 crop lines, I want to improve it so that it can detect variable number of crop rows, and it should be able to highlight this crop rows
Here is a link to the sample code I modified Here
import os
import os.path
import time
import cv2
import numpy as np
import math
### Setup ###
image_data_path = os.path.abspath('../8470p/CRBD/Images')
gt_data_path = os.path.abspath('../8470p/GT data')
image_out_path = os.path.abspath('../8470p/algorithm_1')
use_camera = False # whether or not to use the test images or camera
images_to_save = [2, 3, 4, 5] # which test images to save
timing = False # whether to time the test images
curr_image = 0 # global counter
HOUGH_RHO = 2 # Distance resolution of the accumulator in pixels
HOUGH_ANGLE = math.pi*4.0/18 # Angle resolution of the accumulator in radians
HOUGH_THRESH_MAX = 80 # Accumulator threshold parameter. Only those lines are
returned that get votes
HOUGH_THRESH_MIN = 10
HOUGH_THRESH_INCR = 1
NUMBER_OF_ROWS = 10 # how many crop rows to detect
THETA_SIM_THRESH = math.pi*(6.0/180) # How similar two rows can be
RHO_SIM_THRESH = 8 # How similar two rows can be
ANGLE_THRESH = math.pi*(30.0/180) # How steep angles the crop rows can be in
radians
def grayscale_transform(image_in):
'''Converts RGB to Grayscale and enhances green values'''
b, g, r = cv2.split(image_in)
return 2*g - r - b
def save_image(image_name, image_data):
'''Saves image if user requests before runtime'''
if curr_image in images_to_save:
image_name_new = os.path.join(image_out_path, "
{0}_{1}.jpg".format(image_name,
str(curr_image) ))
def skeletonize(image_in):
'''Inputs and grayscale image and outputs a binary skeleton image'''
size = np.size(image_in)
skel = np.zeros(image_in.shape, np.uint8)
ret, image_edit = cv2.threshold(image_in, 0, 255, cv2.THRESH_BINARY |
cv2.THRESH_OTSU)
element = cv2.getStructuringElement(cv2.MORPH_CROSS, (3,3))
done = False
while not done:
eroded = cv2.erode(image_edit, element)
temp = cv2.dilate(eroded, element)
temp = cv2.subtract(image_edit, temp)
skel = cv2.bitwise_or(skel, temp)
image_edit = eroded.copy()
zeros = size - cv2.countNonZero(image_edit)
if zeros == size:
done = True
return skel
def tuple_list_round(tuple_list, ndigits_1=0, ndigits_2=0):
'''Rounds each value in a list of tuples to the number of digits
specified
'''
new_list = []
for (value_1, value_2) in tuple_list:
new_list.append( (round(value_1, ndigits_1), round(value_2,
ndigits_2)) )
return new_list
def crop_point_hough(crop_points):
'''Iterates though Hough thresholds until optimal value found for
the desired number of crop rows. Also does filtering.
'''
height = len(crop_points)
width = len(crop_points[0])
hough_thresh = HOUGH_THRESH_MAX
rows_found = False
while hough_thresh > HOUGH_THRESH_MIN and not rows_found:
crop_line_data = cv2.HoughLines(crop_points, HOUGH_RHO, HOUGH_ANGLE,
hough_thresh)
crop_lines = np.zeros((height, width, 3), dtype=np.uint8)
crop_lines_hough = np.zeros((height, width, 3), dtype=np.uint8)
if crop_line_data is not None:
# get rid of duplicate lines. May become redundant if a similarity
threshold is done
crop_line_data_1 = tuple_list_round(crop_line_data[:,0,:],-1, 4)
crop_line_data_2 = []
x_offsets = []
crop_lines_hough = np.zeros((height, width, 3), dtype=np.uint8)
for (rho, theta) in crop_line_data_1:
a = math.cos(theta)
b = math.sin(theta)
x0 = a*rho
y0 = b*rho
point1 = (int(round(x0+1000*(-b))), int(round(y0+1000*(a))))
point2 = (int(round(x0-1000*(-b))), int(round(y0-1000*(a))))
cv2.line(crop_lines_hough, point1, point2, (0, 0, 255), 2)
for curr_index in range(len(crop_line_data_1)):
(rho, theta) = crop_line_data_1[curr_index]
is_faulty = False
if ((theta >= ANGLE_THRESH) and (theta <= math.pi-
ANGLE_THRESH)) or(theta <= 0.001):
is_faulty = True
else:
for (other_rho, other_theta) in
crop_line_data_1[curr_index+1:]:
if abs(theta - other_theta) < THETA_SIM_THRESH:
is_faulty = True
elif abs(rho - other_rho) < RHO_SIM_THRESH:
is_faulty = True
if not is_faulty:
crop_line_data_2.append( (rho, theta) )
for (rho, theta) in crop_line_data_2:
a = math.cos(theta)
b = math.sin(theta)
c = math.tan(theta)
x0 = a*rho
y0 = b*rho
point1 = (int(round(x0+1000*(-b))), int(round(y0+1000*(a))))
point2 = (int(round(x0-1000*(-b))), int(round(y0-1000*(a))))
cv2.line(crop_lines, point1, point2, (0, 0, 255), 2)
#cv2.circle(crop_lines, (np.clip(int(round(a*rho+c*
#(0.5*height))),0 ,239), 0), 4, (255,0,0), -1)
#cv2.circle(crop_lines, (np.clip(int(round(a*rho-c*
#(0.5*height))),0 ,239), height), 4, (255,0,0), -1)
cv2.circle(crop_lines, (np.clip(int(round(rho/a)),0 ,239), 0), 5,
(255,0,0), -1)
#cv2.circle(img,(447,63), 63, (0,0,255), -1)
x_offsets.append(np.clip(int(round(rho/a)),0 ,239))
cv2.line(crop_lines, point1, point2, (0, 0, 255), 2)
if len(crop_line_data_2) >= NUMBER_OF_ROWS:
rows_found = True
hough_thresh -= HOUGH_THRESH_INCR
if rows_found == False:
print(NUMBER_OF_ROWS, "rows_not_found")
x_offset = min (x_offsets)
width = max (x_offsets) - min (x_offsets)
return (crop_lines, crop_lines_hough, x_offset, width)
def crop_row_detect(image_in):
'''Inputs an image and outputs the lines'''
save_image('0_image_in', image_in)
### Grayscale Transform ###
image_edit = grayscale_transform(image_in)
save_image('1_image_gray', image_edit)
### Skeletonization ###
skeleton = skeletonize(image_edit)
save_image('2_image_skeleton', skeleton)
### Hough Transform ###
(crop_lines, crop_lines_hough, x_offset, width) =
crop_point_hough(skeleton)
save_image('3_image_hough',cv2.addWeighted(image_in, 1,
crop_lines_hough, 1, 0.0))
save_image('4_image_lines',cv2.addWeighted(image_in, 1,crop_lines,1,0.0))
return (crop_lines , x_offset, width)
def main():
if use_camera == False:
diff_times = []
for image_name in sorted(os.listdir(image_data_path)):
global curr_image
curr_image += 1
start_time = time.time()
image_path = os.path.join(image_data_path, image_name)
image_in = cv2.imread(image_path)
crop_lines = crop_row_detect(image_in)
if timing == False:
cv2.imshow(image_name, cv2.addWeighted(image_in, 1,
crop_lines, 1, 0.0))
print('Press any key to continue...')
cv2.waitKey()
cv2.destroyAllWindows()
### Timing ###
else:
diff_times.append(time.time() - start_time)
mean = 0
for diff_time in diff_times:
mean += diff_time
### Display Timing ###
print('max time = {0}'.format(max(diff_times)))
print('ave time = {0}'.format(1.0 * mean / len(diff_times)))
cv2.waitKey()
else: # use camera. Hasn't been tested on a farm.
capture = cv2.VideoCapture(0)
while cv2.waitKey(1) < 0:
_, image_in = capture.read()
(crop_lines, x_offset, width) = crop_row_detect(image_in)
cv2.imshow("Webcam", cv2.addWeighted(image_in, 1, crop_lines, 1,
0.0))
capture.release()
cv2.destroyAllWindows()
main()
Input Image
[![Input Image][1]][1]
Output Image
[![][4]][4]
Expected Output
[![Expected Output][5]][5]
I have tried thresholding with cv2.inRange() to find green lines, but am still not getting the desired out.
Also the algorithms seems to be only draw the crop_line_data_2 as shown in the Output Image, it doesn't draw the crop_line_data_1
def threshold_green(image_in):
hsv = cv2.cvtColor(image_in, cv2.COLOR_BGR2HSV)
## mask of green (36,25,25) ~ (86, 255,255)
# mask = cv2.inRange(hsv, (36, 25, 25), (86, 255,255))
mask = cv2.inRange(hsv, (36, 25, 25), (70, 255,255))
## slice the green
imask = mask>0
green = np.zeros_like(image_in, np.uint8)
green[imask] = image_in[imask]
return green
Related
`import cv2
import numpy as np
import time
from tensorflow.keras.models import load_model
sign_model = load_model('best_model.h5')
def detect_lines(image):
tuning min_threshold, minLineLength, maxLineGap is a trial and error process by hand
rho = 1 # precision in pixel, i.e. 1 pixel
angle = np.pi / 180 # degree in radian, i.e. 1 degree
min_threshold = 10 # minimal of votes
lines = cv2.HoughLinesP(image, rho, angle, min_threshold, np.array([]), minLineLength=8,
maxLineGap=4)
return lines
def mean_lines(frame, lines):
a = np.zeros_like(frame)
try:
left_line_x = []
left_line_y = []
right_line_x = []
right_line_y = []
for line in lines:
for x1, y1, x2, y2 in line:
slope = (y2 - y1) / (x2 - x1) # <-- Calculating the slope.
if abs(slope) < 0.5: # <-- Only consider extreme slope
continue
if slope <= 0: # <-- If the slope is negative, left group.
left_line_x.extend([x1, x2])
left_line_y.extend([y1, y2])
else: # <-- Otherwise, right group.
right_line_x.extend([x1, x2])
right_line_y.extend([y1, y2])
min_y = int(frame.shape[0] * (3 / 5)) # <-- Just below the horizon
max_y = int(frame.shape[0]) # <-- The bottom of the image
poly_left = np.poly1d(np.polyfit(
left_line_y,
left_line_x,
deg=1
))
left_x_start = int(poly_left(max_y))
left_x_end = int(poly_left(min_y))
poly_right = np.poly1d(np.polyfit(
right_line_y,
right_line_x,
deg=1
))
right_x_start = int(poly_right(max_y))
right_x_end = int(poly_right(min_y))
cv2.line(a, (left_x_start, max_y), (left_x_end, min_y), [255,255,0], 5)
cv2.line(a, (right_x_start, max_y), (right_x_end, min_y), [255,255,0], 5)
current_pix = (left_x_end+right_x_end)/2
except:
current_pix = 128
return a, current_pix
def region_of_interest(image):
(height, width) = image.shape
mask = np.zeros_like(image)
polygon = np.array([[
(0, height),
(0, 180),
(80, 130),
(256-80,130),
(width, 180),
(width, height),
np.int32)
cv2.fillPoly(mask, polygon, 255)
masked_image = image * (mask)
masked_image[:170,:]=0
return masked_image
def horiz_lines(mask):
roi = mask[160:180, 96:160]
try:
lines = detect_lines(roi)
lines = lines.reshape(-1,2,2)
slope = (lines[:,1,1]-lines[:,0,1]) / (lines[:,1,0]-lines[:,0,0])
if (lines[np.where(abs(slope)<0.2)]).shape[0] != 0:
detected = True
else:
detected = False
except:
detected = False
return detected
def turn_where(mask):
roi = mask[100:190, :]
cv2.imshow('turn where', roi)
lines = detect_lines(roi)
lines = lines.reshape(-1,2,2)
slope = (lines[:,1,1]-lines[:,0,1]) / (lines[:,1,0]-lines[:,0,0])
mean_pix = np.mean(lines[np.where(abs(slope)<0.2)][:,:,0])
return mean_pix
def detect_side(side_mask):
side_pix = np.mean(np.where(side_mask[150:190, :]>0), axis=1)[1]
return side_pix
def detect_sign(frame, hsv_frame):
types = ['left', 'straight', 'right']
mask = cv2.inRange(hsv_frame, np.array([100,160,90]), np.array([160,220,220]))
mask[:30,:]=0
try:
points, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
sorted_points = sorted(points, key=len)
if cv2.contourArea(sorted_points[-1])>30:
x,y,w,h = cv2.boundingRect(sorted_points[-1])
if (x>5) and (x+w<251) and (y>5) and (y+h<251):
sign = frame[y:y+h,x:x+w]
sign = cv2.resize(sign, (25,25))/255
frame = cv2.rectangle(frame,(x,y),(x+w,y+h),(0,255,255),2)
return types[np.argmax(sign_model.predict(sign.reshape(1,25,25,3)))]
else:
return 'nothing'
else:
return 'nothing'
except:
return 'nothing'
def red_sign_state(red_mask):
points, _ = cv2.findContours(red_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
sorted_points = sorted(points, key=len)
try:
red_area = cv2.contourArea(sorted_points[-1])
if red_area > 50:
print('red sign detected!')
return True
else:
return False
except:
return False
def stop_the_car(car):
car.setSteering(0)
while car.getSpeed():
car.setSpeed(-100)
car.getData()
car.setSpeed(0)
return True
def turn_the_car(car,s,t):
time1 = time.time()
while((time.time()-time1)<t):
car.getData()
car.setSteering(s)
car.setSpeed(15)
def go_back(car, t):
time1 = time.time()
while((time.time()-time1)<t):
car.getData()
car.setSpeed(-15)
car.setSpeed(0)
`
i want to build 3d scanner with stereo camera and LED Projector. I calibrated the stereo camera and created the mechanical system. I project gray code and capture them with two camera. I decoded the images and take correspondence image. But i can't triangulate the decoded points. This is my reference project for generate pattern and decode them.
Reference Github project
And this is my code
import numpy as np
import cv2
import structuredlight as sl
def generate_rectify_data(calib_data, size):
XML_File = cv2.FileStorage(calib_data, cv2.FILE_STORAGE_READ)
M1 = XML_File.getNode('Camera_Matrix_Left').mat()
M2 = XML_File.getNode('Camera_Matrix_Right').mat()
d1 = XML_File.getNode('Camera_Distortion_Left').mat()
d2 = XML_File.getNode('Camera_Distortion_Right').mat()
R = XML_File.getNode('Rotation_Matrix').mat()
t = XML_File.getNode('Translation_Matrix').mat()
flag = cv2.CALIB_ZERO_DISPARITY
R1, R2, P1, P2, Q = cv2.stereoRectify(cameraMatrix1=M1, cameraMatrix2=M2,
distCoeffs1=d1, distCoeffs2=d2, R=R, T=t, flags=flag, alpha=-1, imageSize=size,
newImageSize=size)[0:5]
map_x_l, map_y_l = cv2.initUndistortRectifyMap(M1, d1, R1, P1, size, cv2.CV_32FC1)
map_x_r, map_y_r = cv2.initUndistortRectifyMap(M2, d2, R2, P2, size, cv2.CV_32FC1)
return map_x_l, map_y_l, map_x_r, map_y_r, P1, P2,Q
def rectify(img, map_x, map_y):
res = cv2.remap(img, map_x, map_y, cv2.INTER_LINEAR, cv2.BORDER_CONSTANT)
return res
"""
***I generate pattern like this and capture them in another python script***
W = 240
H = 240
gray = sl.Gray()
imlist_posi_x_pat = gray.generate((W, H))
imlist_posi_y_pat = sl.transpose(gray.generate((H, W)))
"""
if __name__ == '__main__':
img_size = (1648, 1232)
map_x_l, map_y_l, map_x_r, map_y_r, P1, P2, Q = generate_rectify_data(
"C:/Users/XXX/PycharmProjects/Stereo_Structured_Light/Calibration_Data"
"/Calibration_Parameters_1.xml", size=(1648, 1232))
rect_list_l, rect_list_r = [], []
imlist_posi_x_cap_R = []
imlist_posi_y_cap_R = []
imlist_posi_x_cap_L = []
imlist_posi_y_cap_L = []
for i in range(0,16):
img_l = cv2.imread("C:/OxO_Scan/Images_1/Left_cam3/L_" + str(i) + ".png", 0)
img_r = cv2.imread("C:/OxO_Scan/Images_1/Right_cam3/R_" + str(i) + ".png", 0)
l_rect = rectify(img_l, map_x_l, map_y_l)
r_rect = rectify(img_r, map_x_r, map_y_r)
if i < 8: # 8 for the horizontal, 8 for the vertical pattern images
imlist_posi_x_cap_R.append(r_rect)
imlist_posi_x_cap_L.append(l_rect)
else:
imlist_posi_y_cap_R.append(r_rect)
imlist_posi_y_cap_L.append(l_rect)
W = 240
H = 240
gray = sl.Gray()
img_index_x_R = gray.decode(imlist_posi_x_cap_R, thresh=40)
img_index_x_L = gray.decode(imlist_posi_x_cap_L, thresh=40)
img_index_y_R = gray.decode(imlist_posi_y_cap_R, thresh=40)
img_index_y_L = gray.decode(imlist_posi_y_cap_L, thresh=40)
img_correspondence_x_r = cv2.merge([0.0 * np.zeros_like(img_index_x_R),
img_index_x_R / W, img_index_y_R / H])
img_correspondence_r = np.clip(img_correspondence_x_r * 255.0, 0,
255).astype(np.uint8)
img_correspondence_y_l = cv2.merge([0.0 * np.zeros_like(img_index_x_L),
img_index_x_L / W, img_index_y_L / H])
img_correspondence_l = np.clip(img_correspondence_y_l * 255.0, 0,
255).astype(np.uint8)
####################################
cv2.imshow("a", cv2.resize(img_correspondence_l, (640, 480)))
cv2.imshow("b", cv2.resize(img_correspondence_r, (640, 480)))
cv2.waitKey()
cv2.destroyAllWindows()
img_correspondence_L_2 = np.copy(img_correspondence_l)
img_correspondence_R_2 = np.copy(img_correspondence_r)
cam_pts_l, cam_pts_r = [], []
cam_pts_l2, cam_pts_r2 = [], []
for i in range(img_correspondence_l.shape[0]):
for j in range(img_correspondence_l.shape[1]):
if (img_correspondence_l[i][j] != 0).any():
qwert = img_index_x_L[i][j]
cam_pts_l.append([j, i])
cam_pts_r.append([j+qwert, i])
cam_pts_l = np.array(cam_pts_l, dtype=np.float32)
cam_pts_r = np.array(cam_pts_r, dtype=np.float32)
cam_pts_l = np.array(cam_pts_l)[:, np.newaxis, :]
cam_pts_r = np.array(cam_pts_r)[:, np.newaxis, :]
pts4D = cv2.triangulatePoints(P1, P2, np.float32(cam_pts_l),np.float32(cam_pts_r)).T
pts3D = (pts4D[:, :3] / pts4D[:, -1:])
I don't know what to do in code for loop "for i in range(img_correspondence_l.shape[0]):" For example, I cannot find where a point that I found using left correspondence image in the left camera image corresponds to in the right camera image. j+img_index_x_L[i][j] does not give correct results. What should i do here
Thanks for your answers
I have a little project with OpenCV (python) where one of my steps is to take an x-ray image from the human body and convert it to a binary image where white pixels represent where some bone is present and black means there is no bone there.
Since sometimes "bone parts" can be darker than "non-bone parts" from another region, simple thresholding won't work. I also tried adaptive threshold and I couldn't see much difference.
I came up with a simple algorithm that applies a simple threshold for each row.
Here is the code:
def threshhold(image, val):
image = image.copy()
for row_idx in range(image.shape[0]):
max_row = image[row_idx].max()
min_row = image[row_idx].min()
tresh = np.median(image[row_idx]) + (val * (max_row - min_row))
# Or use np.mean instead of np.median
_, tresh = cv2.threshold(image[row_idx], tresh, 255, cv2.THRESH_BINARY)
image[row_idx] = tresh.ravel()
return image
And here is the code that does the same work but column-by-column instead of row-by-row:
def threshhold2(image, val):
image = image.copy()
for row_idx in range(image.shape[1]):
max_row = image[:, row_idx].max()
min_row = image[:, row_idx].min()
tresh = np.median(image[:, row_idx]) + (val * (max_row - min_row))
# Or use np.mean instead of np.median
_, tresh = cv2.threshold(image[:, row_idx], tresh, 255, cv2.THRESH_BINARY)
image[:, row_idx] = tresh.ravel()
return image
This method works pretty well with images like this:
Not quite well for this one but it is not that bad:
Very terrible:
Only the left half looks good
...
As you can see; this algorithm works well only for some images.
I will be glad to see more experienced people's ideas.
Images are not for me by the way.
Entire source code:
import os
import cv2
import numpy as np
files_to_see = os.listdir("data_set")
files_to_see.sort()
current_file = 0
print(files_to_see)
def slice(image, size):
out = []
x_count = image.shape[1] // size
y_count = image.shape[0] // size
for y_idx in range(y_count):
for x_idx in range(x_count):
out.append(
(
(y_idx, x_idx),
image[y_idx * size: (y_idx + 1) * size,
x_idx * size: (x_idx + 1) * size]
)
)
return y_count, x_count, out
def normalize(image):
image = image.copy()
min_pix = image.min()
max_pix = image.max()
for y in range(image.shape[0]):
for x in range(image.shape[1]):
val = image[y, x]
val -= min_pix
val *= 255 / (max_pix - min_pix)
image[y, x] = round(val)
# image -= min_pix
# image *= round(255 / (max_pix - min_pix))
return image
def threshhold(image, val, method):
image = image.copy()
for row_idx in range(image.shape[0]):
max_row = image[row_idx].max()
min_row = image[row_idx].min()
# tresh = np.median(image[row_idx]) + (val * (max_row - min_row))
tresh = method(image[row_idx]) + (val * (max_row - min_row))
_, tresh = cv2.threshold(image[row_idx], tresh, 255, cv2.THRESH_BINARY)
image[row_idx] = tresh.ravel()
return image
def threshhold2(image, val, method):
image = image.copy()
for row_idx in range(image.shape[1]):
max_row = image[:, row_idx].max()
min_row = image[:, row_idx].min()
tresh = method(image[:, row_idx]) + (val * (max_row - min_row))
_, tresh = cv2.threshold(image[:, row_idx], tresh, 255, cv2.THRESH_BINARY)
image[:, row_idx] = tresh.ravel()
return image
def recalculate_threshhold(v):
global original_current_image, thresh_current_image, y_c, x_c, slices
method = np.mean
if cv2.getTrackbarPos("method", "xb labeler") == 0:
method = np.median
thresh_current_image = threshhold2(original_current_image, cv2.getTrackbarPos("threshhold_value", "xb labeler") / 1000, method)
y_c, x_c, slices = slice(thresh_current_image, 128)
def thresh_current_image_mouse_event(event, x, y, flags, param):
if event == 1:
print(x // 128, y // 128)
cv2.imshow("slice", slices[(x // 128) + (y // 128) * x_c][1])
cv2.namedWindow("xb labeler")
cv2.createTrackbar("threshhold_value", "xb labeler", 0, 1000, recalculate_threshhold)
cv2.createTrackbar("method", "xb labeler", 0, 1, recalculate_threshhold)
cv2.namedWindow("thresh_current_image")
cv2.setMouseCallback("thresh_current_image", thresh_current_image_mouse_event)
def init():
global original_current_image, thresh_current_image, x_c, y_c, slices, files_to_see, current_file
original_current_image = cv2.imread("data_set/" + files_to_see[current_file], cv2.CV_8UC1)
original_current_image = cv2.resize(original_current_image, (512, 512))
original_current_image = normalize(original_current_image)
original_current_image = cv2.GaussianBlur(original_current_image, (5, 5), 10)
recalculate_threshhold(1)
y_c, x_c, slices = slice(thresh_current_image, 128)
init()
while True:
cv2.imshow("thresh_current_image", thresh_current_image)
cv2.imshow("xb labeler", original_current_image)
k = cv2.waitKey(1)
if k == ord('p'):
cv2.imwrite("ssq.png", thresh_current_image)
current_file += 1
init()
cv2.destroyAllWindows()
EDIT: Added original images:
I found some example solution a detected common element on the two images.
It's code on Python with OpenCV and I execute on my two examples images:
'''
Feature-based image matching sample.
Note, that you will need the https://github.com/opencv/opencv_contrib repo for SIFT and SURF
USAGE
find_obj.py [--feature=<sift|surf|orb|akaze|brisk>[-flann]] [ <image1> <image2> ]
--feature - Feature to use. Can be sift, surf, orb or brisk. Append '-flann'
to feature name to use Flann-based matcher instead bruteforce.
Press left mouse button on a feature point to see its matching point.
'''
# Python 2/3 compatibility
from __future__ import print_function
import numpy as np
import cv2 as cv
from common import anorm, getsize
FLANN_INDEX_KDTREE = 1 # bug: flann enums are missing
FLANN_INDEX_LSH = 6
def init_feature(name):
chunks = name.split('-')
if chunks[0] == 'sift':
detector = cv.xfeatures2d.SIFT_create()
norm = cv.NORM_L2
elif chunks[0] == 'surf':
detector = cv.xfeatures2d.SURF_create(200)
norm = cv.NORM_L2
elif chunks[0] == 'orb':
detector = cv.ORB_create(1400)
norm = cv.NORM_HAMMING
elif chunks[0] == 'akaze':
detector = cv.AKAZE_create()
norm = cv.NORM_HAMMING
elif chunks[0] == 'brisk':
detector = cv.BRISK_create()
norm = cv.NORM_HAMMING
else:
return None, None
if 'flann' in chunks:
if norm == cv.NORM_L2:
flann_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
else:
flann_params= dict(algorithm = FLANN_INDEX_LSH,
table_number = 6, # 12
key_size = 12, # 20
multi_probe_level = 1) #2
matcher = cv.FlannBasedMatcher(flann_params, {}) # bug : need to pass empty dict (#1329)
else:
matcher = cv.BFMatcher(norm)
return detector, matcher
def filter_matches(kp1, kp2, matches, ratio = 0.75):
mkp1, mkp2 = [], []
for m in matches:
if len(m) == 2 and m[0].distance < m[1].distance * ratio:
m = m[0]
mkp1.append( kp1[m.queryIdx] )
mkp2.append( kp2[m.trainIdx] )
p1 = np.float32([kp.pt for kp in mkp1])
p2 = np.float32([kp.pt for kp in mkp2])
kp_pairs = zip(mkp1, mkp2)
return p1, p2, list(kp_pairs)
def explore_match(win, img1, img2, kp_pairs, status = None, H = None):
h1, w1 = img1.shape[:2]
h2, w2 = img2.shape[:2]
vis = np.zeros((max(h1, h2), w1+w2), np.uint8)
vis[:h1, :w1] = img1
vis[:h2, w1:w1+w2] = img2
vis = cv.cvtColor(vis, cv.COLOR_GRAY2BGR)
if H is not None:
corners = np.float32([[0, 0], [w1, 0], [w1, h1], [0, h1]])
corners = np.int32( cv.perspectiveTransform(corners.reshape(1, -1, 2), H).reshape(-1, 2) + (w1, 0) )
cv.polylines(vis, [corners], True, (255, 255, 255))
if status is None:
status = np.ones(len(kp_pairs), np.bool_)
p1, p2 = [], [] # python 2 / python 3 change of zip unpacking
for kpp in kp_pairs:
p1.append(np.int32(kpp[0].pt))
p2.append(np.int32(np.array(kpp[1].pt) + [w1, 0]))
green = (0, 255, 0)
red = (0, 0, 255)
kp_color = (51, 103, 236)
for (x1, y1), (x2, y2), inlier in zip(p1, p2, status):
if inlier:
col = green
cv.circle(vis, (x1, y1), 2, col, -1)
cv.circle(vis, (x2, y2), 2, col, -1)
else:
col = red
r = 2
thickness = 3
cv.line(vis, (x1-r, y1-r), (x1+r, y1+r), col, thickness)
cv.line(vis, (x1-r, y1+r), (x1+r, y1-r), col, thickness)
cv.line(vis, (x2-r, y2-r), (x2+r, y2+r), col, thickness)
cv.line(vis, (x2-r, y2+r), (x2+r, y2-r), col, thickness)
vis0 = vis.copy()
for (x1, y1), (x2, y2), inlier in zip(p1, p2, status):
if inlier:
cv.line(vis, (x1, y1), (x2, y2), green)
cv.imshow(win, vis)
def onmouse(event, x, y, flags, param):
cur_vis = vis
if flags & cv.EVENT_FLAG_LBUTTON:
cur_vis = vis0.copy()
r = 8
m = (anorm(np.array(p1) - (x, y)) < r) | (anorm(np.array(p2) - (x, y)) < r)
idxs = np.where(m)[0]
kp1s, kp2s = [], []
for i in idxs:
(x1, y1), (x2, y2) = p1[i], p2[i]
col = (red, green)[status[i][0]]
cv.line(cur_vis, (x1, y1), (x2, y2), col)
kp1, kp2 = kp_pairs[i]
kp1s.append(kp1)
kp2s.append(kp2)
cur_vis = cv.drawKeypoints(cur_vis, kp1s, None, flags=4, color=kp_color)
cur_vis[:,w1:] = cv.drawKeypoints(cur_vis[:,w1:], kp2s, None, flags=4, color=kp_color)
cv.imshow(win, cur_vis)
cv.setMouseCallback(win, onmouse)
return vis
if __name__ == '__main__':
print(__doc__)
import sys, getopt
opts, args = getopt.getopt(sys.argv[1:], '', ['feature='])
opts = dict(opts)
feature_name = opts.get('--feature', 'brisk')
try:
fn1, fn2 = args
except:
fn1 = '../data/box.png'
fn2 = '../data/box_in_scene.png'
img1 = cv.imread(fn1, 0)
img2 = cv.imread(fn2, 0)
detector, matcher = init_feature(feature_name)
if img1 is None:
print('Failed to load fn1:', fn1)
sys.exit(1)
if img2 is None:
print('Failed to load fn2:', fn2)
sys.exit(1)
if detector is None:
print('unknown feature:', feature_name)
sys.exit(1)
print('using', feature_name)
kp1, desc1 = detector.detectAndCompute(img1, None)
kp2, desc2 = detector.detectAndCompute(img2, None)
print('img1 - %d features, img2 - %d features' % (len(kp1), len(kp2)))
def match_and_draw(win):
print('matching...')
raw_matches = matcher.knnMatch(desc1, trainDescriptors = desc2, k = 2) #2
p1, p2, kp_pairs = filter_matches(kp1, kp2, raw_matches)
if len(p1) >= 4:
H, status = cv.findHomography(p1, p2, cv.RANSAC, 5.0)
print('%d / %d inliers/matched' % (np.sum(status), len(status)))
else:
H, status = None, None
print('%d matches found, not enough for homography estimation' % len(p1))
_vis = explore_match(win, img1, img2, kp_pairs, status, H)
match_and_draw('find_obj')
cv.waitKey()
cv.destroyAllWindows()
Its original images
Its images with some changes
And result in Python Code like this:
python diff_good.py --feature=surf images/org_web.png images/change1.png
Result detected common object
I try to find some solution - how change markered type of common object for example: instead show area with many colors point to some contours, shape (square, rectangle etc.) something like this :
This is some example second images with markered by green rectangles commons element - block
example result images
I want to change this code to get markered only common contours - the area without many points or line like now, because it's not clear to present and understood.
Maybe will be the best solution make third images - result output image with markered only common area like rectangles objects or contours.
But after many hours spend for search solution on google, forums and groups I failed to change this code to act as it needs, maybe here I will find some help, example or suggestions how to modify it.
I found this repo x-img-diff.
They cluster the keypoints into the same boxes and use some other methods to handle the boxes. You can try this way.
I'm trying to use stereo_match.py to make a disparity map and then generate 3D points cloud, but I'm facing serious problems.
Left image
I use these 2 pictures. And there is the result with the code :
import numpy as np
import cv2
ply_header = '''ply
format ascii 1.0
element vertex %(vert_num)d
property float x
property float y
property float z
property uchar red
property uchar green
property uchar blue
end_header
'''
def write_ply(fn, verts, colors):
verts = verts.reshape(-1, 3)
colors = colors.reshape(-1, 3)
verts = np.hstack([verts, colors])
with open(fn, 'w') as f:
f.write(ply_header % dict(vert_num=len(verts)))
np.savetxt(f, verts, '%f %f %f %d %d %d')
if __name__ == '__main__':
print 'loading images...'
imgL = cv2.pyrDown( cv2.imread('1.png') ) # downscale images for faster processing
imgR = cv2.pyrDown( cv2.imread('2.png') )
# disparity range is tuned for 'aloe' image pair
window_size = 3
min_disp = 16
num_disp = 112-min_disp
stereo = cv2.StereoSGBM(minDisparity = min_disp,
numDisparities = num_disp,
SADWindowSize = window_size,
uniquenessRatio = 10,
speckleWindowSize = 100,
speckleRange = 32,
disp12MaxDiff = 1,
P1 = 8*3*window_size**2,
P2 = 32*3*window_size**2,
fullDP = False
)
print 'computing disparity...'
disp = stereo.compute(imgL, imgR).astype(np.float32) / 16.0
print 'generating 3d point cloud...',
h, w = imgL.shape[:2]
f = 0.8*w # guess for focal length
Q = np.float32([[1, 0, 0, -0.5*w],
[0,-1, 0, 0.5*h], # turn points 180 deg around x-axis,
[0, 0, 0, -f], # so that y-axis looks up
[0, 0, 1, 0]])
points = cv2.reprojectImageTo3D(disp, Q)
colors = cv2.cvtColor(imgL, cv2.COLOR_BGR2RGB)
mask = disp > disp.min()
out_points = points[mask]
out_colors = colors[mask]
out_fn = 'out.ply'
write_ply('out.ply', out_points, out_colors)
print '%s saved' % 'out.ply'
cv2.imshow('left', imgL)
cv2.imshow('disparity', (disp-min_disp)/num_disp)
cv2.waitKey()
cv2.destroyAllWindows()
Result
Can someone please help me ? I think I have to change some parameters, but I have no clue where...
Thanks !