I was tasked with creating a football player detector using OpenCV (4.5.5) and Python. My notebook is raising the errors below, but I am not knowledgeable enough in Python and OpenCV to fix them:
<ipython-input-12-4322b53719b8> in <module>
19
20 net.setInput(blob)
---> 21 outs = net.forward(output_layers)
22 outs = get_players(outs, height, width)
23 for i in range(len(outs)):
error: OpenCV(4.5.5) :-1: error: (-5:Bad argument) in function 'forward'
> Overload resolution failed:
> - Can't convert object to 'str' for 'outputName'
> - Can't parse 'outputBlobs'. Input argument doesn't provide sequence protocol
> - Can't parse 'outputBlobs'. Input argument doesn't provide sequence protocol
> - Can't parse 'outBlobNames'. Input argument doesn't provide sequence protocol
> - Can't parse 'outBlobNames'. Input argument doesn't provide sequence protocol
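From what I can tell, this overload-resolution failure means forward() received something that is neither a string nor a sequence of strings. The generator expression used to build output_layers below does not satisfy the sequence protocol, and in recent 4.5.x builds getUnconnectedOutLayers() returns plain scalar indices, so indexing with i[0] also breaks once the generator is materialized. A minimal sketch of the usual workaround, reusing the names from the code that follows:

layer_names = net.getLayerNames()
try:
    # newer 4.5.x builds: getUnconnectedOutLayers() yields plain scalar indices
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
except (IndexError, TypeError):
    # older builds: each index is wrapped in a one-element array
    output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]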
Here's the full code I'm running. I need some guidance to resolve this issue; perhaps someone here has done football video analysis using OpenCV and Python. Thanks.
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model
from numpy import argmax
model = load_model(r'C:/Users/Benjamin Tiopan/players-matching/files/model.h5')
cap = cv2.VideoCapture(r'C:/Users/Benjamin Tiopan/players-matching/data/video.avi')
temp=cv2.imread(r'C:/Users/Benjamin Tiopan/players-matching/data/temp.jpg',0)
ground=cv2.imread(r'C:/Users/Benjamin Tiopan/players-matching/data/dst.jpg')
wt, ht = temp.shape[::-1]
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
#if you want to write video
out = cv2.VideoWriter('match.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 20, (1920,1080))
out2 = cv2.VideoWriter('plane.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 20, (900,600))
if (cap.isOpened() == False):
    print("Error opening video stream or file")
# Load Yolo
net = cv2.dnn.readNet('C:/Users/Benjamin Tiopan/players-matching/files/yolov3.weights', 'C:/Users/Benjamin Tiopan/players-matching/files/yolov3.cfg')
classes = []
with open('C:/Users/Benjamin Tiopan/players-matching/files/coco.names', "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = (layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers())
colors = np.random.uniform(0, 255, size=(len(classes), 3))
def plane(players, ball):
    coptemp = ground.copy()
    # homography mapping frame coordinates to the ground-plane image
    matrix = np.array([[ 2.56945407e-01, 5.90910632e-01, 1.94094537e+02],
                       [-1.33508274e-02, 1.37658562e+00, -8.34967286e+01],
                       [-3.41878940e-05, 1.31509536e-03, 1.00000000e+00]])
    for p in players:
        x = p[0] + int(p[2] / 2)
        y = p[1] + p[3]
        pts3 = np.float32([[x, y]])
        pts3o = cv2.perspectiveTransform(pts3[None, :, :], matrix)
        x1 = int(pts3o[0][0][0])
        y1 = int(pts3o[0][0][1])
        pp = (x1, y1)
        if p[4] == 0:
            cv2.circle(coptemp, pp, 15, (255, 0, 0), -1)
        elif p[4] == 1:
            cv2.circle(coptemp, pp, 15, (255, 255, 255), -1)
        elif p[4] == 2:
            # class 2 is the referee ('hakm'); drawing him is disabled
            # cv2.circle(coptemp, pp, 15, (0, 0, 255), -1)
            pass
    if len(ball) != 0:
        xb = ball[0] + int(ball[2] / 2)
        yb = ball[1] + int(ball[3] / 2)
        pts3ball = np.float32([[xb, yb]])
        pts3b = cv2.perspectiveTransform(pts3ball[None, :, :], matrix)
        x2 = int(pts3b[0][0][0])
        y2 = int(pts3b[0][0][1])
        pb = (x2, y2)
        cv2.circle(coptemp, pb, 15, (0, 0, 0), -1)
    return coptemp
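(The hardcoded matrix above is a homography from frame pixels to the ground-plane image dst.jpg. For reference, such a matrix is typically estimated once from manually picked point correspondences; a hypothetical sketch with made-up coordinates:)

# four corresponding points: pixels in a video frame -> pixels in dst.jpg
src_pts = np.float32([[100, 200], [1800, 220], [1700, 1000], [150, 950]])  # placeholders
dst_pts = np.float32([[0, 0], [900, 0], [850, 600], [30, 580]])            # placeholders
matrix, _ = cv2.findHomography(src_pts, dst_pts)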
def get_players(outs, height, width):
    class_ids = []
    confidences = []
    boxes = []
    players = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            if label == 'person':
                players.append(boxes[i])
    return players
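(A side note on the NMS step above: depending on the OpenCV build, NMSBoxes may return its indices as shape (N,) or (N, 1). A version-proof sketch of that loop:)

# iterate directly over the NMS survivors, whatever shape they come in
for i in np.array(indexes).reshape(-1):
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    if label == 'person':
        players.append(boxes[i])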
opr=0
while cap.isOpened():
    ret, frame = cap.read()
    players = []
    ball = []
    if opr < 310:
        opr = opr + 1
        continue
    if ret == True:
        copy = frame.copy()
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        height, width, channels = frame.shape
        blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)
        outs = get_players(outs, height, width)
        for i in range(len(outs)):
            x, y, w, h = outs[i]
            roi = frame[y:y+h, x:x+w]
            # some frames are bad, so the resize call may throw an error
            try:
                roi = cv2.resize(roi, (96, 96))
            except:
                continue
            ym = model.predict(np.reshape(roi, (1, 96, 96, 3)))
            ym = argmax(ym)
            players.append([x, y, w, h, ym])
            if ym == 0:
                cv2.rectangle(copy, (x, y), (x + w, y + h), (0, 0, 255), 2)
            elif ym == 1:
                cv2.rectangle(copy, (x, y), (x + w, y + h), (0, 255, 0), 2)
            elif ym == 2:
                cv2.rectangle(copy, (x, y), (x + w, y + h), (255, 0, 0), 2)
        res = cv2.matchTemplate(gray, temp, cv2.TM_SQDIFF_NORMED)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
        if min_val < 0.05:
            top_left = min_loc
            bottom_right = (top_left[0] + wt, top_left[1] + ht)
            ball.append(top_left[0])
            ball.append(top_left[1])
            ball.append(wt)
            ball.append(ht)
            cv2.rectangle(copy, top_left, bottom_right, (0, 255, 100), 2)
        p = plane(players, ball)
        out.write(copy)
        out2.write(p)
        cv2.imshow('img', copy)
        cv2.imshow('plane', p)
        # This runs the video without stopping; the cv2 window may pause between
        # frames depending on your PC's power (I recommend OpenCV with GPU, or
        # Colab, to run the script quickly). If you want the script to stop at
        # every frame until you manually allow it to continue, change this to
        # cv2.waitKey(0).
        if cv2.waitKey(1) == 27:
            break
# When everything done, release the video capture object
cap.release()
out.release()
out2.release()
# Closes all the frames
cv2.destroyAllWindows()
I am working with YOLOv4 to process video frames for object detection of one class, Human. Every time a human is detected in a frame, it prints a line in the terminal, "Number of human detected :", with the number of humans detected in that frame. Now I want the code to run as it is, but instead of printing that output for every frame, it should print the output for the video frame being processed at the first 1-minute mark and thereafter at every 3-minute mark until the video is fully processed. So for a 5-minute video, I would want the statement printed at the following video timestamps: 1:00 and 4:00. For an 8-minute video it would be 1:00, 4:00, 7:00, and so on. I tried using the schedule module, but it seems to just schedule the entire code to run after 1 minute.
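(For what it's worth, video timestamps are normally derived from the frame counter and the stream's FPS rather than from wall-clock scheduling, since processing rarely runs at real-time speed. A minimal sketch of that idea, reusing the vs and totalFrames names from the code below; human_count is a placeholder:)

fps_rate = vs.get(cv2.CAP_PROP_FPS)      # frames per second of the input video
first_mark = int(fps_rate * 60)          # the 1:00 mark, measured in frames
interval = int(fps_rate * 180)           # every 3:00 thereafter, in frames
if totalFrames >= first_mark and (totalFrames - first_mark) % interval == 0:
    print("Number of human detected :", human_count)  # human_count is a placeholder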
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2
from collections import OrderedDict
import matplotlib.pyplot as plt
import datetime
import schedule
from time import sleep
file = "test2"
input = "C:/Users/asmita.nandi/Downloads/" + file + ".mp4"
output = "C:/Users/asmita.nandi/Downloads/" + file + ".avi"
net = cv2.dnn.readNet("C:/Users/asmita.nandi/Downloads/custom-yolov4-tiny_human-608 (1).cfg", "C:/Users/asmita.nandi/Downloads/custom-yolov4-tiny-detector_human.weights")
labelsPath = "C:/Users/asmita.nandi/Downloads/human_label.txt"
def event(input, output, net, labelsPath):
    LABELS = open(labelsPath).read().strip().split("\n")
    np.random.seed(1)
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 6)]
    CONF_THRESH, NMS_THRESH = 0.25, 0.25
    vs = cv2.VideoCapture(input)
    fp = vs.get(cv2.CAP_PROP_FPS)
    writer = None
    W = None
    H = None
    totalFrames = 0
    TotalHuman = 0
    fps = FPS().start()  # the FPS counter was used below but never started
    while True:
        frame = vs.read()
        frame = frame[1] if input else frame
        if input is not None and frame is None:
            break
        (H, W) = frame.shape[:2]
        print(H, W)
        if W is None or H is None:
            (H, W) = frame.shape[:2]
        if output is not None and writer is None:
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(output, fourcc, fp, (W, H), True)
        ln = net.getLayerNames()
        ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (608, 608), swapRB=True, crop=False)
        net.setInput(blob)
        start = time.time()
        layerOutputs = net.forward(ln)
        print(layerOutputs)
        end = time.time()
        boxes = []
        confidences = []
        classIDs = []
        (H, W) = frame.shape[:2]
        # loop over each of the layer outputs (renamed so it doesn't shadow the output parameter)
        for layer_output in layerOutputs:
            # loop over each of the detections
            for detection in layer_output:
                scores = detection[5:]
                # print(detection)
                classID = np.argmax(scores)
                confidence = scores[classID]
                if confidence > CONF_THRESH:
                    # print("Box")
                    box = detection[0:4] * np.array([W, H, W, H])
                    (centerX, centerY, width, height) = box.astype("int")
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESH, NMS_THRESH)
        ObjectCount = {}
        if len(idxs) > 0:
            for i in idxs.flatten():
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])
                color = colors[classIDs[i]]
                color = [c * 255 for c in color]
                class_name = LABELS[classIDs[i]]
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                # if class_name == "Human":
                cv2.putText(frame, class_name, (x, y - 10), 0, 0.5, color, 2)
                obj, conf = LABELS[classIDs[i]], confidences[i]
                if obj not in ObjectCount.keys():
                    ObjectCount[obj] = 1
                else:
                    ObjectCount[obj] += 1
                allvalues = []
                allvalues.append(ObjectCount[obj])
            print("Number of Humans detected ", max(allvalues))
        if writer is not None:
            writer.write(frame)
        # show the output frame
        # cv2_imshow(frame)
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
        # increment the total number of frames processed thus far and
        # then update the FPS counter
        totalFrames += 1
        fps.update()
    # stop the timer and display FPS information
    # for (objectID, centroid) in objects.items():
    #     print(objectID, centroids)
    fps.stop()
    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print(totalFrames)
    # print(info)
    # check to see if we need to release the video writer pointer
    if writer is not None:
        writer.release()
    # if we are not using a video file, stop the camera video stream
    if not input:
        vs.stop()
    # otherwise, release the video file pointer
    else:
        vs.release()
    # close any open windows
    cv2.destroyAllWindows()
schedule.every(1).minutes.do(event(input, output, net, labelsPath))
while 1:
    schedule.run_pending()
    time.sleep(1)
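As an aside, note that schedule.every(1).minutes.do(event(input, output, net, labelsPath)) calls event immediately and registers its return value (None) as the job; schedule expects the callable itself plus its arguments. A minimal sketch of the corrected registration:

# pass the function and its arguments so schedule can call it later
schedule.every(1).minutes.do(event, input, output, net, labelsPath)

That said, for printing at specific video timestamps, the frame-count approach sketched earlier is more reliable than wall-clock scheduling, since processing rarely runs in real time.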
I have a little project with OpenCV (Python) where one of the steps is to take an x-ray image of the human body and convert it to a binary image, where white pixels mark where bone is present and black pixels mean there is no bone there.
Since sometimes "bone parts" can be darker than "non-bone parts" from another region, simple thresholding won't work. I also tried adaptive threshold and I couldn't see much difference.
I came up with a simple algorithm that applies a separate threshold to each row.
Here is the code:
def threshhold(image, val):
    image = image.copy()
    for row_idx in range(image.shape[0]):
        max_row = image[row_idx].max()
        min_row = image[row_idx].min()
        tresh = np.median(image[row_idx]) + (val * (max_row - min_row))
        # Or use np.mean instead of np.median
        _, tresh = cv2.threshold(image[row_idx], tresh, 255, cv2.THRESH_BINARY)
        image[row_idx] = tresh.ravel()
    return image
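(For reference, the same per-row rule can be written without an explicit Python loop; a minimal NumPy sketch, with threshold_rows as a hypothetical name:)

def threshold_rows(image, val):
    # per-row statistics, kept as column vectors so they broadcast across rows
    row_min = image.min(axis=1, keepdims=True).astype(np.float32)
    row_max = image.max(axis=1, keepdims=True).astype(np.float32)
    t = np.median(image, axis=1, keepdims=True) + val * (row_max - row_min)
    return np.where(image > t, 255, 0).astype(np.uint8)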
And here is the code that does the same work but column-by-column instead of row-by-row:
def threshhold2(image, val):
    image = image.copy()
    for col_idx in range(image.shape[1]):
        max_col = image[:, col_idx].max()
        min_col = image[:, col_idx].min()
        tresh = np.median(image[:, col_idx]) + (val * (max_col - min_col))
        # Or use np.mean instead of np.median
        _, tresh = cv2.threshold(image[:, col_idx], tresh, 255, cv2.THRESH_BINARY)
        image[:, col_idx] = tresh.ravel()
    return image
This method works pretty well on images like this:
Not quite as well on this one, but it is not that bad:
Very terrible:
Only the left half looks good
...
As you can see, this algorithm works well only for some images.
I would be glad to hear ideas from more experienced people.
The images are not of me, by the way.
Entire source code:
import os
import cv2
import numpy as np
files_to_see = os.listdir("data_set")
files_to_see.sort()
current_file = 0
print(files_to_see)
def slice(image, size):
    out = []
    x_count = image.shape[1] // size
    y_count = image.shape[0] // size
    for y_idx in range(y_count):
        for x_idx in range(x_count):
            out.append(
                (
                    (y_idx, x_idx),
                    image[y_idx * size: (y_idx + 1) * size,
                          x_idx * size: (x_idx + 1) * size]
                )
            )
    return y_count, x_count, out
def normalize(image):
    image = image.copy()
    min_pix = image.min()
    max_pix = image.max()
    for y in range(image.shape[0]):
        for x in range(image.shape[1]):
            val = image[y, x]
            val -= min_pix
            val *= 255 / (max_pix - min_pix)
            image[y, x] = round(val)
    # image -= min_pix
    # image *= round(255 / (max_pix - min_pix))
    return image
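(The commented-out lines hint at a vectorized version; for reference, OpenCV also has this built in. A minimal sketch, with normalize_fast as a hypothetical name:)

def normalize_fast(image):
    # min-max stretch to [0, 255]; same effect as the loop above, but vectorized
    return cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)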
def threshhold(image, val, method):
    image = image.copy()
    for row_idx in range(image.shape[0]):
        max_row = image[row_idx].max()
        min_row = image[row_idx].min()
        # tresh = np.median(image[row_idx]) + (val * (max_row - min_row))
        tresh = method(image[row_idx]) + (val * (max_row - min_row))
        _, tresh = cv2.threshold(image[row_idx], tresh, 255, cv2.THRESH_BINARY)
        image[row_idx] = tresh.ravel()
    return image

def threshhold2(image, val, method):
    image = image.copy()
    for col_idx in range(image.shape[1]):
        max_col = image[:, col_idx].max()
        min_col = image[:, col_idx].min()
        tresh = method(image[:, col_idx]) + (val * (max_col - min_col))
        _, tresh = cv2.threshold(image[:, col_idx], tresh, 255, cv2.THRESH_BINARY)
        image[:, col_idx] = tresh.ravel()
    return image
def recalculate_threshhold(v):
    global original_current_image, thresh_current_image, y_c, x_c, slices
    method = np.mean
    if cv2.getTrackbarPos("method", "xb labeler") == 0:
        method = np.median
    thresh_current_image = threshhold2(original_current_image, cv2.getTrackbarPos("threshhold_value", "xb labeler") / 1000, method)
    y_c, x_c, slices = slice(thresh_current_image, 128)

def thresh_current_image_mouse_event(event, x, y, flags, param):
    if event == 1:  # cv2.EVENT_LBUTTONDOWN
        print(x // 128, y // 128)
        cv2.imshow("slice", slices[(x // 128) + (y // 128) * x_c][1])
cv2.namedWindow("xb labeler")
cv2.createTrackbar("threshhold_value", "xb labeler", 0, 1000, recalculate_threshhold)
cv2.createTrackbar("method", "xb labeler", 0, 1, recalculate_threshhold)
cv2.namedWindow("thresh_current_image")
cv2.setMouseCallback("thresh_current_image", thresh_current_image_mouse_event)
def init():
    global original_current_image, thresh_current_image, x_c, y_c, slices, files_to_see, current_file
    original_current_image = cv2.imread("data_set/" + files_to_see[current_file], cv2.CV_8UC1)
    original_current_image = cv2.resize(original_current_image, (512, 512))
    original_current_image = normalize(original_current_image)
    original_current_image = cv2.GaussianBlur(original_current_image, (5, 5), 10)
    recalculate_threshhold(1)
    y_c, x_c, slices = slice(thresh_current_image, 128)
init()
while True:
    cv2.imshow("thresh_current_image", thresh_current_image)
    cv2.imshow("xb labeler", original_current_image)
    k = cv2.waitKey(1)
    if k == ord('p'):
        cv2.imwrite("ssq.png", thresh_current_image)
        current_file += 1
        init()
cv2.destroyAllWindows()
EDIT: Added original images:
I have written two scripts, main_program.py and detector.py, to perform face detection and recognition. I first train the classifier and then use it to recognize faces. But the training isn't happening, because the program exits with an error.
Here is the code for both scripts.
For main_program.py:
import cv2
import os
import numpy as np
import detector as dec

test_img = cv2.imread(r'C:\Users\JasonPC\Desktop\FaceDetection\Test Images\j1.jpg')
faces_detected, gray_img = dec.faceDetect(test_img)
print('Faces Detected: ', faces_detected)
faces, faceID = dec.labels_for_training_data(r'C:\Users\JasonPC\Desktop\FaceDetection\Resources')
face_recognizer = dec.train_classifier(faces, faceID)
name = {0: 'Obama', 1: 'Trump'}
for faces in faces_detected:
    (x, y, w, h) = face
    roi_gray = gray_img[y : y + h, x : x + h]
    label, confidence = face_recognizer.predict(roi_gray)
    confidence('confidence: ', confidence)
    print('label: ', label)
    dec.draw_rect(test_img, face)
    predicted_name = name[label]
    dec.put_text(test_img, predicted_name, x, y)
result_img = cv2.resize(test_img, (200, 200))
cv2.imshow('Face Detection', result_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
And here is the code for detector.py:
import cv2
import os
import numpy as np

def faceDetect(img):
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    classifier = cv2.CascadeClassifier(r'C:\Users\JasonPC\Documents\CodeVault\Python\FaceDetection\Classifiers\haarcascade_frontalface_alt.xml')
    # classifier = cv2.CascadeClassifier(r'C:\Users\JasonPC\Documents\CodeVault\Python\FaceRecognition\Classifier.xml')
    # val = classifier.load(r'C:\Users\JasonPC\Documents\CodeVault\Python\FaceRecognition\Classifier.xml')
    faces = classifier.detectMultiScale(gray_img, scaleFactor=1.5, minNeighbors=5)
    return faces, gray_img

def labels_for_training_data(directory):
    faces = []
    faceID = []
    for path, subdirnames, filenames in os.walk(directory):
        for filename in filenames:
            if filename.startswith("."):
                print('Skipping system file...')
                continue
            id = os.path.basename(path)
            img_path = os.path.join(path, filename)
            print('img_path: ', img_path)
            print('id: ', id)
            test_img = cv2.imread(img_path)
            if test_img is None:
                print('Image is not loaded properly')
                continue
            faces_rect, gray_img = faceDetect(test_img)
            if len(faces_rect) != 1:
                continue  # skip images in which multiple faces have been detected
            (x, y, w, h) = faces_rect[0]
            roi_gray = gray_img[y : y + 1, x : x + h]
            faces.append(roi_gray)
            faceID.append(int(id))
    return faces, faceID

def train_classifier(faces, faceID):
    face_recognizer = cv2.face.LBPHFaceRecognizer_create()
    face_recognizer.train(faces, np.array(faceID))
    return face_recognizer

def draw_rect(test_img, face):
    (x, y, w, h) = face
    cv2.rectangle(test_img, (x, y), (x + w, y + h), (255, 0, 0), thickness=5)

def put_text(test_img, text, x, y):
    cv2.putText(test_img, text, (x, y), cv2.FONT_HERSHEY_DUPLEX, 5, (255, 0, 0), 6)
But I am stuck with an error!
Traceback (most recent call last):
  File "c:/Users/JasonPC/Desktop/FaceDetection/main_program.py", line 29, in <module>
    face_recognizer = dec.train_classifier(faces, faceID)
  File "c:\Users\JasonPC\Desktop\FaceDetection\detector.py", line 44, in train_classifier
    face_recognizer.train(faces, np.array(faceID))
cv2.error: OpenCV(4.3.0) C:\projects\opencv-python\opencv\modules\core\src\matrix.cpp:235: error: (-215:Assertion failed) s >= 0 in function 'cv::setSize'
Please help me debug this error! Many thanks!
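For anyone skimming: the setSize assertion usually means train() received an empty or degenerate Mat. In detector.py, the ROI slice roi_gray = gray_img[y : y + 1, x : x + h] looks like a typo for the full face box; it would produce 1-pixel-high training samples, which would explain the failure. A hedged sketch of the intended crop (there are also smaller typos in main_program.py: for faces in faces_detected vs. face, and confidence(...) where print(...) was presumably meant):

# inside labels_for_training_data: crop h rows and w columns of the face box
(x, y, w, h) = faces_rect[0]
roi_gray = gray_img[y : y + h, x : x + w]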
I want to detect crop rows in aerial images (CRBD). I have done the necessary image processing: converting to grayscale, edge detection, skeletonization, and a Hough transform (to identify and draw the lines). I also set the accumulator angle to math.pi*4.0/180, which I varied from time to time.
The algorithm works well at detecting approximately 4 crop lines. I want to improve it so that it can detect a variable number of crop rows, and it should be able to highlight these crop rows.
Here is a link to the sample code I modified: Here
import os
import os.path
import time
import cv2
import numpy as np
import math
### Setup ###
image_data_path = os.path.abspath('../8470p/CRBD/Images')
gt_data_path = os.path.abspath('../8470p/GT data')
image_out_path = os.path.abspath('../8470p/algorithm_1')
use_camera = False # whether or not to use the test images or camera
images_to_save = [2, 3, 4, 5] # which test images to save
timing = False # whether to time the test images
curr_image = 0 # global counter
HOUGH_RHO = 2                 # Distance resolution of the accumulator in pixels
HOUGH_ANGLE = math.pi*4.0/18  # Angle resolution of the accumulator in radians
HOUGH_THRESH_MAX = 80         # Accumulator threshold parameter. Only those lines are returned that get enough votes
HOUGH_THRESH_MIN = 10
HOUGH_THRESH_INCR = 1
NUMBER_OF_ROWS = 10           # how many crop rows to detect
THETA_SIM_THRESH = math.pi*(6.0/180)  # How similar two rows can be
RHO_SIM_THRESH = 8                    # How similar two rows can be
ANGLE_THRESH = math.pi*(30.0/180)     # How steep the crop rows can be, in radians
def grayscale_transform(image_in):
    '''Converts RGB to grayscale and enhances green values'''
    b, g, r = cv2.split(image_in)
    return 2*g - r - b

def save_image(image_name, image_data):
    '''Saves the image if the user requested it before runtime'''
    if curr_image in images_to_save:
        image_name_new = os.path.join(image_out_path, "{0}_{1}.jpg".format(image_name, str(curr_image)))
        cv2.imwrite(image_name_new, image_data)  # the write call appears to have been lost in the paste
def skeletonize(image_in):
    '''Inputs a grayscale image and outputs a binary skeleton image'''
    size = np.size(image_in)
    skel = np.zeros(image_in.shape, np.uint8)
    ret, image_edit = cv2.threshold(image_in, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    element = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    done = False
    while not done:
        eroded = cv2.erode(image_edit, element)
        temp = cv2.dilate(eroded, element)
        temp = cv2.subtract(image_edit, temp)
        skel = cv2.bitwise_or(skel, temp)
        image_edit = eroded.copy()
        zeros = size - cv2.countNonZero(image_edit)
        if zeros == size:
            done = True
    return skel
def tuple_list_round(tuple_list, ndigits_1=0, ndigits_2=0):
    '''Rounds each value in a list of tuples to the number of digits specified'''
    new_list = []
    for (value_1, value_2) in tuple_list:
        new_list.append((round(value_1, ndigits_1), round(value_2, ndigits_2)))
    return new_list
def crop_point_hough(crop_points):
    '''Iterates through Hough thresholds until an optimal value is found for
    the desired number of crop rows. Also does filtering.
    '''
    height = len(crop_points)
    width = len(crop_points[0])
    hough_thresh = HOUGH_THRESH_MAX
    rows_found = False
    while hough_thresh > HOUGH_THRESH_MIN and not rows_found:
        crop_line_data = cv2.HoughLines(crop_points, HOUGH_RHO, HOUGH_ANGLE, hough_thresh)
        crop_lines = np.zeros((height, width, 3), dtype=np.uint8)
        crop_lines_hough = np.zeros((height, width, 3), dtype=np.uint8)
        if crop_line_data is not None:
            # get rid of duplicate lines. May become redundant if a similarity threshold is done
            crop_line_data_1 = tuple_list_round(crop_line_data[:, 0, :], -1, 4)
            crop_line_data_2 = []
            x_offsets = []
            crop_lines_hough = np.zeros((height, width, 3), dtype=np.uint8)
            for (rho, theta) in crop_line_data_1:
                a = math.cos(theta)
                b = math.sin(theta)
                x0 = a*rho
                y0 = b*rho
                point1 = (int(round(x0+1000*(-b))), int(round(y0+1000*(a))))
                point2 = (int(round(x0-1000*(-b))), int(round(y0-1000*(a))))
                cv2.line(crop_lines_hough, point1, point2, (0, 0, 255), 2)
            for curr_index in range(len(crop_line_data_1)):
                (rho, theta) = crop_line_data_1[curr_index]
                is_faulty = False
                if ((theta >= ANGLE_THRESH) and (theta <= math.pi - ANGLE_THRESH)) or (theta <= 0.001):
                    is_faulty = True
                else:
                    for (other_rho, other_theta) in crop_line_data_1[curr_index+1:]:
                        if abs(theta - other_theta) < THETA_SIM_THRESH:
                            is_faulty = True
                        elif abs(rho - other_rho) < RHO_SIM_THRESH:
                            is_faulty = True
                if not is_faulty:
                    crop_line_data_2.append((rho, theta))
            for (rho, theta) in crop_line_data_2:
                a = math.cos(theta)
                b = math.sin(theta)
                c = math.tan(theta)
                x0 = a*rho
                y0 = b*rho
                point1 = (int(round(x0+1000*(-b))), int(round(y0+1000*(a))))
                point2 = (int(round(x0-1000*(-b))), int(round(y0-1000*(a))))
                cv2.line(crop_lines, point1, point2, (0, 0, 255), 2)
                # cv2.circle(crop_lines, (np.clip(int(round(a*rho+c*(0.5*height))), 0, 239), 0), 4, (255, 0, 0), -1)
                # cv2.circle(crop_lines, (np.clip(int(round(a*rho-c*(0.5*height))), 0, 239), height), 4, (255, 0, 0), -1)
                cv2.circle(crop_lines, (np.clip(int(round(rho/a)), 0, 239), 0), 5, (255, 0, 0), -1)
                # cv2.circle(img, (447, 63), 63, (0, 0, 255), -1)
                x_offsets.append(np.clip(int(round(rho/a)), 0, 239))
                cv2.line(crop_lines, point1, point2, (0, 0, 255), 2)
            if len(crop_line_data_2) >= NUMBER_OF_ROWS:
                rows_found = True
        hough_thresh -= HOUGH_THRESH_INCR
    if rows_found == False:
        print(NUMBER_OF_ROWS, "rows_not_found")
    x_offset = min(x_offsets)
    width = max(x_offsets) - min(x_offsets)
    return (crop_lines, crop_lines_hough, x_offset, width)
def crop_row_detect(image_in):
    '''Inputs an image and outputs the lines'''
    save_image('0_image_in', image_in)

    ### Grayscale Transform ###
    image_edit = grayscale_transform(image_in)
    save_image('1_image_gray', image_edit)

    ### Skeletonization ###
    skeleton = skeletonize(image_edit)
    save_image('2_image_skeleton', skeleton)

    ### Hough Transform ###
    (crop_lines, crop_lines_hough, x_offset, width) = crop_point_hough(skeleton)
    save_image('3_image_hough', cv2.addWeighted(image_in, 1, crop_lines_hough, 1, 0.0))
    save_image('4_image_lines', cv2.addWeighted(image_in, 1, crop_lines, 1, 0.0))
    return (crop_lines, x_offset, width)
def main():
    if use_camera == False:
        diff_times = []
        for image_name in sorted(os.listdir(image_data_path)):
            global curr_image
            curr_image += 1
            start_time = time.time()
            image_path = os.path.join(image_data_path, image_name)
            image_in = cv2.imread(image_path)
            # crop_row_detect returns a tuple; unpack it before blending
            (crop_lines, x_offset, width) = crop_row_detect(image_in)
            if timing == False:
                cv2.imshow(image_name, cv2.addWeighted(image_in, 1, crop_lines, 1, 0.0))
                print('Press any key to continue...')
                cv2.waitKey()
                cv2.destroyAllWindows()
            ### Timing ###
            else:
                diff_times.append(time.time() - start_time)
                mean = 0
                for diff_time in diff_times:
                    mean += diff_time
        ### Display Timing ###
        print('max time = {0}'.format(max(diff_times)))
        print('ave time = {0}'.format(1.0 * mean / len(diff_times)))
        cv2.waitKey()
    else:  # use camera. Hasn't been tested on a farm.
        capture = cv2.VideoCapture(0)
        while cv2.waitKey(1) < 0:
            _, image_in = capture.read()
            (crop_lines, x_offset, width) = crop_row_detect(image_in)
            cv2.imshow("Webcam", cv2.addWeighted(image_in, 1, crop_lines, 1, 0.0))
        capture.release()
        cv2.destroyAllWindows()

main()
Input Image
(image)
Output Image
(image)
Expected Output
(image)
I have tried thresholding with cv2.inRange() to find green lines, but I am still not getting the desired output.
Also, the algorithm seems to draw only crop_line_data_2, as shown in the Output Image; it doesn't draw crop_line_data_1.
def threshold_green(image_in):
    hsv = cv2.cvtColor(image_in, cv2.COLOR_BGR2HSV)
    ## mask of green (36, 25, 25) ~ (86, 255, 255)
    # mask = cv2.inRange(hsv, (36, 25, 25), (86, 255, 255))
    mask = cv2.inRange(hsv, (36, 25, 25), (70, 255, 255))
    ## slice the green
    imask = mask > 0
    green = np.zeros_like(image_in, np.uint8)
    green[imask] = image_in[imask]
    return green
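(If it helps anyone reproduce this: the green mask can be wired into the existing pipeline in place of the 2*g - r - b transform; a hypothetical sketch using the functions above:)

green = threshold_green(image_in)
gray = cv2.cvtColor(green, cv2.COLOR_BGR2GRAY)
skeleton = skeletonize(gray)
(crop_lines, crop_lines_hough, x_offset, width) = crop_point_hough(skeleton)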
I found an example solution that detects a common element in two images.
It's Python code using OpenCV, and I ran it on my two example images:
'''
Feature-based image matching sample.
Note, that you will need the https://github.com/opencv/opencv_contrib repo for SIFT and SURF
USAGE
find_obj.py [--feature=<sift|surf|orb|akaze|brisk>[-flann]] [ <image1> <image2> ]
--feature - Feature to use. Can be sift, surf, orb or brisk. Append '-flann'
to feature name to use Flann-based matcher instead bruteforce.
Press left mouse button on a feature point to see its matching point.
'''
# Python 2/3 compatibility
from __future__ import print_function
import numpy as np
import cv2 as cv
from common import anorm, getsize
FLANN_INDEX_KDTREE = 1 # bug: flann enums are missing
FLANN_INDEX_LSH = 6
def init_feature(name):
    chunks = name.split('-')
    if chunks[0] == 'sift':
        detector = cv.xfeatures2d.SIFT_create()
        norm = cv.NORM_L2
    elif chunks[0] == 'surf':
        detector = cv.xfeatures2d.SURF_create(200)
        norm = cv.NORM_L2
    elif chunks[0] == 'orb':
        detector = cv.ORB_create(1400)
        norm = cv.NORM_HAMMING
    elif chunks[0] == 'akaze':
        detector = cv.AKAZE_create()
        norm = cv.NORM_HAMMING
    elif chunks[0] == 'brisk':
        detector = cv.BRISK_create()
        norm = cv.NORM_HAMMING
    else:
        return None, None
    if 'flann' in chunks:
        if norm == cv.NORM_L2:
            flann_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        else:
            flann_params = dict(algorithm=FLANN_INDEX_LSH,
                                table_number=6,       # 12
                                key_size=12,          # 20
                                multi_probe_level=1)  # 2
        matcher = cv.FlannBasedMatcher(flann_params, {})  # bug: need to pass empty dict (#1329)
    else:
        matcher = cv.BFMatcher(norm)
    return detector, matcher
def filter_matches(kp1, kp2, matches, ratio=0.75):
    mkp1, mkp2 = [], []
    for m in matches:
        if len(m) == 2 and m[0].distance < m[1].distance * ratio:
            m = m[0]
            mkp1.append(kp1[m.queryIdx])
            mkp2.append(kp2[m.trainIdx])
    p1 = np.float32([kp.pt for kp in mkp1])
    p2 = np.float32([kp.pt for kp in mkp2])
    kp_pairs = zip(mkp1, mkp2)
    return p1, p2, list(kp_pairs)
def explore_match(win, img1, img2, kp_pairs, status=None, H=None):
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    vis = np.zeros((max(h1, h2), w1+w2), np.uint8)
    vis[:h1, :w1] = img1
    vis[:h2, w1:w1+w2] = img2
    vis = cv.cvtColor(vis, cv.COLOR_GRAY2BGR)
    if H is not None:
        corners = np.float32([[0, 0], [w1, 0], [w1, h1], [0, h1]])
        corners = np.int32(cv.perspectiveTransform(corners.reshape(1, -1, 2), H).reshape(-1, 2) + (w1, 0))
        cv.polylines(vis, [corners], True, (255, 255, 255))
    if status is None:
        status = np.ones(len(kp_pairs), np.bool_)
    p1, p2 = [], []  # python 2 / python 3 change of zip unpacking
    for kpp in kp_pairs:
        p1.append(np.int32(kpp[0].pt))
        p2.append(np.int32(np.array(kpp[1].pt) + [w1, 0]))
    green = (0, 255, 0)
    red = (0, 0, 255)
    kp_color = (51, 103, 236)
    for (x1, y1), (x2, y2), inlier in zip(p1, p2, status):
        if inlier:
            col = green
            cv.circle(vis, (x1, y1), 2, col, -1)
            cv.circle(vis, (x2, y2), 2, col, -1)
        else:
            col = red
            r = 2
            thickness = 3
            cv.line(vis, (x1-r, y1-r), (x1+r, y1+r), col, thickness)
            cv.line(vis, (x1-r, y1+r), (x1+r, y1-r), col, thickness)
            cv.line(vis, (x2-r, y2-r), (x2+r, y2+r), col, thickness)
            cv.line(vis, (x2-r, y2+r), (x2+r, y2-r), col, thickness)
    vis0 = vis.copy()
    for (x1, y1), (x2, y2), inlier in zip(p1, p2, status):
        if inlier:
            cv.line(vis, (x1, y1), (x2, y2), green)
    cv.imshow(win, vis)

    def onmouse(event, x, y, flags, param):
        cur_vis = vis
        if flags & cv.EVENT_FLAG_LBUTTON:
            cur_vis = vis0.copy()
            r = 8
            m = (anorm(np.array(p1) - (x, y)) < r) | (anorm(np.array(p2) - (x, y)) < r)
            idxs = np.where(m)[0]
            kp1s, kp2s = [], []
            for i in idxs:
                (x1, y1), (x2, y2) = p1[i], p2[i]
                col = (red, green)[status[i][0]]
                cv.line(cur_vis, (x1, y1), (x2, y2), col)
                kp1, kp2 = kp_pairs[i]
                kp1s.append(kp1)
                kp2s.append(kp2)
            cur_vis = cv.drawKeypoints(cur_vis, kp1s, None, flags=4, color=kp_color)
            cur_vis[:, w1:] = cv.drawKeypoints(cur_vis[:, w1:], kp2s, None, flags=4, color=kp_color)
        cv.imshow(win, cur_vis)

    cv.setMouseCallback(win, onmouse)
    return vis
if __name__ == '__main__':
    print(__doc__)
    import sys, getopt
    opts, args = getopt.getopt(sys.argv[1:], '', ['feature='])
    opts = dict(opts)
    feature_name = opts.get('--feature', 'brisk')
    try:
        fn1, fn2 = args
    except:
        fn1 = '../data/box.png'
        fn2 = '../data/box_in_scene.png'
    img1 = cv.imread(fn1, 0)
    img2 = cv.imread(fn2, 0)
    detector, matcher = init_feature(feature_name)
    if img1 is None:
        print('Failed to load fn1:', fn1)
        sys.exit(1)
    if img2 is None:
        print('Failed to load fn2:', fn2)
        sys.exit(1)
    if detector is None:
        print('unknown feature:', feature_name)
        sys.exit(1)
    print('using', feature_name)
    kp1, desc1 = detector.detectAndCompute(img1, None)
    kp2, desc2 = detector.detectAndCompute(img2, None)
    print('img1 - %d features, img2 - %d features' % (len(kp1), len(kp2)))

    def match_and_draw(win):
        print('matching...')
        raw_matches = matcher.knnMatch(desc1, trainDescriptors=desc2, k=2)  # 2
        p1, p2, kp_pairs = filter_matches(kp1, kp2, raw_matches)
        if len(p1) >= 4:
            H, status = cv.findHomography(p1, p2, cv.RANSAC, 5.0)
            print('%d / %d inliers/matched' % (np.sum(status), len(status)))
        else:
            H, status = None, None
            print('%d matches found, not enough for homography estimation' % len(p1))
        _vis = explore_match(win, img1, img2, kp_pairs, status, H)

    match_and_draw('find_obj')
    cv.waitKey()
    cv.destroyAllWindows()
Here are the original images:
Here are the images with some changes:
And I run the Python code like this:
python diff_good.py --feature=surf images/org_web.png images/change1.png
Result: the detected common object
I am trying to find a solution for changing how the common object is marked. For example, instead of showing the area with many colored points, I would like to mark it with contours or shapes (squares, rectangles, etc.), something like this:
This is an example second image where the common element (a block) is marked with green rectangles:
example result images
I want to change this code so that only the common contours are marked, i.e. the area without the many points and lines drawn now, because the current output is not clear to present and understand.
Maybe the best solution would be to produce a third, result output image with only the common area marked by rectangles or contours.
But after many hours spent searching for a solution on Google, forums, and groups, I have failed to change this code to behave as needed. Maybe here I will find some help, examples, or suggestions on how to modify it.
I found this repo: x-img-diff.
They cluster the keypoints into boxes and use some other methods to handle the boxes. You can try this approach.
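For a rough idea of that approach, here is a minimal sketch; boxes_from_points is a hypothetical helper that groups inlier points by a crude horizontal-gap heuristic, so real images may need a proper clustering step:

import numpy as np
import cv2 as cv

def boxes_from_points(points, gap=40):
    # crude 1-D clustering: sort points by x and split wherever the
    # horizontal gap to the previous point exceeds `gap` pixels
    pts = sorted((int(x), int(y)) for x, y in points)
    clusters, current = [], [pts[0]]
    for p in pts[1:]:
        if p[0] - current[-1][0] <= gap:
            current.append(p)
        else:
            clusters.append(current)
            current = [p]
    clusters.append(current)
    # one bounding rectangle (x, y, w, h) per cluster
    return [cv.boundingRect(np.array(c)) for c in clusters]

# hypothetical wiring with the inlier points from match_and_draw:
# for (x, y, w, h) in boxes_from_points(p2[status.ravel() == 1]):
#     cv.rectangle(img2_color, (x, y), (x + w, y + h), (0, 255, 0), 2)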