I am able to find the faces and save them in my local directory using python and open cv as per code below from video
import cv2
import numpy as np
import os
vc = cv2.VideoCapture('new1.avi')
c=1
if vc.isOpened():
rval , frame = vc.read()
else:
rval = False
while rval:
rval, frame = vc.read()
cv2.imwrite(str(c) + '.jpg',frame)
image_name=str(c)+'.jpg'
cascPath = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascPath)
image=cv2.imread(image_name)
gray=cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
faces = faceCascade.detectMultiScale(
gray,
scaleFactor=1.2,
minNeighbors=5,
minSize=(30, 30),
flags = cv2.cv.CV_HAAR_SCALE_IMAGE
)
print "Found {0} faces!".format(len(faces))
if len(faces)>=1:
for (x, y, w, h) in faces:
cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)
cv2.imshow("Faces found" ,image)
cv2.waitKey(0)
else:
a="rm "+image_name
os.popen(a)
c = c + 1
cv2.waitKey(1)
vc.release()
But now i want to get identification of that person which has face in that video....
How can i define the person's identification?
Like to scan the face and match it into my local face database and if match found give the name and etc etc
To differentiate between people in photos is not a trivial task, but there are some examples out there. As mentioned by Derman in an earlier comment the best way is to use machine learning to teach the program what different persons faces looks like. One way is to manually find and extract features in peoples faces, such as the distance between eyes ratio to distance between eyes and mouth and so on. This would though need attention to the effects of lens distortion and perspective. There is multiple research papers discussing the best techniques, like this paper using eigen vectors from a set of faces to find most probable match
Face Recognition Using Eigen Faces
There is a machine learning toolbox for Python which is called scikit-learn which implements support for classification, regression, clustering and so on. You can use it to train neural networks and support vector machines among others. Here is a complete example of how to implement the Eigenface method using SVM with scikit-learn and python:
Complete implementation using Python
You can use Either EigenFaceRecognizer or FisherFaceRecognizer or LBHP
All these three algorithms are inbuilt in python
# Create a recognizer object
recognizer = cv2.face.createEigenFaceRecognizer()
# But Remember for EigenFaces all the images whether training or testing has to be of same shape
#==========================================================================
# get_images_and_labels function will give us list of images and list of labels to train our recognizer that we created in the first line
# function requires the path of the directory where all the images is stored
#===========================================================================
def get_images_and_labels(path):
# Append all the absolute image paths in a list image_paths
image_paths = [os.path.join(path, f) for f in os.listdir(path) if not
f.endswith('.sad')]
# images will contains face images
images = []
# labels will contains the label that is assigned to the image
labels = []
final_images = []
largest_image_size = 0
largest_width = 0
largest_height = 0
for image_path in image_paths:
# Read the image and convert to grayscale
image_pil = Image.open(image_path).convert('L')
# Convert the image format into numpy array
image = np.array(image_pil, 'uint8')
# Get the label of the image
nbr = int(os.path.split(image_path)[1].split(".")[0].replace("subject", ""))
# Detect the face in the image
faces = faceCascade.detectMultiScale(image)
# If face is detected, append the face to images and the label to labels
for (x, y, w, h) in faces:
images.append(image[y: y + h, x: x + w])
labels.append(nbr)
cv2.imshow("Adding faces to traning set...", image[y: y + h, x: x + w])
cv2.waitKey(50)
# return the images list and labels list
for image in images:
if image.size > largest_image_size:
largest_image_size = image.size
largest_width, largest_height = image.shape
for image in images:
image = cv2.resize(image, (largest_width, largest_height), interpolation=cv2.INTER_CUBIC)
final_images.append(image)
return final_images, labels, largest_width, largest_height
#===================================================================
# Perform the tranining
# trainer takes two parameters as input
# first parameter is the list of images
# second parameter is a numpy array of their corresponding labels
#===================================================================
recognizer.train(images, np.array(labels)) # training takes as input the list
image_paths = [os.path.join(path, f) for f in os.listdir(path) if f.endswith('.sad')]
for image_path in image_paths:
predict_image_pil = Image.open(image_path).convert('L')
predict_image = np.array(predict_image_pil, 'uint8')
faces = faceCascade.detectMultiScale(predict_image)
for (x, y, w, h) in faces:
result = cv2.face.MinDistancePredictCollector()
predict_image = predict_image[y: y + h, x: x + w]
predict_image = cv2.resize(predict_image, (max_width, max_heigth), interpolation=cv2.INTER_CUBIC)
# =========================================================
# predict method will give us the prediction
# we will get the label in the next statement
# predicted_image is the image that you want to recognize
# =========================================================
recognizer.predict(predict_image, result, 0) # this statement will give the prediction
# ==========================================
# This statement below will give us label
# ==========================================
nbr_predicted = result.getLabel()
# ==========================================
# conf will tell us how much confident our recognizer is in it's prediction
# ==========================================
conf = result.getDist()
nbr_actual = int(os.path.split(image_path)[1].split(".")[0].replace("subject", ""))
if nbr_actual == nbr_predicted:
print("{} is Correctly Recognized with confidence {}".format(nbr_actual, conf))
else:
print("{} is Incorrect Recognized as {}".format(nbr_actual, nbr_predicted))
sys.exit()
Related
I´m construction a dataset with more than one image for each person for python face_recognition package. It will add a classifier on top of the bultin model. See also this example: face_recognition_knn.py. here is my code:
# import the necessary packages
from imutils import paths
import face_recognition
import pickle
import cv2
import os
# grab the paths to the input images in our dataset
print("[INFO] quantifying faces...")
imagePaths = list(paths.list_images('dataset'))
# initialize the list of known encodings and known names
knownEncodings = []
knownNames = []
# loop over the image paths
for (i, imagePath) in enumerate(imagePaths):
# extract the person name from the image path
print(f"[INFO] processing image {i+1}/{len(imagePaths)} -> {imagePath}")
name = imagePath.split(os.path.sep)[-2]
# load the input image and convert it from BGR (OpenCV ordering)
# to dlib ordering (RGB)
image = cv2.imread(imagePath)
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# detect the (x, y)-coordinates of the bounding boxes
# corresponding to each face in the input image
boxes = face_recognition.face_locations(rgb,
model='hog') #can be cnn
# compute the facial embedding for the face
encodings = face_recognition.face_encodings(rgb, boxes)
# loop over the encodings
for encoding in encodings:
# add each encoding + name to our set of known names and
# encodings
knownEncodings.append(encoding)
knownNames.append(name)
# dump the facial encodings + names to disk
print("[INFO] serializing encodings...")
data = {"encodings": knownEncodings, "names": knownNames}
f = open('encodings.pickle', "wb")
f.write(pickle.dumps(data))
f.close()
Then, I try to identify these people with this code:
import face_recognition
import pickle
import cv2
import numpy as np
import requests
from datetime import datetime
# load the known faces and embeddings
print("[INFO] loading encodings...")
data = pickle.loads(open("encodings.pickle", "rb").read())
def processa_imagem(url):
# load the input image and convert it from BGR to RGB and returns file with cofidence
image = cv2.imread(url)
if image is None:
print(f'Image not found: {imagem}')
#image = np.array(image, dtype=np.uint8)
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# detect the (x, y)-coordinates of the bounding boxes corresponding
# to each face in the input image, then compute the facial embeddings
# for each face
print("[INFO] recognizing faces...")
boxes = face_recognition.face_locations(rgb,
model='hog')
encodings = face_recognition.face_encodings(rgb, boxes)
# initialize the list of names for each face detected
names = []
# loop over the facial embeddings
for encoding in encodings:
# attempt to match each face in the input image to our known
# encodings
## ATTENTION! the ideal is face_recognition.api.batch_face_locations but i dont have a GPU
matches = face_recognition.face_distance(data["encodings"],
encoding)
name = "unkown"
# check to see if we have found a match
if max(matches) > 0.7:
# find the indexes of all matched faces then initialize a
# dictionary to count the total number of times each face
# was matched
matchedIdxs = [i for (i, b) in enumerate(matches) if b]
counts = {}
# loop over the matched indexes and maintain a count for
# each recognized face face
for i in matchedIdxs:
name = data["names"][i]
counts[name] = counts.get(name, 0) + 1
# determine the recognized face with the largest number of
# votes (note: in the event of an unlikely tie Python will
# select first entry in the dictionary)
name = max(counts, key=counts.get)
# update the list of names
names.append(name)
# loop over the recognized faces
for ((top, right, bottom, left), name) in zip(boxes, names):
# draw the predicted face name on the image
cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
y = top - 15 if top - 15 > 15 else top + 15
cv2.putText(image, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX,
0.75, (255, 0, 0), 2)
now = datetime.now()
current_time = now.strftime("%H%M%S%f")
#file_path = f'static/face-{current_time}.jpg'
file_path = f'face-{current_time}.jpg'
cv2.imwrite(file_path,image)
return (file_path, ', '.join(names))
On my dataset, I´ve added, on average, about 10 photos of each individual. The script uses face_recognition.face_distance and it works well to recognize someone in the dataset.
The problema is that, when I use it with someone that OUT. For these people, sometimes I still get about 0.90 higher confidence false positive results.
Some of the pictures in dataset are low quality. Maybe that´s the reason? Should I change my approach, using more detailed photos (2 or 3) and maybe encoding them with jitters?
Thanks for any input!
i am using yolo - python to detect object from multiple images.
i need to loop through result (describe detected object) to write that result in multiple text files (same name with name of image).
Example: 1.jpg detected 1 car & 1 dog => 1.txt contain car & dog.
i tried create multiple text file with glob(input image folder)
i tried loop through text (describe detected object) but i can't write result of each image to each text file. Something wrong with loop.
Please check the code in bottom, part of cv2.putText.
import numpy as np
import argparse
import time
import cv2
import os, os.path
import glob
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", required=True,
help="path to input image")
ap.add_argument("-y", "--yolo", required=True,
help="base path to YOLO directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
ap.add_argument("-t", "--threshold", type=float, default=0.3,
help="threshold when applyong non-maxima suppression")
args = vars(ap.parse_args())
num = 1
text_path_list = []
def create_result_text(textDir = "input"):
for i in glob.glob("input/*.jpg"):
f = open(i.rsplit( ".", 1 )[ 0 ] + ".txt", "w+")
f.close()
temp = os.listdir(textDir)
for i in temp:
if (i.endswith("txt")):
text_path_list.append(i)
create_result_text()
print(text_path_list)
# load the COCO class labels our YOLO model was trained on
labelsPath = os.path.sep.join([args["yolo"], "yolov3.txt"])
LABELS = open(labelsPath).read().strip().split("\n")
# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3),
dtype="uint8")
# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.sep.join([args["yolo"], "yolov3.weights"])
configPath = os.path.sep.join([args["yolo"], "yolov3.cfg"])
# load our YOLO object detector trained on COCO dataset (80 classes)
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
# read input image
imageDir = "input" #specify your path here
image_path_list = []
valid_image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] #specify your vald extensions here
valid_image_extensions = [item.lower() for item in valid_image_extensions]
for file in os.listdir(imageDir):
extension = os.path.splitext(file)[1]
if extension.lower() not in valid_image_extensions:
continue
image_path_list.append(os.path.join(imageDir, file))
# load input image and grab its spatial dimensions
for imagePath in image_path_list:
image = cv2.imread(imagePath)
(H, W) = image.shape[:2]
# determine only the *output* layer names that we need from YOLO
ln = net.getLayerNames()
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# construct a blob from the input image and then perform a forward
# pass of the YOLO object detector, giving us our bounding boxes and
# associated probabilities
blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416),
swapRB=True, crop=False)
net.setInput(blob)
start = time.time()
layerOutputs = net.forward(ln)
end = time.time()
# show timing information on YOLO
print("[INFO] YOLO took {:.6f} seconds".format(end - start))
# initialize our lists of detected bounding boxes, confidences, and
# class IDs, respectively
boxes = []
confidences = []
classIDs = []
# loop over each of the layer outputs
for output in layerOutputs:
# loop over each of the detections
for detection in output:
# extract the class ID and confidence (i.e., probability) of
# the current object detection
scores = detection[5:]
classID = np.argmax(scores)
confidence = scores[classID]
# filter out weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > args["confidence"]:
# scale the bounding box coordinates back relative to the
# size of the image, keeping in mind that YOLO actually
# returns the center (x, y)-coordinates of the bounding
# box followed by the boxes' width and height
box = detection[0:4] * np.array([W, H, W, H])
(centerX, centerY, width, height) = box.astype("int")
# use the center (x, y)-coordinates to derive the top and
# and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
# update our list of bounding box coordinates, confidences,
# and class IDs
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
classIDs.append(classID)
# apply non-maxima suppression to suppress weak, overlapping bounding
# boxes
idxs = cv2.dnn.NMSBoxes(boxes, confidences, args["confidence"],
args["threshold"])
result_texts = []
# ensure at least one detection exists
if len(idxs) > 0:
# loop over the indexes we are keeping
for i in idxs.flatten():
# extract the bounding box coordinates
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
# draw a bounding box rectangle and label on the image
color = [int(c) for c in COLORS[classIDs[i]]]
cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
0.5, color, 2)
result_texts.append(text)
print(result_texts)
for text in result_texts:
np.savetxt("input/" + str(num) + ".txt", np.array(result_texts), fmt="%s")
num += 1
i expect each of text to has name of object of each image
but all text have name of object of last image.
I am trying to use the haar-cascade in OpenCV 4.0 to detect faces for emotion, gender & age estimation. sometimes the detectmultiscale() function returns an empty tuple which raises an error in the later parts of recognition.
I tried creating a while loop until the face is detected, but it seems once the face is not detected it is not being detected again(in the same captured frame), I get empty tuples returned. the weird thing is that sometimes the program works flawlessly.
the detection model is being loaded correctly, since cv2.CascadeClassifier.empty(face_cascade) returns False.
there seems to be no problem with the captured frame since I can display it properly.
after searching I found that detectmultiscale() does, in fact, return an empty tuple when no faces are detected.
Python OpenCV face detection code sometimes raises `'tuple' object has no attribute 'shape'`
face_cascade = cv2.CascadeClassifier(
'C:\\Users\\kj\\Desktop\\jeffery 1\\trained_models\\detection_models\\haarcascade_frontalface_alt.xml')
retval = cv2.CascadeClassifier.empty(face_cascade)
print(retval)
returns False
def video_cap(out_queue):
video_capture = cv2.VideoCapture(0, cv2.CAP_DSHOW)
#video_capture.set(3, 768)
#video_capture.set(4, 1024)
while True:
ret, bgr_image = video_capture.read()
cv2.imshow('frame',bgr_image)
cv2.waitKey(1000)
cv2.destroyAllWindows()
if video_capture.isOpened() == False :
video_capture.open(0)
if(ret):
gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
faces = detect_faces(face_detection, gray_image)
ret_list = [gray_image, rgb_image, faces]
print("DEBUG: VIDEO_CAPTURE MODULE WORKING")
out_queue.put(ret_list)
return
video_cap function is threaded
def detect_faces(detection_model, gray_image_array):
faces1 = detection_model.detectMultiScale(gray_image_array, scaleFactor= 2, minNeighbors=10,minSize=(64,64))
while(len(faces1)== 0 ):
faces1 = detection_model.detectMultiScale(gray_image_array, scaleFactor=2, minNeighbors=10, minSize=(64, 64))
print(faces1)
if(len(faces1)!=0):
break
return faces1
I get the output:
()
()
()
()....
goes on until I terminate.
how do I fix the problem?
This is a snippet of the code I used. I removed the ARGUMENTS in the detectMultiScale() function and it ran fine.
Also, make sure you have the correct path to the xml files.
classifier = cv2.CascadeClassifier("../../../l-admin/anaconda3/lib/python3.6/site-packages/cv2/data/haarcascade_frontalface_default.xml")
img = cv2.imread('../Tolulope/Adetula Tolulope (2).jpg')
face = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = classifier.detectMultiScale(face)
print(type(faces), faces)
for (x, y, w, h) in faces:
img = cv2.imwrite("facesa.png", cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 3))
On a Secondary note, the reason my own did work might be because my camera did locate my face due to the lightning. So I suggest you try it out with a picture first before using the video.
I have a similar issue when I use jpg format but the main problem is always in the format of the image as when i used png it automatically give the tuple with correct values.
classifier = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
# reading the image
img = cv2.imread('i.png')
# showing the image
#cv2.imshow('shaswat face detection ',img)
# making image to gray scale as black and white
grayscaled_img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# cv2.imshow('shaswat face detection ',grayscaled_img)
# detecting the image
# return top left and bottom right points
faces = classifier.detectMultiScale(grayscaled_img)
print(faces)
#cv2.rectangle(img , face_coordinates[0] , face_coordinates[1] , (255,0,0) , 10)
the output shows
[[ 87 114 361 361]]
Image stitching not work properly. The warped image is cropped and interpolation cannot be done because images do not intersect.
Hi,
I was assigned an homework in which I have to stitch togheter two images, shot by different cameras.
I should find the homography matrix and then warp the second image using this matrix. At the end I must interpolate the two images.
Unfortunately, the code I wrote seems not to work properly. During second image warp I lost most of the image information; a lot of pixels are black and not the whole transformed image is transformed.
I track in the two images four pixels each, in the same order. Below you can find the piece of code I wrote.
# Globals
points = []
def show_and_fetch(image, title):
cv2.namedWindow(title, cv2.WINDOW_NORMAL)
cv2.setMouseCallback(title, mouse_callback)
# Show the image
cv2.imshow(title, image)
# Wait for user input to continue
cv2.waitKey(0)
cv2.destroyAllWindows()
# mouse callback function
def mouse_callback(event,x,y,flags,param):
if event == cv2.EVENT_LBUTTONDOWN:
points.append([x, y])
def stitching():
"""
This procedure stiches two images
:return:
"""
print "Stitching starts..."
###########################################################################
# Get input information
in_file_1 = utils.get_input(
"Insert 0 to exit, the path to the first image to stitch "
"or empty input to use default image: ", "string",
constants.default_stitching1)
in_file_2 = utils.get_input(
"Insert 0 to exit, the path to the second image to stitch "
"or empty input to use default image: ", "string",
constants.default_stitching2)
image_1 = utils.read_image(in_file_1)
image_2 = utils.read_image(in_file_2)
global points
show_and_fetch(image_1, "Image 1 to Stitch")
image_1_points = np.asarray(points, dtype=np.float32)
points = []
show_and_fetch(image_2, "Image 2 to Stitch")
image_2_points = np.asarray(points, dtype=np.float32)
matrix, mask = cv2.findHomography(image_1_points, image_2_points, cv2.RANSAC, 5)
image_1_warped = cv2.warpPerspective(image_1, matrix, dsize=image_1.shape[0:2])
utils.show_image_and_wait(image_1_warped, 'Image 1 warped', wait=False)
utils.show_image_and_wait(image_1, 'Image 1', wait=False)
utils.show_image_and_wait(image_2, 'Image 2')
if __name__ == "__main__":
stitching()
I expect the warped image to be transformed, preserving the most of the information, in terms of pixels. Then interpolation should apply the intersection of the two images that overlap in a certain area.
For instance I want to interpolete these two images:
I've managed to stitch images based on this solution. Here is the stitching result:
Here is the full code:
import cv2
import imutils
import numpy as np
class Stitcher(object):
def __init__(self):
self.isv3 = imutils.is_cv3()
def stitch(self, images, ratio=0.75, reprojThresh=4.0, showMatches=False):
(imageB, imageA) = images
(kpsA, featuresA) = self.detectAndDescribe(imageA)
(kpsB, featuresB) = self.detectAndDescribe(imageB)
# match features between the two images
m = self.matchKeypoints(kpsA, kpsB, featuresA, featuresB, ratio, reprojThresh)
if not m:
return None
# otherwise, apply a perspective warp to stitch the images
# together
(matches, H, status) = m
result = cv2.warpPerspective(imageA, H,
(imageA.shape[1] + imageB.shape[1], imageA.shape[0]))
result[0:imageB.shape[0], 0:imageB.shape[1]] = imageB
# check to see if the keypoint matches should be visualized
if showMatches:
vis = self.drawMatches(imageA, imageB, kpsA, kpsB, matches,
status)
# return a tuple of the stitched image and the
# visualization
return result, vis
# return the stitched image
return result
def detectAndDescribe(self, image):
# convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# check to see if we are using OpenCV 3.X
if self.isv3:
# detect and extract features from the image
descriptor = cv2.xfeatures2d.SIFT_create()
(kps, features) = descriptor.detectAndCompute(image, None)
# otherwise, we are using OpenCV 2.4.X
else:
# detect keypoints in the image
detector = cv2.xfeatures2d.SIFT_create()
kps = detector.detect(gray)
# extract features from the image
extractor = cv2.xfeatures2d.SIFT_create()
(kps, features) = extractor.compute(gray, kps)
# convert the keypoints from KeyPoint objects to NumPy
# arrays
kps = np.float32([kp.pt for kp in kps])
# return a tuple of keypoints and features
return kps, features
def matchKeypoints(self, kpsA, kpsB, featuresA, featuresB,
ratio, reprojThresh):
# compute the raw matches and initialize the list of actual
# matches
matcher = cv2.DescriptorMatcher_create("BruteForce")
rawMatches = matcher.knnMatch(featuresA, featuresB, 2)
matches = []
# loop over the raw matches
for m in rawMatches:
# ensure the distance is within a certain ratio of each
# other (i.e. Lowe's ratio test)
if len(m) == 2 and m[0].distance < m[1].distance * ratio:
matches.append((m[0].trainIdx, m[0].queryIdx))
# computing a homography requires at least 4 matches
if len(matches) > 4:
# construct the two sets of points
ptsA = np.float32([kpsA[i] for (_, i) in matches])
ptsB = np.float32([kpsB[i] for (i, _) in matches])
# compute the homography between the two sets of points
(H, status) = cv2.findHomography(ptsA, ptsB, cv2.RANSAC,
reprojThresh)
# return the matches along with the homograpy matrix
# and status of each matched point
return (matches, H, status)
# otherwise, no homograpy could be computed
return None
def drawMatches(self, imageA, imageB, kpsA, kpsB, matches, status):
# initialize the output visualization image
(hA, wA) = imageA.shape[:2]
(hB, wB) = imageB.shape[:2]
vis = np.zeros((max(hA, hB), wA + wB, 3), dtype="uint8")
vis[0:hA, 0:wA] = imageA
vis[0:hB, wA:] = imageB
# loop over the matches
for ((trainIdx, queryIdx), s) in zip(matches, status):
# only process the match if the keypoint was successfully
# matched
if s == 1:
# draw the match
ptA = (int(kpsA[queryIdx][0]), int(kpsA[queryIdx][1]))
ptB = (int(kpsB[trainIdx][0]) + wA, int(kpsB[trainIdx][1]))
cv2.line(vis, ptA, ptB, (0, 255, 0), 1)
# return the visualization
return vis
image1 = cv2.imread('image1.jpg')
image2 = cv2.imread('image2.jpg')
stitcher = Stitcher()
(result, vis) = stitcher.stitch([image1, image2], showMatches=True)
cv2.imwrite('result.jpg', result)
I faced with the same problem. It turns out that the order of my images was wrong.
I had two images for stitching. One needs to stitch from left to another. However, I was computing the transform as I want it to stitch from right.
I am working on surf implementation in opencv using python which will detect the template in the given image. I have modified the code such that it will take video capture from the webcam connected and convert into images and then apply surf on it. Following is the modified code.
import cv2
import numpy as np
cap = cv2.VideoCapture(0)
while(True):
ret ,img = cap.read()
# Convert them to grayscale
imgg =cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# SURF extraction
surf = cv2.SURF()
kp, descritors = surf.detect(imgg,None,useProvidedKeypoints = False)
# Setting up samples and responses for kNN
samples = np.array(descritors)
responses = np.arange(len(kp),dtype = np.float32)
# kNN training
knn = cv2.KNearest()
knn.train(samples,responses)
# Now loading a template image and searching for similar keypoints
template = cv2.imread('template.png')
templateg= cv2.cvtColor(template,cv2.COLOR_BGR2GRAY)
keys,desc = surf.detect(templateg,None,useProvidedKeypoints = False)
for h,des in enumerate(desc):
des = np.array(des,np.float32).reshape((1,128))
retval, results, neigh_resp, dists = knn.find_nearest(des,1)
res,dist = int(results[0][0]),dists[0][0]
if dist<0.1: # draw matched keypoints in red color
color = (0,0,255)
else: # draw unmatched in blue color
print dist
color = (255,0,0)
#Draw matched key points on original image
x,y = kp[res].pt
center = (int(x),int(y))
cv2.circle(img,center,2,color,-1)
#Draw matched key points on template image
x,y = keys[h].pt
center = (int(x),int(y))
cv2.circle(template,center,2,color,-1)
cv2.imwrite('img',img)
cv2.imwrite('tm',template)
cv2.waitKey(0)
cap.release()
But the error which is coming is
knn.train(samples,responses)
TyepError: data type = 17 is not supported
Does anybody have any idea on this?
CV probably expects regular arrays but you are passing numpy arrays instead. Try this
knn.train(samples.tolist(),responses.tolist())