Face recognition with multiple faces doesn't detect multiple faces - Python

I've written a small program that detects faces and saves them to a training file for recognition.
I have some trouble with this algorithm. Sometimes it throws an error saying that LBPH::train was fed empty data, which shouldn't happen:
OpenCV Error: Unsupported format or combination of formats (Empty training data was given. You'll need more than one sample to learn a model.) in cv::LBPH::train, file ........\opencv\modules\contrib\src\facerec.cpp, line 917
Traceback (most recent call last):
Moreover, the algorithm detects multiple faces but recognizes them all as the same face, which is wrong.
Could someone give me a hint on what I'm missing?
import cv2
import os
import numpy as np
import sys

i = 0
global allFaces
global first
first = True
allFaces = []
cap = cv2.VideoCapture(0)
faceCascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
recognizer = cv2.createLBPHFaceRecognizer()
font=cv2.cv.InitFont(cv2.cv.CV_FONT_HERSHEY_COMPLEX_SMALL,1,1,0,1)
id = 0

class Face:
    def __init__(self, id, face):
        self.id = id
        self.face = face
        self.gatheredFaces = []

    def main(self):
        print("main")

    def getFace(self):
        return self.face

    def setKnownFace(self):
        self.known = False

    def getKownFace(self):
        return self.knwon

    def getId(self):
        return self.id

    def setFacesInfo(self, frame, face):
        x,y,h,w = face
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        self.gatheredFaces.append(gray[y:y+h, x:x+w])
        # count = 0
        # while (count != 10):
        #     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        #     cv2.imshow("frame in set", frame)
        #     faces = faceCascade.detectMultiScale(gray)
        #     for face in faces:
        #         self.gatheredFaces.append(gray[y:y+h,x:x+w])
        #     cv2.imshow("gathered Faces", self.gatheredFaces[0])
        #     cv2.imwrite("dataSet/User"+ str(self.getId()) +".jpg", gray)
        #     count = count+1
        #     cv2.waitKey(30)

    def getFacesInfo(self):
        return self.gatheredFaces

    def trainDetector(self):
        faceSamples = []
        Ids = []
        print("laenge von gathered FAces")
        print(len(allFaces[0].getFacesInfo()))
        for (i) in range(len(allFaces)):
            temp = allFaces[i].getFacesInfo()
            for (j) in range(len(temp)):
                imageNP = np.array(temp[j], 'uint8')
                id = allFaces[i].getId()
                faces = faceCascade.detectMultiScale(imageNP)
                for (x,y,h,w) in faces:
                    faceSamples.append(imageNP)
                    Ids.append(id)
        recognizer.train(faceSamples, np.array(Ids))
        recognizer.save('recognizer/train.yml')

    def updateDetector(self):
        recognizer.load('recognizer/train.yml')
        faceSamples = []
        Ids = []
        for (i) in range(len(allFaces)):
            temp = allFaces[i].getFacesInfo()
            for (j) in range(len(temp)):
                imageNP = np.array(temp[j], 'uint8')
                id = allFaces[i].getId()
                faces = faceCascade.detectMultiScale(imageNP)
                for (x,y,h,w) in faces:
                    faceSamples.append(imageNP)
                    Ids.append(id)
        recognizer.update(faceSamples, np.array(Ids))
        recognizer.save('recognizer/train.yml')

while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    cv2.imshow("actual Frame", frame)
    cv2.imshow("gray", gray)
    faces = faceCascade.detectMultiScale(gray, 1.3, 5)
    print(faces)
    for face in faces:
        x,y,h,w = face
        temp = Face(id, frame[y:y+h,x:x+w])
        allFaces.append(temp)
        temp = None
        id = id+1

        ###Detector
        detector = cv2.SIFT()
        FLANN_INDEX_KDTREE = 0
        flannParam = dict(algorithm = FLANN_INDEX_KDTREE, tree = 5)
        flann = cv2.FlannBasedMatcher(flannParam,{})
        trainImg = allFaces[0].getFace()
        trainKP, trainDecs = detector.detectAndCompute(trainImg, None)
        if((len(allFaces)==1) and first):
            print("only one object in allFaces")
            for i in range(10):
                print(i)
                allFaces[0].setFacesInfo(frame, face)
            allFaces[0].trainDetector()
            first = False
        else:
            for(i) in range(len(allFaces)):
                QueryImg = cv2.cvtColor(allFaces[i].getFace(), cv2.COLOR_BGR2GRAY)
                queryKP, queryDesc = detector.detectAndCompute(QueryImg, None)
                matches = flann.knnMatch(queryDesc, trainDecs, k = 2)
                goodMatch = []
                for m, n in matches:
                    if(m.distance < 0.75 * n.distance):
                        goodMatch.append(m)
                if(len(goodMatch) > 30):
                    print("good match")
                    #allFaces[i].
                    tp = []
                    qp = []
                    for m in goodMatch:
                        tp.append(trainKP[m.trainIdx].pt)
                        qp.append(queryKP[m.queryIdx].pt)
                    tp, qp = np.float32((tp, qp))
                    H, status = cv2.findHomography(tp, qp, cv2.RANSAC, 3.0)
                    allFaces.pop(len(allFaces)-1)
                    break
                else:
                    print ("bad match")
                    for i in range(10):
                        allFaces[len(allFaces)-1].setFacesInfo(frame, face)
                    allFaces[len(allFaces)-1].updateDetector()
                    cv2.waitKey(10)
    for (x,y,w,h) in faces:
        cv2.rectangle(frame, (x,y), (x+w,y+h), (0,0,255),2)
        tempid, conf = recognizer.predict(gray[y:y+h,x:x+w])
        cv2.cv.PutText(cv2.cv.fromarray(frame), str(tempid),(x,y+h),font,(0,0,255))
        cv2.waitKey(30)
    cv2.imshow("detectedFace", frame)
    cv2.waitKey(30)
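One way the quoted error can occur in this code: trainDetector() re-runs detectMultiScale on the already-cropped samples, and if it finds no face in any of them, faceSamples stays empty and LBPH::train raises exactly the message above. A minimal sketch of a guard (not the poster's code; it assumes the same createLBPHFaceRecognizer API and the numpy import from the listing) would be:
def train_if_possible(recognizer, faceSamples, Ids):
    # detectMultiScale may find nothing inside a cropped sample, so check first
    if len(faceSamples) == 0:
        print("no usable face samples gathered, skipping training")
        return False
    recognizer.train(faceSamples, np.array(Ids))
    recognizer.save('recognizer/train.yml')
    return True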

Related

Is it possible to run two YOLO (yolov4) object detection models in a single application?

I understand that it would be much simpler to just train YOLOv4 accordingly but:
I have limited computational resources and was hoping I could combine pre-trained models I found online, saving my time, or train a model and combine it with other models available online.
Suppose I have one '.weights' file for a custom object detector that detects traffic signs and another '.weights' file for a detector that detects pedestrians. Is there a way to combine these models so that, when run on a video/image (or a real-time capture), they detect pedestrians and traffic signs simultaneously?
By combining I mean either editing the '.weights' files somehow to achieve this, or editing the Python code (while running the detector) to get this done (or any other way).
If that's not possible, is there any way to make them run in sequence, efficiently?
Yes, it is possible. Create two different Python files and load one model in each. Then create a new file and initialise both of them there. Take the continuous video/image feed as input; the first detector will output the detected traffic signs, and you can use that output as input to your pedestrian detector.
import numpy as np
import pandas as pd
import cv2
import time
margin = 20
import detection_cls
run = detection_cls.Ppe_Detection_1()
class Ppe_Detection():
def __init__(self):
self.weightfile = 'yolov4_pretrained.weights'
self.cfgfile = 'cfg/yolov4_pretrained.cfg'
self.PpeNet = cv2.dnn.readNet(self.weightfile,self.cfgfile)
self.classes = self.get_classes()
layer_names = self.PpeNet.getLayerNames()
self.output_layers = [layer_names[i - 1] for i in self.PpeNet.getUnconnectedOutLayers()]
# = [self.PpeNet.getLayerNames()[(i[0] - 1)] for i in self.PpeNet.getUnconnectedOutLayers()]
def get_classes(self):
# self.classes = []
with open("coco.names","r") as f:
self.classes_val = [line.strip() for line in f.readlines()]
return self.classes_val
def detection(self,img):
start = time.perf_counter()
height,width,channels = img.shape
# Detecting objects
blob = cv2.dnn.blobFromImage(img,0.00392,(416,416),(0,0,0),True,crop=False)
self.PpeNet.setInput(blob)
outs = self.PpeNet.forward(self.output_layers)
time_took = time.perf_counter() - start
fps = str(int(1/time_took))
# getting the list
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
# print(detection)
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.4:
# object detected
center_x = int(detection[0]*width)
center_y = int(detection[1]*height)
w = int(detection[2]*width)
h = int(detection[3]*height)
# Rectangle Coordinates
x = int(center_x-w/2)
y = int(center_y-h/2)
boxes.append([x,y,w,h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes,confidences,0.4,0.4)
info = []
if len(indexes) > 0:
for i in indexes.flatten():
x,y = boxes[i][0], boxes[i][1]
w,h = boxes[i][2], boxes[i][3]
conf = confidences[i]
if x<0:
x = 0
if y < 0:
y = 0
type = '{}'.format(self.classes[class_ids[i]])
info.append([x,y,w,h,type,conf])
new_frame_time = time.time()
font = cv2.FONT_HERSHEY_PLAIN
for i in range(len(boxes)):
for i in indexes:
x,y,w,h = boxes[i]
label = str(self.classes[class_ids[i]])
color = (0,0,145)
# cv2.rectangle(img,(x,y),(x+w,y+h),color,2)
cv2.putText(img,fps,(10,30),font,3,color,3)
cv2.putText(img,label,(x,y+30),font,3,color,3)
# print(boxes,confidences,class_ids)
# print("opened")
# print(info)
return info
def cropping_detection(self):
count = 0
cropped_frame = []
value_img = []
cap = cv2.VideoCapture(0)
while cap.isOpened():
r,f = cap.read()
# counts +=1
detection = ppe.detection(f)
try:
x,y,w,h,cls,conf = detection[0]
except Exception as e:
pass
# for i in range(len(detection)):
# print(detection[i])
try:
if cls=='person':
print("Person detected")
cropped_img = f[y-margin:y+h+margin,x-margin:x+w+margin]
# print(cropped_img)
# value = ppe.detection(cropped_img)
final = run.detection(cropped_img)
print('final',final)
# print(value)
# print(cropped_img)
# value_img.append(value)
# cropped_frame.append(cropped_img)
# cv2.imwrite("data/frame%d.jpg" % count, cropped_img)
cv2.imshow("Hi",cropped_img)
if cv2.waitKey(1) & 0xff==ord('q'):
break
# count += 1
else:
pass
except Exception as e:
print("In except part")
# cap.release()
# cv2.destroyAllWindows()
# print(cropped_img)
return value,cropped_img
ppe = Ppe_Detection()
ppe.cropping_detection()
# ppe.get_classes()
# ppe.run_video()
# take_photo()
# inference_image()
The function that I've imported here can be found below
import numpy as np
import pandas as pd
import cv2
import time
class Ppe_Detection():
def __init__(self):
self.weightfile = 'backup/yolov3_best.weights'
self.cfgfile = 'yolov3.cfg'
self.PpeNet = cv2.dnn.readNet(self.weightfile,self.cfgfile)
self.classes = self.get_classes()
layer_names = self.PpeNet.getLayerNames()
self.output_layers = [layer_names[i - 1] for i in self.PpeNet.getUnconnectedOutLayers()]
# = [self.PpeNet.getLayerNames()[(i[0] - 1)] for i in self.PpeNet.getUnconnectedOutLayers()]
def get_classes(self):
# self.classes = []
with open("obj.names","r") as f:
self.classes_val = [line.strip() for line in f.readlines()]
return self.classes_val
def print_all(self):
print(self.classes)
def detection(self,img):
start = time.perf_counter()
height,width,channels = img.shape
# Detecting objects
blob = cv2.dnn.blobFromImage(img,0.00392,(416,416),(0,0,0),True,crop=False)
self.PpeNet.setInput(blob)
outs = self.PpeNet.forward(self.output_layers)
time_took = time.perf_counter() - start
fps = str(int(1/time_took))
# getting the list
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.4:
# object detected
center_x = int(detection[0]*width)
center_y = int(detection[1]*width)
w = int(detection[2]*width)
h = int(detection[3]*height)
# Rectangle Coordinates
x = int(center_x-w/2)
y = int(center_y-h/2)
boxes.append([x,y,w,h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes,confidences,0.4,0.4)
info = []
if len(indexes) > 0:
for i in indexes.flatten():
x,y = boxes[i][0], boxes[i][1]
w,h = boxes[i][2], boxes[i][3]
conf = confidences[i]
if x<0:
x = 0
if y < 0:
y = 0
type = '{}'.format(self.classes[class_ids[i]])
info.append([x,y,w,h,type,conf])
new_frame_time = time.time()
font = cv2.FONT_HERSHEY_PLAIN
for i in range(len(boxes)):
for i in indexes:
x,y,w,h = boxes[i]
label = str(self.classes[class_ids[i]])
color = (0,255,145)
cv2.rectangle(img,(x,y),(x+w,y+h),color,2)
cv2.putText(img,fps,(10,30),font,3,color,3)
cv2.putText(img,label,(x,y+30),font,3,color,3)
# print(boxes,confidences,class_ids)
# print("opened")
print(info)
return info
ppe = Ppe_Detection()
ppe.get_classes()
ppe.print_all()
#############
def inference_image():
img = cv2.imread("sumit_off.jpg")
ppe.detection(img)
cv2.imshow("Image",img)
cv2.waitKey(0)
cv2.destroyAllWindows()
def run_video():
cap = cv2.VideoCapture(0)
while cap.isOpened():
r,f = cap.read()
try:
info = ppe.detection(f)
except Exception as e:
print("______",e)
cv2.imshow("image",f)
if cv2.waitKey(1) & 0xFF == ord("q") :
break
cap.release()
cv2.destroyAllWindows()
run_video()
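For reference, a stripped-down sketch of the sequential two-model idea follows (the answer's code above instead crops each detected person and feeds the crop to the second network). The weight/cfg file names here are placeholders, not files from the answer:
import cv2

sign_net = cv2.dnn.readNet('signs.weights', 'signs.cfg')        # first pre-trained detector
person_net = cv2.dnn.readNet('persons.weights', 'persons.cfg')  # second pre-trained detector

def forward(net, frame):
    # standard OpenCV-DNN YOLO forward pass; NMS and box decoding would follow
    # exactly as in the detection() method above
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    names = net.getLayerNames()
    out_layers = [names[i - 1] for i in net.getUnconnectedOutLayers()]
    return net.forward(out_layers)

cap = cv2.VideoCapture(0)
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    sign_outs = forward(sign_net, frame)      # run the first model
    person_outs = forward(person_net, frame)  # then the second model on the same frame
    # decode both output sets and draw the combined boxes here
cap.release()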

How to use a for loop with many variables?

I am using YOLOv5. I want to change my webcam to a LAN camera.
class LoadStreams: # multiple IP or RTSP cameras
def __init__(self, sources='streams.txt', img_size=640):
self.mode = 'images'
self.img_size = img_size
if os.path.isfile(sources):
with open(sources, 'r') as f:
sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
else:
sources = [sources]
n = len(sources)
self.imgs = [None] * n
self.sources = sources
for i, s in enumerate(sources):
# Start the thread to read frames from the video stream
print('%g/%g: %s... ' % (i + 1, n, s), end='')
cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
assert cap.isOpened(), 'Failed to open %s' % s
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) % 100
_, self.imgs[i] = cap.read() # guarantee first frame
thread = Thread(target=self.update, args=([i, cap]), daemon=True)
print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
thread.start()
print('') # newline
# check for common shapes
s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
def update(self, index, cap):
# Read next stream frame in a daemon thread
n = 0
while cap.isOpened():
n += 1
# _, self.imgs[index] = cap.read()
cap.grab()
if n == 4: # read every 4th frame
_, self.imgs[index] = cap.retrieve()
n = 0
time.sleep(0.01) # wait time
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
img0 = self.imgs.copy()
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
# Letterbox
img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
# Stack
img = np.stack(img, 0)
# Convert
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return self.sources, img, img0, None
def __len__(self):
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
This code returns 'self.sources, img, img0, None'.
if webcam:
view_img = True
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz)
print((dataset))
I use 'dataset' like this:
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
How can I use 'for path, img, im0s, vid_cap in dataset:' here?
My LAN camera code:
def livecame():
vimba = Vimba()
vimba.startup()
system = vimba.system()
system.run_feature_command("GeVDiscoveryAllOnce")
time.sleep(0.1)
camera_ids = vimba.camera_ids()
# for cam_id in camera_ids:
# print("Camera found: ", cam_id)
print(camera_ids[0])
c0 = vimba.camera(camera_ids[0])
c0.open()
pixel_format = c0.feature("PixelFormat")
pixel_format.value = "BayerBG8"
try:
c0.StreamBytesPerSecond = 100000000
except:
pass
frame = c0.new_frame()
frame.announce()
c0.start_capture()
try:
frame.queue_for_capture()
success = True
except:
success = False
c0.run_feature_command("AcquisitionStart")
c0.run_feature_command("AcquisitionStop")
frame.wait_for_capture(1000)
frame_data = frame.buffer_data()
k = cv2.waitKey(1)
if k == 0x1b:
cv2.destroyAllWindows()
if success:
img = np.ndarray(buffer=frame_data,
dtype=np.uint8,
shape=(frame.data.height, frame.data.width, 1))
img = cv2.cvtColor(img, cv2.COLOR_BAYER_BG2RGB)
img0 = img.copy()
img = img.tolist()
img = [letterbox(x, new_shape=(800,400), auto= True)[0] for x in img0]
#img = np.ascontiguousarray(img)
img = np.stack(img, 0)
#img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return ['0'], img, img0
But when I use dataset = new_file.livecame(), I get the error ValueError: not enough values to unpack (expected 3, got 1)
in 'for path, img, im0s, vid_cap in dataset:'.
How do I use many variables in a for loop?
In Python OpenCV, one way is simply to use zip:
for component in zip(contours, hierarchy):
    cntr = component[0]
    hier = component[1]
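For the loop in the question itself, the unpack error comes from the number of items on each side: 'for path, img, im0s, vid_cap in dataset:' expects four values per iteration, while livecame() returns a single tuple. A sketch (assuming a hypothetical grab_frame() helper that wraps the Vimba capture and letterbox code above) is to turn livecame() into a generator that yields the same four fields LoadStreams does:
def livecame():
    while True:
        img, img0 = grab_frame()  # hypothetical helper wrapping the Vimba/letterbox code above
        # yield the same 4-tuple layout as LoadStreams.__next__
        yield ['0'], img, img0, None
dataset = new_file.livecame() can then be iterated exactly like LoadStreams.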

YUV frame averaging using OpenCV

I am trying to read a YUV video and then perform frame averaging, but I run into problems when doing so. My code is shown below (it was made by combining a YUV reader with code to split a frame into its separate Y, U, V components, plus what I thought was the way to do the averaging).
I get an error that the argument should be an int or None, not a float, but it doesn't reference the line; I do know it has to do with the part that reads the YUV file. I am trying to average every two frames within the video. The YUV file is 10-bit 4:2:0.
import cv2
import numpy as np

class readYUV:
    def __init__(self, filename, size):
        self.height, self.width = size
        self.frame_len = self.width * self.height * 3 / 2
        self.f = open(filename, 'rb')
        self.shape = (int(self.height*1.5), self.width)

    def read_raw(self):
        try:
            raw = self.f.read(self.frame_len)
            yuv = np.frombuffer(raw, dtype=np.uint8)
            yuv = yuv.reshape(self.shape)
        except Exception as e:
            print (str(e))
            return False, None
        return True, yuv

    def read(self):
        ret, yuv = self.read_raw()
        if not ret:
            return ret, yuv
        bgr = cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR_NV21)
        return ret, bgr

def make_lut_u():
    return np.array([[[i, 255 - i, 0] for i in range(256)]], dtype=np.uint8)

def make_lut_v():
    return np.array([[[0, 255 - i, i] for i in range(256)]], dtype=np.uint8)

# otherwise, split the frame into its respective channels
def splitter(img):
    cv2.imshow("frame", img)
    cv2.waitKey(30)
    img_yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
    y, u, v = cv2.split(img_yuv)
    lut_u, lut_v = make_lut_u(), make_lut_v()
    # Convert back to BGR so we can apply the LUT and stack the images
    y = cv2.cvtColor(y, cv2.COLOR_GRAY2BGR)
    u = cv2.cvtColor(u, cv2.COLOR_GRAY2BGR)
    v = cv2.cvtColor(v, cv2.COLOR_GRAY2BGR)
    u_mapped = cv2.LUT(u, lut_u)
    v_mapped = cv2.LUT(v, lut_v)
    result = np.vstack([img, y, u_mapped, v_mapped])
    return result

if __name__ == "__main__":
    filename = "file.yuv"
    size = (3840, 2160)
    cap = readYUV(filename, size)
    while 1:
        ret, frame = cap.read()
        cv2.waitKey(30)
        result1 = splitter(frame)
        ret2, frame2 = cap.read()
        cv2.waitKey(30)
        result2 = splitter(frame2)
        result = (result1 +result2)/2
        cv2.imwrite('average.png', result)
Thanks in advance
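The "should be int or None, not float" message usually points at f.read() and reshape() being given floats: in Python 3, self.width * self.height * 3 / 2 is a float. A sketch of read_raw with integer arithmetic is below; treating the 10-bit file as two bytes per sample is an assumption about its layout:
def read_raw(self):
    bytes_per_sample = 2                                   # assumption: 10-bit samples stored in 16-bit words
    frame_len = self.width * self.height * 3 // 2 * bytes_per_sample   # // keeps it an int
    raw = self.f.read(frame_len)
    if len(raw) < frame_len:
        return False, None
    yuv = np.frombuffer(raw, dtype=np.uint16)
    yuv = yuv.reshape((self.height * 3 // 2, self.width))
    return True, yuv
Note also that adding two uint8 frames can overflow and wrap around, so casting to a wider type before averaging may be needed for the result1/result2 step.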

How can I convert a PyTorch CPU-based transformation to CUDA-based?

The original issue for the code is available here.
I am using this repository for a line segmentation project, and I developed this code to take an input (image or video), draw road lines on it, and produce the output:
import argparse
import sys
from time import time, clock
from os.path import splitext, basename, exists
from model import SCNN
from utils.check_extension import is_video, is_image
from utils.transforms import *
# I will put all the necessary code for utils.transforms after this
# ------------------------------------------------ SCNN parameters
time1 = time()
net = SCNN(input_size=(800, 288), pretrained=False)
mean = (0.3598, 0.3653, 0.3662) # CULane mean, std
std = (0.2573, 0.2663, 0.2756)
transform_img = Resize((800, 288))
transform_to_net = Compose(ToTensor(), Normalize(mean=mean, std=std))
# ------------------------------------------------ Arguments
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str,
default='models/vgg_SCNN_DULR_w9.pth',
help='path to vgg models')
parser.add_argument('--input', type=str, default='demo/line_3.mp4',
help='path to image file')
parser.add_argument('--output', type=str, default='public/',
help='path to the output directory')
args = parser.parse_args()
return args
def main():
args = parse_args()
filename, extension = splitext(basename(args.input))
print("Loading file [{}] ....".format(filename))
if not exists(args.input):
print("file [{}] is not recognized".format(args.input))
sys.exit()
if is_video(extension):
video_capture = cv2.VideoCapture()
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output = args.output + filename + '.avi'
if video_capture.open(args.input):
property_id = int(cv2.CAP_PROP_FRAME_COUNT)
total_frames = int(cv2.VideoCapture.get(video_capture, property_id))
frame_no = 1
width, height = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)), \
int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = video_capture.get(cv2.CAP_PROP_FPS)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
save_dict = torch.load(args.weights, map_location=device)
net.load_state_dict(save_dict['net'])
net.eval()
# can't write out mp4, so try to write into an AVI file
video_writer = cv2.VideoWriter(output, fourcc, fps, (width, height))
while video_capture.isOpened():
start = time()
ret, frame = video_capture.read()
if not ret:
break
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = transform_img({'img': frame})['img']
x = transform_to_net({'img': frame})['img']
x.unsqueeze_(0)
stop1 = time()
print('stop1: ', stop1 - start)
seg_pred, exist_pred = net(x)[:2]
seg_pred = seg_pred.detach().cpu().numpy()
exist_pred = exist_pred.detach().cpu().numpy()
seg_pred = seg_pred[0]
stop2 = time()
print('stop2: ', stop2 - stop1)
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
lane_img = np.zeros_like(frame)
color = np.array([[255, 125, 0], [0, 255, 0], [0, 0, 255], [0, 255, 255]], dtype='uint8')
coord_mask = np.argmax(seg_pred, axis=0)
for i in range(0, 4):
if exist_pred[0, i] > 0.5:
lane_img[coord_mask == (i + 1)] = color[i]
img = cv2.addWeighted(src1=lane_img, alpha=0.8, src2=frame, beta=1., gamma=0.)
img = cv2.resize(img, (width, height))
stop3 = time()
print('stop3: ', stop3 - stop2)
# if frame_no % 20 == 0:
# print('# {}/{} frames processed!'.format(frame_no, total_frames))
frame_no += 1
video_writer.write(img)
end = time()
print('Whole loop: {} seconds'.format(end - start))
print('------------')
print('------------')
print('# All frames processed ')
video_capture.release()
video_writer.release()
elif is_image(extension):
img = cv2.imread(args.input)
height, width, _ = img.shape
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = transform_img({'img': img})['img']
x = transform_to_net({'img': img})['img']
x.unsqueeze_(0)
save_dict = torch.load(args.weights, map_location='cpu')
net.load_state_dict(save_dict['net'])
net.eval()
seg_pred, exist_pred = net(x)[:2]
seg_pred = seg_pred.detach().cpu().numpy()
exist_pred = exist_pred.detach().cpu().numpy()
seg_pred = seg_pred[0]
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
lane_img = np.zeros_like(img)
color = np.array([[255, 125, 0], [0, 255, 0], [0, 0, 255], [0, 255, 255]], dtype='uint8')
coord_mask = np.argmax(seg_pred, axis=0)
for i in range(0, 4):
if exist_pred[0, i] > 0.5:
lane_img[coord_mask == (i + 1)] = color[i]
img = cv2.addWeighted(src1=lane_img, alpha=0.8, src2=img, beta=1., gamma=0.)
img = cv2.resize(img, (width, height))
output = args.output + filename + '.jpg'
cv2.imwrite(output, img)
else:
print("file format [{}] is not supported".format(args.input))
sys.exit()
if __name__ == '__main__':
main()
The code for Resize, ToTensor, Normalize and Compose is here:
class Compose(CustomTransform):
"""
All transform in Compose should be able to accept two non None variable, img and boxes
"""
def __init__(self, *transforms):
self.transforms = [*transforms]
def __call__(self, sample):
for t in self.transforms:
sample = t(sample)
return sample
def __iter__(self):
return iter(self.transforms)
def modules(self):
yield self
for t in self.transforms:
if isinstance(t, Compose):
for _t in t.modules():
yield _t
else:
yield t
class Normalize(CustomTransform):
def __init__(self, mean, std):
self.transform = Normalize_th(mean, std)
def __call__(self, sample):
img = sample.get('img')
img = self.transform(img)
_sample = sample.copy()
_sample['img'] = img
return _sample
class ToTensor(CustomTransform):
def __init__(self, dtype=torch.float):
self.dtype=dtype
def __call__(self, sample):
img = sample.get('img')
segLabel = sample.get('segLabel', None)
exist = sample.get('exist', None)
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).type(self.dtype) / 255.
if segLabel is not None:
segLabel = torch.from_numpy(segLabel).type(torch.long)
if exist is not None:
exist = torch.from_numpy(exist).type(torch.float32) # BCEloss requires float tensor
_sample = sample.copy()
_sample['img'] = img
_sample['segLabel'] = segLabel
_sample['exist'] = exist
return _sample
class Resize(CustomTransform):
def __init__(self, size):
if isinstance(size, int):
size = (size, size)
self.size = size #(W, H)
def __call__(self, sample):
img = sample.get('img')
segLabel = sample.get('segLabel', None)
img = cv2.resize(img, self.size, interpolation=cv2.INTER_CUBIC)
if segLabel is not None:
segLabel = cv2.resize(segLabel, self.size, interpolation=cv2.INTER_NEAREST)
_sample = sample.copy()
_sample['img'] = img
_sample['segLabel'] = segLabel
return _sample
def reset_size(self, size):
if isinstance(size, int):
size = (size, size)
self.size = size
The code works fine, but I found that it's too slow for a real-time application. I added some time measurements to find the bottlenecks, and this is the output for one loop:
------------
stop1: 0.002989053726196289
stop2: 1.4032211303710938
stop3: 0.004946708679199219
Whole loop: 1.41636061668396 seconds
These lines happened to be the most computationally expensive lines:
seg_pred, exist_pred = net(x)[:2]
seg_pred = seg_pred.detach().cpu().numpy()
exist_pred = exist_pred.detach().cpu().numpy()
seg_pred = seg_pred[0]
Now I am stuck on how to modify the code to improve the computation speed.
Initially I thought of modifying the code to allow CUDA computation. I asked the main author how I could modify the code for a CUDA version here, and he pointed to these lines:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = transform_img({'img': frame})['img']
x = transform_to_net({'img': frame})['img']
x.unsqueeze_(0)
Unfortunately I don't have much experience with PyTorch, so I am asking for help now.
I hope the information I've shared is enough. Any help would be appreciated.
Thanks
Set device:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
What he means is putting the data on the device:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = transform_img({'img': frame})['img']
x = transform_to_net({'img': frame})['img']
x.unsqueeze_(0).to(device)
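Two details worth keeping in mind with that change, since .to() is not in-place and the model has to live on the same device as its input. This is a sketch of the relevant lines using the question's net and x, not a guaranteed speed-up (the forward pass itself may still dominate):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = net.to(device)                          # move the SCNN weights to the GPU as well
x = x.unsqueeze(0).to(device)                 # .to() returns a new tensor, so reassign it
seg_pred, exist_pred = net(x)[:2]
seg_pred = seg_pred.detach().cpu().numpy()    # copy back to the CPU for the OpenCV drawing code
exist_pred = exist_pred.detach().cpu().numpy()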

cv2.error: (-215) s >= 0 in function setSize

I get this error while running my code. I try to capture a picture from a webcam on a Raspberry Pi, but sometimes the first picture I capture is empty, so I verify it like this:
ret, img = cam.read();
if not ret: continue
What can I do to avoid this error?
Corrupt JPEG data: 1 extraneous bytes before marker 0xd1 OpenCV
Error: Assertion failed (s >= 0) in setSize, file
/home/pi/opencv-3.2.0/modules/core/src/matrix.cpp, line 307 Traceback
(most recent call last): File "total.py", line 248, in
facialReco(directory) File "total.py", line 236, in facialReco
id, dist, it = reco(faceDetec) File "total.py", line 188, in reco
recognizer.predict_collect(gray[yHMax:yHMax + hMax, xHMax:xHMax + wHMax], collector)
cv2.error: /home/pi/opencv-3.2.0/modules/core/src/matrix.cpp:307:
error: (-215) s >= 0 in function setSize
My whole code is:
def reco(faceDetec):
recognizer = cv2.face.createLBPHFaceRecognizer()
recognizer.load("recognizer/trainingData_LBPHF.yml")
id = 0
it = 0
dist = 0
cam = cv2.VideoCapture(0)
# prendre les rectangle ayant la plus grde largeur seulement.
while it < 20:
ret, img = cam.read();
if not ret: continue
cv2.imshow("Face", img);
cv2.waitKey(1)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = faceDetec.detectMultiScale(
img,
scaleFactor=1.2,
minNeighbors=7,
minSize=(50, 50)
)
hMax=0
wHMax=0
xHMax=0
yHMax=0
for (x, y, w, h) in faces:
if h>hMax:
hMax=h
wHMax=w
xHMax=x
yHMax=y
collector = cv2.face.StandardCollector_create()
recognizer.predict_collect(gray[yHMax:yHMax + hMax, xHMax:xHMax + wHMax], collector)
if collector.getMinDist()<65:
it += 1
dist = dist + collector.getMinDist()
id = collector.getMinLabel()
numberOfRec(id)
cam.release()
cv2.destroyAllWindows()
req="SELECT studentId FROM Student WHERE numberOfRec=(SELECT MAX(numberOfRec) FROM Student);"
cursor.execute(req)
rows = cursor.fetchall()
for row in rows:
id=row[0]
req="UPDATE Student SET numberOfRec = %(numberOfRec)"
values = {"numberOfRec": 0}
cursor.execute(req, values)
db.commit()
return id, dist, it
I managed to correct the error by adding 'if not img is None:':
def reco(faceDetec):
recognizer = cv2.face.createLBPHFaceRecognizer()
recognizer.load("recognizer/trainingData_LBPHF.yml")
id = 0
it = 0
dist = 0
cam = cv2.VideoCapture(0)
# prendre les rectangle ayant la plus grde largeur seulement.
while it < 20:
ret, img = cam.read();
if not img is None:
if not ret: continue
cv2.imshow("Face", img);
cv2.waitKey(1)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = faceDetec.detectMultiScale(
img,
scaleFactor=1.2,
minNeighbors=7,
minSize=(50, 50)
)
hMax=0
wHMax=0
xHMax=0
yHMax=0
for (x, y, w, h) in faces:
if h>hMax:
hMax=h
wHMax=w
xHMax=x
yHMax=y
collector = cv2.face.StandardCollector_create()
recognizer.predict_collect(gray[yHMax:yHMax + hMax, xHMax:xHMax + wHMax], collector)
if collector.getMinDist()<65:
it += 1
dist = dist + collector.getMinDist()
id = collector.getMinLabel()
numberOfRec(id)
cam.release()
cv2.destroyAllWindows()
req="SELECT studentId FROM Student WHERE numberOfRec=(SELECT MAX(numberOfRec) FROM Student);"
cursor.execute(req)
rows = cursor.fetchall()
for row in rows:
id=row[0]
req="UPDATE Student SET numberOfRec = %(numberOfRec)"
values = {"numberOfRec": 0}
cursor.execute(req, values)
db.commit()
return id, dist, it
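The same assertion can also fire when no face is detected, because hMax stays 0 and gray[yHMax:yHMax + hMax, xHMax:xHMax + wHMax] becomes an empty image passed to predict_collect. A sketch of a stricter guard, keeping the question's variable names, is:
ret, img = cam.read()
if not ret or img is None:
    continue                      # skip empty frames from the camera
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = faceDetec.detectMultiScale(img, scaleFactor=1.2, minNeighbors=7, minSize=(50, 50))
if len(faces) == 0:
    continue                      # nothing detected, so don't slice an empty ROI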
