I am using mask detection code from GitHub (I changed it a little) and I am trying to close the video window with a timer (or a timer-like function). The code detects a mask and shows the text "Mask" or "No Mask". I want to close the window when the label has shown "Mask" for 5 seconds.
I tried using this timer code:
def startTimer():
    global count
    timer = threading.Timer(1, startTimer)
    timer.start()
    print(count)
    count += 1
    if count > 5:
        timer.cancel()
but it doesn't work well. There are 3 problems with it:
1. The timer runs, but it does not count in real seconds.
2. The timer should stop when count becomes 5, but it goes up to 9 and then the window closes.
3. When the label shows "Mask", the video stops.
I want to know how to close the window with a timer in this code, and how to fix these 3 problems.
I would really appreciate your help.
from imutils import video
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import numpy as np
import argparse
import imutils
import time
import cv2
import os
import threading
from tensorflow.python.ops.math_ops import truediv
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
def detect_and_predict_mask(frame, faceNet, maskNet):
# grab the dimensions of the frame and then construct a blob
# from it
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
(104.0, 180.0, 124.0))
# pass the blob through the network and obtain the face detections
faceNet.setInput(blob)
detections = faceNet.forward()
# initialize our list of faces, their corresponding locations,
# and the list of predictions from our face mask network
faces = []
locs = []
preds = []
# loop over the detections
for i in range(0, detections.shape[2]):
# extract the confidence (i.e., probability) associated with
# the detection
confidence = detections[0, 0, i, 2]
# filter out weak detections by ensuring the confidence is
# greater than the minimum confidence
if confidence > args["confidence"]:
# compute the (x, y)-coordinates of the bounding box for
# the object
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
# ensure the bounding boxes fall within the dimensions of
# the frame
(startX, startY) = (max(0, startX), max(0, startY))
(endX, endY) = (min(w - 1, endX), min(h - 1, endY))
# extract the face ROI, convert it from BGR to RGB channel
# ordering, resize it to 224x224, and preprocess it
face = frame[startY:endY, startX:endX]
face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
face = cv2.resize(face, (224, 224))
face = img_to_array(face)
face = preprocess_input(face)
# add the face and bounding boxes to their respective
# lists
faces.append(face)
locs.append((startX, startY, endX, endY))
# only make a predictions if at least one face was detected
if len(faces) > 0:
# for faster inference we'll make batch predictions on *all*
# faces at the same time rather than one-by-one predictions
# in the above `for` loop
faces = np.array(faces, dtype="float32")
preds = maskNet.predict(faces, batch_size=32)
# return a 2-tuple of the face locations and their corresponding
# locations
return (locs, preds)
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-f", "--face", type=str,
default="face_detector",
help="path to face detector model directory")
ap.add_argument("-m", "--model", type=str,
default="mask_detector.model",
help="path to trained face mask detector model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = os.path.sep.join([args["face"], "deploy.prototxt"])
weightsPath = os.path.sep.join([args["face"],
"res10_300x300_ssd_iter_140000.caffemodel"])
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
# load the face mask detector model from disk
print("[INFO] loading face mask detector model...")
maskNet = load_model(args["model"])
# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
vs = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
vs.set(cv2.CAP_PROP_FRAME_WIDTH,640)
vs.set(cv2.CAP_PROP_FRAME_HEIGHT,480)
time.sleep(1.0)
count = 0
Maskon = False
def startTimer():
global count
timer = threading.Timer(1, startTimer)
timer.start()
print(count)
count += 1
if count > 5:
timer.cancel()
# loop over the frames from the video stream
while True:
# grab the frame from the threaded video stream and resize it
# to have a maximum width of 400 pixels
check, frame = vs.read()
if isinstance(frame, np.ndarray):
pass
else:
continue
# detect faces in the frame and determine if they are wearing a
# face mask or not
(locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
# loop over the detected face locations and their corresponding
# locations
for (box, pred) in zip(locs, preds):
# unpack the bounding box and predictions
(startX, startY, endX, endY) = box
(mask, withoutMask) = pred
# determine the class label and color we'll use to draw
# the bounding box and text
label = "Mask" if mask > withoutMask else "No Mask"
color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
if label == "Mask":
Maskon = True
else:
Maskon = False
# include the probability in the label
label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
# display the label and bounding box rectangle on the output
# frame
cv2.putText(frame, label, (startX, startY - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
if Maskon == True:
startTimer()
# show the output frame
cv2.imshow('Frame', frame)
key = cv2.waitKey(1) & 0xFF
if count == 5:
break
# do a bit of cleanup
vs.release()
cv2.destroyAllWindows()
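One observation on the timer code above (my reading of it, so treat it as a hypothesis): startTimer() is called on every frame while the label reads "Mask", and every call both increments count immediately and schedules yet another call one second later. The counter therefore climbs at roughly the camera frame rate, several timer chains run at once, and count can race well past 5 before the while loop checks it, which would explain problems 1 and 2. A minimal sketch that starts only a single chain (the timer_running flag is my own addition) would be:

timer_running = False

def startTimer():
    global count, timer_running
    timer_running = True
    print(count)
    count += 1
    if count <= 5:
        threading.Timer(1, startTimer).start()   # re-arm once per second
    else:
        timer_running = False                    # chain ends after 5 ticks

# in the frame loop, replace `if Maskon == True: startTimer()` with:
# if Maskon and not timer_running:
#     startTimer()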
I made it!
It does not count in real seconds, but it works the way I wanted.
from imutils import video
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import numpy as np
import argparse
import imutils
import time
import cv2
import os
import threading
from tensorflow.python.ops.math_ops import truediv
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
def detect_and_predict_mask(frame, faceNet, maskNet):
# grab the dimensions of the frame and then construct a blob
# from it
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
(104.0, 180.0, 124.0))
# pass the blob through the network and obtain the face detections
faceNet.setInput(blob)
detections = faceNet.forward()
# initialize our list of faces, their corresponding locations,
# and the list of predictions from our face mask network
faces = []
locs = []
preds = []
# loop over the detections
for i in range(0, detections.shape[2]):
# extract the confidence (i.e., probability) associated with
# the detection
confidence = detections[0, 0, i, 2]
# filter out weak detections by ensuring the confidence is
# greater than the minimum confidence
if confidence > args["confidence"]:
# compute the (x, y)-coordinates of the bounding box for
# the object
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
# ensure the bounding boxes fall within the dimensions of
# the frame
(startX, startY) = (max(0, startX), max(0, startY))
(endX, endY) = (min(w - 1, endX), min(h - 1, endY))
# extract the face ROI, convert it from BGR to RGB channel
# ordering, resize it to 224x224, and preprocess it
face = frame[startY:endY, startX:endX]
face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
face = cv2.resize(face, (224, 224))
face = img_to_array(face)
face = preprocess_input(face)
# add the face and bounding boxes to their respective
# lists
faces.append(face)
locs.append((startX, startY, endX, endY))
# only make a predictions if at least one face was detected
if len(faces) > 0:
# for faster inference we'll make batch predictions on *all*
# faces at the same time rather than one-by-one predictions
# in the above `for` loop
faces = np.array(faces, dtype="float32")
preds = maskNet.predict(faces, batch_size=32)
# return a 2-tuple of the face locations and their corresponding
# locations
return (locs, preds)
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-f", "--face", type=str,
default="face_detector",
help="path to face detector model directory")
ap.add_argument("-m", "--model", type=str,
default="mask_detector.model",
help="path to trained face mask detector model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = os.path.sep.join([args["face"], "deploy.prototxt"])
weightsPath = os.path.sep.join([args["face"],
"res10_300x300_ssd_iter_140000.caffemodel"])
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
# load the face mask detector model from disk
print("[INFO] loading face mask detector model...")
maskNet = load_model(args["model"])
# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
vs = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
vs.set(cv2.CAP_PROP_FRAME_WIDTH,640)
vs.set(cv2.CAP_PROP_FRAME_HEIGHT,480)
time.sleep(1.0)
count = 0
Maskon = False
def startTimer():
global count
timer = threading.Timer(1, startTimer)
timer.start()
print(count)
count += 1
if count > 5:
timer.cancel()
# loop over the frames from the video stream
while True:
# grab the frame from the threaded video stream and resize it
# to have a maximum width of 400 pixels
check, frame = vs.read()
if isinstance(frame, np.ndarray):
pass
else:
continue
# detect faces in the frame and determine if they are wearing a
# face mask or not
(locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
# loop over the detected face locations and their corresponding
# locations
for (box, pred) in zip(locs, preds):
# unpack the bounding box and predictions
(startX, startY, endX, endY) = box
(mask, withoutMask) = pred
# determine the class label and color we'll use to draw
# the bounding box and text
label = "Mask" if mask > withoutMask else "No Mask"
color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
if label == "Mask":
Maskon = True
else:
Maskon = False
# include the probability in the label
label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
# display the label and bounding box rectangle on the output
# frame
cv2.putText(frame, label, (startX, startY - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
# show the output frame
cv2.imshow('Frame', frame)
key = cv2.waitKey(1) & 0xFF
if Maskon == True:
startTimer()
else:
count = 0
if count > 70:
break
# do a bit of cleanup
vs.release()
cv2.destroyAllWindows()
This code still does not count in real seconds, and I am still trying to make it work in seconds.
I'd really appreciate your help.
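For what it's worth, a frame-rate-independent way to do this (a minimal sketch, assuming the setup above: vs, Maskon and the detection/drawing code stay exactly as they are) is to drop threading.Timer entirely and remember when the "Mask" label first appeared, using time.monotonic() in the main loop:

mask_since = None  # when "Mask" was first seen; the variable name is mine

while True:
    check, frame = vs.read()
    if not isinstance(frame, np.ndarray):
        continue
    # ... detection and drawing code as above, which sets Maskon for this frame ...
    if Maskon:
        if mask_since is None:
            mask_since = time.monotonic()          # start the 5-second window
        elif time.monotonic() - mask_since >= 5:
            break                                  # "Mask" held for 5 real seconds
    else:
        mask_since = None                          # reset as soon as the mask disappears
    cv2.imshow('Frame', frame)
    cv2.waitKey(1)

vs.release()
cv2.destroyAllWindows()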
Related
I am trying to use TFLite for my face mask recognition project, but it has errors. The program can stream; however, as soon as it detects a human face/face mask, the error terminates the system.
Here's the error:
Traceback (most recent call last):
File "/home/pi/Desktop/fm_detection/fm_tflite.py", line 113, in <module>
(mask, withoutMask) = pred
TypeError: cannot unpack non-iterable numpy.float32 object
Here's my code:
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
from gpiozero import LED, Buzzer
import numpy as np
import imutils
import time
import cv2
import os
import sys
led = LED(19)
buzzer = Buzzer(26)
def detect_and_predict_mask(frame, faceNet, interpreter):
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224),
(104.0, 177.0, 123.0))
faceNet.setInput(blob)
detections = faceNet.forward()
print(detections.shape)
faces = []
locs = []
preds = []
for i in range(0, detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > 0.5:
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
(startX, startY) = (max(0, startX), max(0, startY))
(endX, endY) = (min(w - 1, endX), min(h - 1, endY))
face = frame[startY:endY, startX:endX]
face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
face = cv2.resize(face, (224, 224))
face = img_to_array(face)
face = preprocess_input(face)
faces.append(face)
locs.append((startX, startY, endX, endY))
if len(faces) > 0:
faces = np.array(faces, dtype="float32")
#preds = maskNet.predict(faces, batch_size=32)
interpreter.allocate_tensors()
interpreter.set_tensor(interpreter.get_input_details()[0]["index"], faces)
interpreter.invoke()
preds = interpreter.tensor(interpreter.get_output_details()[0]["index"])()[0]
return (locs, preds)
# load our serialized facemask model from disk
prototxtPath = "/home/pi/Desktop/fm_detection/face_detector/deploy.prototxt"
weightsPath = "/home/pi/Desktop/fm_detection/face_detector/res10_300x300_ssd_iter_140000.caffemodel"
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
#maskNet = load_model("facemask_model.model")
interpreter = tf.lite.Interpreter("facemask_model.tflite")
# initialize the video stream
print("[INFO] starting video stream...")
vs = VideoStream(src=1).start()
# loop over the frames
while True:
frame = vs.read()
frame = imutils.resize(frame, width=500)
(locs, preds) = detect_and_predict_mask(frame, faceNet, interpreter)
for (box, pred) in zip(locs, preds):
# unpack the bounding box and predictions
(startX, startY, endX, endY) = box
(mask, withoutMask) = pred
label = "Mask" if mask > withoutMask else "No Mask"
color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
led.on() if mask > withoutMask else led.off()
buzzer.beep(0.1, 0.1, 1) if mask > withoutMask else led.off()
time.sleep(2)
led.off() if mask > withoutMask else led.off()
os.system("espeak -s 130 'THANK YOU'") if mask > withoutMask else led.off()
sys.exit() if sw2Press else buzzer.off()
cv2.putText(frame, label, (startX, startY - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
if key == ord("q"):
break
cv2.destroyAllWindows()
vs.stop()
The program gets stuck the moment it detects a human face. I hope someone can help me.
Thank you.
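Judging from the traceback, preds ends up one-dimensional: interpreter.tensor(...)()[0] takes only the first row of the model output, so preds is a pair of floats rather than a list of (mask, withoutMask) pairs, and zip(locs, preds) then hands a single numpy.float32 to the unpacking line. A sketch of the idea (not tested against this particular model) is to keep the whole output array so it lines up with locs row by row:

if len(faces) > 0:
    faces = np.array(faces, dtype="float32")
    interpreter.allocate_tensors()
    interpreter.set_tensor(interpreter.get_input_details()[0]["index"], faces)
    interpreter.invoke()
    # keep the full (num_faces, 2) output; the original [0] sliced it down to one row of floats
    preds = interpreter.get_tensor(interpreter.get_output_details()[0]["index"])

If the converted model was exported with a fixed batch size of 1, it may also be necessary to call interpreter.resize_tensor_input(...) before allocate_tensors() so a batch of several faces is accepted; that part is an assumption about the model, not something shown in the question.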
This is my Python code so far.
This code generates a live video stream from the webcam and detects people with and without face masks.
Now I want to take pictures of people who are not wearing a face mask and store those images in a local directory.
Please help me find a solution to this issue.
I already have a data set of mask and no-mask images.
Now I want to take pictures of those not wearing a face mask from the live webcam video stream and save them in a local directory.
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input  # needed for preprocess_input() below
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import numpy as np
import imutils
import time
import cv2
import os
lowConfidence = 0.75
def detectAndPredictMask(frame, faceNet, maskNet):
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224),
(104.0, 177.0, 123.0))
faceNet.setInput(blob)
detections = faceNet.forward()
faces = []
locs = []
preds = []
for i in range(0, detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > lowConfidence:
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
(startX, startY) = (max(0, startX), max(0, startY))
(endX, endY) = (min(w - 1, endX), min(h - 1, endY))
face = frame[startY:endY, startX:endX]
face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
face = cv2.resize(face, (224, 224))
face = img_to_array(face)
face = preprocess_input(face)
faces.append(face)
locs.append((startX, startY, endX, endY))
if len(faces) > 0:
faces = np.array(faces, dtype="float32")
preds = maskNet.predict(faces, batch_size=32)
return (locs, preds)
prototxtPath = r"deploy.prototxt"
weightsPath = r"res10_300x300_ssd_iter_140000.caffemodel"
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
maskNet = load_model("mask_detector.model")
vs = VideoStream(src=0).start()
while True:
frame = vs.read()
frame = imutils.resize(frame, width=900)
(locs, preds) = detectAndPredictMask(frame, faceNet, maskNet)
for (box, pred) in zip(locs, preds):
(startX, startY, endX, endY) = box
(mask, withoutMask) = pred
label = "Mask" if mask > withoutMask else "No Mask"
color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
if label =="Mask":
print("MASK DETECTED")
else:
print("MASK NOT DETECETED")
label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
cv2.putText(frame, label, (startX, startY - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
cv2.imshow("PROMENADE FACE MASK DETECTOR", frame)
key = cv2.waitKey(1) & 0xFF
if key == ord("q"):
break
cv2.destroyAllWindows()
vs.stop()
cv2.imwrite('path_to_desktop/filename.jpg', face)
You can read more about imwrite() in the OpenCV documentation.
Within detectAndPredictMask your code loops over the detections and extracts a slice of frame, assigning it to face (face = frame[startY:endY, startX:endX]). At that point face contains an OpenCV-compatible image. Use imwrite before face = img_to_array(face), because after that the image is transformed into a preprocessed NumPy array.
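To save only the people classified as "No Mask", another option (my sketch, building on the answer above; the folder name is made up) is to crop the frame again in the main loop once label is known, right after label is computed and before it is reformatted with the percentage, and write the crop with a timestamped filename:

output_dir = "no_mask_captures"         # assumed folder name
os.makedirs(output_dir, exist_ok=True)  # os and time are already imported in the script

# inside `for (box, pred) in zip(locs, preds):`, right after `label` is set:
if label == "No Mask":
    face_crop = frame[startY:endY, startX:endX]   # BGR crop straight from the frame
    filename = os.path.join(output_dir, "no_mask_{}.jpg".format(int(time.time())))
    cv2.imwrite(filename, face_crop)              # note: writes within the same second overwrite each other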
I am trying to create a function that detects faces using a face detection model (face_model) and uses a face mask model I trained (mask_model) to return the position of each face together with the prediction from the face mask model.
But when using video, only one face is detected at a time in each frame.
Here is my code:
def detect_and_predict_mask(video_frame, face_model, mask_model):
(h, w) = video_frame.shape[:2]
blob = cv2.dnn.blobFromImage(video_frame, 1.0, (300, 300), (104.0, 177.0, 123.0))
# pass the blob through the network and obtain the face detections
face_model.setInput(blob)
detections = face_model.forward()
print(detections.shape)
faces = []
locations = []
predictions = []
for i in range(0, detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > CONFIDENCE:
face_box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(start_x, start_y, end_x, end_y) = face_box.astype("int")
(start_x, start_y) = (max(0, start_x), max(0, start_y))
(end_x, end_y) = (min(w - 1, end_x), min(h - 1, end_y))
face = video_frame[start_y:end_y, start_x:end_x]
face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
face = cv2.resize(face, (224, 224))
face = img_to_array(face)
face = preprocess_input(face)
faces.append(face)
locations.append((start_x, start_y, end_x, end_y))
print(f'FACES: {len(faces)}')
print(f'locations: {locations}')
if len(faces) > 0:
# for faster inference we'll make batch predictions on *all* faces at the same time rather than one-by-one predictions in the above `for` loop
faces = np.array(faces, dtype="float32")
predictions = mask_model.predict(faces, batch_size=32)
return locations, predictions
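The function itself appends every detection above CONFIDENCE, so it can return several faces per frame; whether more than one shows up usually depends on the loop that consumes the return value, which the question does not include. A minimal usage sketch (the drawing code here is my assumption, not the asker's) that draws every returned face would be:

locations, predictions = detect_and_predict_mask(video_frame, face_model, mask_model)
for (start_x, start_y, end_x, end_y), (mask, without_mask) in zip(locations, predictions):
    label = "Mask" if mask > without_mask else "No Mask"
    color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
    cv2.rectangle(video_frame, (start_x, start_y), (end_x, end_y), color, 2)
    cv2.putText(video_frame, label, (start_x, start_y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)

If the detector really only reports one face, it is also worth printing the raw confidence values per detection to see whether the second face simply falls below CONFIDENCE.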
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import numpy as np
import argparse
import imutils
import time
import cv2
import os
def detect_and_predict_mask(frame, faceNet, maskNet):
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
(104.0, 177.0, 123.0))
faceNet.setInput(blob)
detections = faceNet.forward()
faces = []
locs = []
preds = []
for i in range(0, detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > args["confidence"]:
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
(startX, startY) = (max(0, startX), max(0, startY))
(endX, endY) = (min(w - 1, endX), min(h - 1, endY))
face = frame[startY:endY, startX:endX]
face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
face = cv2.resize(face, (224, 224))
face = img_to_array(face)
face = preprocess_input(face)
face = np.expand_dims(face, axis=0)
faces.append(face)
locs.append((startX, startY, endX, endY))
if len(faces) > 0:
preds = maskNet.predict(faces)
return (locs, preds)
ap = argparse.ArgumentParser()
ap.add_argument("-f", "--face", type=str,
default="face_detector",
help="path to face detector model directory")
ap.add_argument("-m", "--model", type=str,
default="mask_detector.model",
help="path to trained face mask detector model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
print("[INFO] loading face detector model...")
prototxtPath = os.path.sep.join([args["face"], "deploy.prototxt"])
weightsPath = os.path.sep.join([args["face"],
"res10_300x300_ssd_iter_140000.caffemodel"])
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
print("[INFO] loading face mask detector model...")
maskNet = load_model(args["model"])
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)
while True:
frame = vs.read()
frame = imutils.resize(frame, width=400)
(locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
for (box, pred) in zip(locs, preds):
(startX, startY, endX, endY) = box
(mask, withoutMask) = pred
label = "Mask" if mask > withoutMask else "No Mask"
color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
cv2.putText(frame, label, (startX, startY - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
if key == ord("q"):
break
cv2.destroyAllWindows()
vs.stop()
Are there any modules that I need to import, or is everything imported correctly?
I have not debugged it.
I reformatted the file once.
I have all of the needed xml and jpg files.
I have imported all of the modules that are needed... I think so.
Below is the error.
Can you help me please? I'm new to OpenCV and got a complicated error like this.
Thanks to all of the Stack Overflow community for the help :)
C:\Users\Toshiba\Desktop\python_temelleri\venv\Scripts\python.exe C:/Users/Toshiba/Desktop/python_temelleri/detect_mask_video.py
2020-08-28 16:47:05.729423: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'cudart64_101.dll'; dlerror: cudart64_101.dll not found
2020-08-28 16:47:05.730054: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Traceback (most recent call last):
File "C:/Users/Toshiba/Desktop/python_temelleri/detect_mask_video.py", line 60, in <module>
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
cv2.error: OpenCV(4.4.0) C:\Users\appveyor\AppData\Local\Temp\1\pip-req-build-2b5g8ysb\opencv\modules\dnn\src\caffe\caffe_io.cpp:1121: error: (-2:Unspecified error) FAILED: fs.is_open(). Can't open "face_detector\deploy.prototxt" in function 'cv::dnn::ReadProtoFromTextFile'
[INFO] loading face detector model...
Process finished with exit code 1
You either don't provide the models, or the paths to the models are not correct. Print the values of prototxtPath and weightsPath to check whether you are passing correct paths to the models.
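A quick way to verify the paths, as the answer suggests (the print/exists lines are only a debugging sketch):

prototxtPath = os.path.sep.join([args["face"], "deploy.prototxt"])
weightsPath = os.path.sep.join([args["face"],
    "res10_300x300_ssd_iter_140000.caffemodel"])

print("prototxt:", os.path.abspath(prototxtPath), os.path.exists(prototxtPath))
print("weights :", os.path.abspath(weightsPath), os.path.exists(weightsPath))

# readNet only succeeds if both files exist at those paths relative to the current
# working directory; an IDE run configuration may start the script from another folder
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)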
I am working on a project where I am using cv2 for multi-object detection and tracking. I was able to detect objects in a live feed, and I was also able to track them within the frame.
For example:
Two persons get detected in the frame and we assign a tracker to each of them, say Person0 and Person1.
Up to here it works fine.
The challenge is that when the first person leaves, the corresponding tracker gets deleted and the second person's tracker ID changes to Person0. I don't want it to change to Person0; it should stay Person1.
Below is the code I've tried:
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2
from math import floor
# initialize the list of class labels MobileNet SSD was trained to
# detect, then generate a set of bounding box colors for each class
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
"sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe('./models/MobileNetSSD_deploy.prototxt.txt', './models/MobileNetSSD_deploy.caffemodel')
# initialize the video stream, allow the cammera sensor to warmup,
# and initialize the FPS counter
print("[INFO] starting video stream...")
vs = VideoStream(src=1).start()
time.sleep(2.0)
fps = FPS().start()
init_once = []
oklist = []
tracker_lst = []
newbox = []
# loop over the frames from the video stream
while True:
# grab the frame from the threaded video stream and resize it
# to have a maximum width of 400 pixels
frame = vs.read()
frame = imutils.resize(frame, width=900,height = 600)
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)),0.007843, (300, 300), 127.5)
net.setInput(blob)
detections = net.forward()
bounding_boxes = []
for i in np.arange(0,detections.shape[2]):
confidence = detections[0,0,i,2]
if confidence > 0.2:
idx = int(detections[0,0,i,1])
if CLASSES[idx] == 'person':
box = detections[0,0,i,3:7]*np.array([w,h,w,h])
bounding_boxes.append(box)
label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
print("[INFO] {}".format(label))
(startX, startY, endX, endY) = box.astype(int)
cv2.rectangle(frame, (startX, startY), (endX, endY),COLORS[idx], 2)
y = startY - 15 if startY - 15 > 15 else startY + 15
cv2.putText(frame, label, (startX, y),cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
# if len(init_once) < 1:
# bbox = bounding_boxes
print("####Detected bounding boxes:", bounding_boxes)
# loop over the detections
newbbox = []
bbox = bounding_boxes
for (i,boxie) in enumerate(bbox):
(startX,startY,endX,endY) = boxie.astype(int)
boxie = (int(startX),int(startY),int(endX-startX),int(endY-startY))
newbbox.append(boxie)
#print (newbbox)
if len(bounding_boxes):
tracker_lst.append(cv2.TrackerKCF_create())
okinit = tracker_lst[i].init(frame, boxie)
init_once.append(1)
oklist.append(okinit)
print (len(tracker_lst))
oklist[i], newbbox[i] = tracker_lst[i].update(frame)
for (i,newbox) in enumerate (newbbox) :
if oklist[i]:
Tracker_ID = "{}: {:.2f}%".format('person'+str(i), confidence * 100)
cv2.rectangle(frame,(int(floor(newbox[0])),int(floor(newbox[1]))), (int(floor(newbox[0]))+int(floor(newbox[2])),int(floor(newbox[1]))+int(floor(newbox[3]))),(255,255,255), 1)
new_y = floor(newbox[1]) - 15 if floor(newbox[1]) - 15 > 15 else floor(newbox[1]) + 15
print (newbox[0])
print (newbox[1])
cv2.putText(frame, Tracker_ID, (int(floor(newbox[0])), int(floor(newbox[1]))),cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1)
else :
cv2.putText(frame, "Tracking failure object {0}".format(i), (100,(80+(20*i))), cv2.FONT_HERSHEY_SIMPLEX, 0.4,(255,255,255),1)
if len(bounding_boxes)>0:
del tracker_lst[i]
del oklist[i]
#oklist[i], newbbox[i] = tracker_lst[i].update(frame)
# show the output frame
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
# update the FPS counte
fps.update()
# stop the timer and display FPS information
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
# do a bit of cleanup
cv2.destroyAllWindows()
vs.release()
How do I know which objects were dropped from the frame, and how do I remove the tracker only for those, without changing the tracker IDs of the objects that remain in the frame?
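One way to keep the IDs stable (a sketch with names of my own choosing, not the asker's code) is to store the trackers in a dictionary keyed by an ever-increasing ID instead of a list, so deleting one entry never renumbers the others:

trackers = {}   # persistent id -> tracker object
next_id = 0

def add_tracker(frame, box):
    global next_id
    tracker = cv2.TrackerKCF_create()
    tracker.init(frame, box)
    trackers[next_id] = tracker
    next_id += 1

def update_trackers(frame):
    dropped = []
    for person_id, tracker in trackers.items():
        ok, box = tracker.update(frame)
        if ok:
            x, y, w, h = [int(v) for v in box]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 255, 255), 1)
            cv2.putText(frame, "person{}".format(person_id), (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
        else:
            dropped.append(person_id)      # tracker lost its object (e.g. the person left)
    for person_id in dropped:
        del trackers[person_id]            # only the lost tracker is removed; other IDs are untouched

This does not solve matching new detections to existing trackers (that still needs something like IoU matching or a centroid tracker), but it keeps Person1 as Person1 after Person0 leaves.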