Tensorflow Object detection API: Print detected class as output

Tensorflow Object detection API: Print detected class as output - python

I am using the TF Object detection API to detect images, it is working fine and given an image it will draw the bounding box with a label and confidence score. My question is how to print the detected class (as a string) i.e not just on the image but as an output to the terminal too.
This is the code of detection in real time.
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
while True:
ret, frame = cap.read()
image_np = np.array(frame)
input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0),
dtype=tf.float32)
detections = detect_fn(input_tensor)
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
for key, value in detections.items()}
detections['num_detections'] = num_detections
# detection_classes should be ints.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
label_id_offset = 1
image_np_with_detections = image_np.copy()
viz_utils.visualize_boxes_and_labels_on_image_array(
image_np_with_detections,
detections['detection_boxes'],
detections['detection_classes']+label_id_offset,
detections['detection_scores'],
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=3,
min_score_thresh=.5,
agnostic_mode=False)
cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))
if cv2.waitKey(1) & 0xFF == ord('q'):
cap.release()
break

Classes will be encrypted in category_index variable.Use the below code snippet to get the detected class.
print ([category_index.get(value) for index,value in enumerate(classes[0]) if scores[0,index] > 0.5])

Code:
my_classes = detections['detection_classes'][0].numpy() + label_id_offset
my_scores = detections['detection_scores'][0].numpy()
min_score = 0.5
print([category_index[value]['name']
for index,value in enumerate(my_classes)
if my_scores[index] > min_score
])
Sample output:
['person', 'cell phone', 'remote']

Related

Tensorflow object detections in real time

Tensorflow object-detection Want to add detected object name in .csv file format while real time detection is going on
Its just detecting objects
but I want all detected objects names in another file i tired to add it. but all i got is bulk of arrays only with .float32 & .int64
code
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
while cap.isOpened():
ret, frame = cap.read()
image_np = np.array(frame)
input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
detections = detect_fn(input_tensor)
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
for key, value in detections.items()}
detections['num_detections'] = num_detections
# detection_classes should be ints.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
label_id_offset = 1
image_np_with_detections = image_np.copy()
viz_utils.visualize_boxes_and_labels_on_image_array(
image_np_with_detections,
detections['detection_boxes'],
detections['detection_classes']+label_id_offset,
detections['detection_scores'],
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=5,
min_score_thresh=.8,
agnostic_mode=False)
cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))
if cv2.waitKey(10) & 0xFF == ord('q'):
cap.release()
cv2.destroyAllWindows()
break

How can i close video window with timer?

I am using Mask detect code from github(I changed little) and trying to close video window with timer(or function like timer). This code detect mask and show text "Mask", or "No Mask". I want to close window when label showed "Mask" for 5seconds.
I tried using timer code
def startTimer():
global count
timer = threading.Timer(1, startTimer)
timer.start()
print(count)
count += 1
if count > 5:
timer.cancel()
but it doesn't work well. There are 3 problems with it.
Timer ran but it does not work in seconds.
2.And timer should stop when count become 5, but it goes up to 9 then window close.
3. When label showed "Mask", video stop.
I want to know how to close window with timer in this code, and how to fix these 3 problems.
I would be really appreciate for your help
from imutils import video
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import numpy as np
import argparse
import imutils
import time
import cv2
import os
import threading
from tensorflow.python.ops.math_ops import truediv
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
def detect_and_predict_mask(frame, faceNet, maskNet):
# grab the dimensions of the frame and then construct a blob
# from it
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
(104.0, 180.0, 124.0))
# pass the blob through the network and obtain the face detections
faceNet.setInput(blob)
detections = faceNet.forward()
# initialize our list of faces, their corresponding locations,
# and the list of predictions from our face mask network
faces = []
locs = []
preds = []
# loop over the detections
for i in range(0, detections.shape[2]):
# extract the confidence (i.e., probability) associated with
# the detection
confidence = detections[0, 0, i, 2]
# filter out weak detections by ensuring the confidence is
# greater than the minimum confidence
if confidence > args["confidence"]:
# compute the (x, y)-coordinates of the bounding box for
# the object
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
# ensure the bounding boxes fall within the dimensions of
# the frame
(startX, startY) = (max(0, startX), max(0, startY))
(endX, endY) = (min(w - 1, endX), min(h - 1, endY))
# extract the face ROI, convert it from BGR to RGB channel
# ordering, resize it to 224x224, and preprocess it
face = frame[startY:endY, startX:endX]
face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
face = cv2.resize(face, (224, 224))
face = img_to_array(face)
face = preprocess_input(face)
# add the face and bounding boxes to their respective
# lists
faces.append(face)
locs.append((startX, startY, endX, endY))
# only make a predictions if at least one face was detected
if len(faces) > 0:
# for faster inference we'll make batch predictions on *all*
# faces at the same time rather than one-by-one predictions
# in the above `for` loop
faces = np.array(faces, dtype="float32")
preds = maskNet.predict(faces, batch_size=32)
# return a 2-tuple of the face locations and their corresponding
# locations
return (locs, preds)
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-f", "--face", type=str,
default="face_detector",
help="path to face detector model directory")
ap.add_argument("-m", "--model", type=str,
default="mask_detector.model",
help="path to trained face mask detector model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = os.path.sep.join([args["face"], "deploy.prototxt"])
weightsPath = os.path.sep.join([args["face"],
"res10_300x300_ssd_iter_140000.caffemodel"])
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
# load the face mask detector model from disk
print("[INFO] loading face mask detector model...")
maskNet = load_model(args["model"])
# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
vs = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
vs.set(cv2.CAP_PROP_FRAME_WIDTH,640)
vs.set(cv2.CAP_PROP_FRAME_HEIGHT,480)
time.sleep(1.0)
count = 0
Maskon = False
def startTimer():
global count
timer = threading.Timer(1, startTimer)
timer.start()
print(count)
count += 1
if count > 5:
timer.cancel()
# loop over the frames from the video stream
while True:
# grab the frame from the threaded video stream and resize it
# to have a maximum width of 400 pixels
check, frame = vs.read()
if isinstance(frame, np.ndarray):
pass
else:
continue
# detect faces in the frame and determine if they are wearing a
# face mask or not
(locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
# loop over the detected face locations and their corresponding
# locations
for (box, pred) in zip(locs, preds):
# unpack the bounding box and predictions
(startX, startY, endX, endY) = box
(mask, withoutMask) = pred
# determine the class label and color we'll use to draw
# the bounding box and text
label = "Mask" if mask > withoutMask else "No Mask"
color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
if label == "Mask":
Maskon = True
else:
Maskon = False
# include the probability in the label
label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
# display the label and bounding box rectangle on the output
# frame
cv2.putText(frame, label, (startX, startY - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
if Maskon == True:
startTimer()
# show the output frame
cv2.imshow('Frame', frame)
key = cv2.waitKey(1) & 0xFF
if count == 5:
break
# do a bit of cleanup
vs.release()
cv2.destroyAllWindows()

I made it!
Not working in seconds, but it works how i wanted.
from imutils import video
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import numpy as np
import argparse
import imutils
import time
import cv2
import os
import threading
from tensorflow.python.ops.math_ops import truediv
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
def detect_and_predict_mask(frame, faceNet, maskNet):
# grab the dimensions of the frame and then construct a blob
# from it
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
(104.0, 180.0, 124.0))
# pass the blob through the network and obtain the face detections
faceNet.setInput(blob)
detections = faceNet.forward()
# initialize our list of faces, their corresponding locations,
# and the list of predictions from our face mask network
faces = []
locs = []
preds = []
# loop over the detections
for i in range(0, detections.shape[2]):
# extract the confidence (i.e., probability) associated with
# the detection
confidence = detections[0, 0, i, 2]
# filter out weak detections by ensuring the confidence is
# greater than the minimum confidence
if confidence > args["confidence"]:
# compute the (x, y)-coordinates of the bounding box for
# the object
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
# ensure the bounding boxes fall within the dimensions of
# the frame
(startX, startY) = (max(0, startX), max(0, startY))
(endX, endY) = (min(w - 1, endX), min(h - 1, endY))
# extract the face ROI, convert it from BGR to RGB channel
# ordering, resize it to 224x224, and preprocess it
face = frame[startY:endY, startX:endX]
face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
face = cv2.resize(face, (224, 224))
face = img_to_array(face)
face = preprocess_input(face)
# add the face and bounding boxes to their respective
# lists
faces.append(face)
locs.append((startX, startY, endX, endY))
# only make a predictions if at least one face was detected
if len(faces) > 0:
# for faster inference we'll make batch predictions on *all*
# faces at the same time rather than one-by-one predictions
# in the above `for` loop
faces = np.array(faces, dtype="float32")
preds = maskNet.predict(faces, batch_size=32)
# return a 2-tuple of the face locations and their corresponding
# locations
return (locs, preds)
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-f", "--face", type=str,
default="face_detector",
help="path to face detector model directory")
ap.add_argument("-m", "--model", type=str,
default="mask_detector.model",
help="path to trained face mask detector model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = os.path.sep.join([args["face"], "deploy.prototxt"])
weightsPath = os.path.sep.join([args["face"],
"res10_300x300_ssd_iter_140000.caffemodel"])
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
# load the face mask detector model from disk
print("[INFO] loading face mask detector model...")
maskNet = load_model(args["model"])
# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
vs = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
vs.set(cv2.CAP_PROP_FRAME_WIDTH,640)
vs.set(cv2.CAP_PROP_FRAME_HEIGHT,480)
time.sleep(1.0)
count = 0
Maskon = False
def startTimer():
global count
timer = threading.Timer(1, startTimer)
timer.start()
print(count)
count += 1
if count > 5:
timer.cancel()
# loop over the frames from the video stream
while True:
# grab the frame from the threaded video stream and resize it
# to have a maximum width of 400 pixels
check, frame = vs.read()
if isinstance(frame, np.ndarray):
pass
else:
continue
# detect faces in the frame and determine if they are wearing a
# face mask or not
(locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
# loop over the detected face locations and their corresponding
# locations
for (box, pred) in zip(locs, preds):
# unpack the bounding box and predictions
(startX, startY, endX, endY) = box
(mask, withoutMask) = pred
# determine the class label and color we'll use to draw
# the bounding box and text
label = "Mask" if mask > withoutMask else "No Mask"
color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
if label == "Mask":
Maskon = True
else:
Maskon = False
# include the probability in the label
label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
# display the label and bounding box rectangle on the output
# frame
cv2.putText(frame, label, (startX, startY - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
# show the output frame
cv2.imshow('Frame', frame)
key = cv2.waitKey(1) & 0xFF
if Maskon == True:
startTimer()
else:
count = 0
if count > 70:
break
# do a bit of cleanup
vs.release()
cv2.destroyAllWindows()
This code does not work in seconds, and i am still trying to make it work in seconds.
I'd really appreciate for your help

How to draw rectangle in opencv around fire detected object from keras model

This code is for detecting fire using tensorflow keras model and I do not know how to draw rectangle around the fire detected object:
model = tf.keras.models.load_model('my_model\\InceptionV3.h5')
video = cv2.VideoCapture(0)
while True:
_, frame = video.read()
im = Image.fromarray(frame, 'RGB')
im = im.resize((224, 224))
img_array = image.img_to_array(im)
img_array = np.expand_dims(img_array, axis=0) / 255
probabilities = model.predict(img_array)[0]
prediction = np.argmax(probabilities)
# if prediction is 0, which means there is fire in the frame.
if prediction == 0:
img = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
print(probabilities[prediction])
cv2.imshow("Capturing", frame)
cv2.imshow("Capturing", img)
if cv2.waitKey(1) & 0xff == ord('q'):
break
video.release()
cv2.destroyAllWindows()

OpenCv facial recognition app detection problem

This is a facial recognition app I had created using python , opencv and haar cascade classifier, the app is working good in classifying trained persons, however the app has a problem of detecting unknown persons as a known person who is trained previously , How to fix such problem ?
this is the dataset creation code
import cv2
import os
import time
cam = cv2.VideoCapture(0)
cam.set(3, 640) # set video width
cam.set(4, 480) # set video height
face_detector = cv2.CascadeClassifier('Cascades/haarcascade_frontalface_default.xml')
# For each person, enter one numeric face id
face_id = input('\n enter user id end press <return> ==> ')
print("\n [INFO] Initializing face capture. Look the camera and wait ...")
# Initialize individual sampling face count
count = 0
while(True):
ret, img = cam.read()
img = cv2.flip(img, 1)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_detector.detectMultiScale(gray, 1.3, 5)
width_d, height_d = 150, 150
for (x,y,w,h) in faces:
cv2.rectangle(img, (x,y), (x+w,y+h), (255,0,0), 2)
count += 1
# Save the captured image into the datasets folder
cv2.imwrite("dataset/User." + str(face_id) + '.' + str(count) + ".jpg", cv2.resize(gray[y:y+h,x:x+w] , (width_d, height_d)))
cv2.imshow('image', img)
k = cv2.waitKey(100) & 0xff # Press 'ESC' for exiting video
if k == 27:
break
elif count >= 400: # Take 30 face sample and stop video
break
# Do a bit of cleanup
print("\n [INFO] Exiting Program and cleanup stuff")
cam.release()
cv2.destroyAllWindows()
This is the training phase
import cv2
import numpy as np
from PIL import Image
import os
# Path for face image database
path = 'dataset'
recognizer = cv2.face.LBPHFaceRecognizer_create()
detector = cv2.CascadeClassifier("Cascades/haarcascade_frontalface_default.xml");
# function to get the images and label data
def getImagesAndLabels(path):
width_d, height_d = 150, 150 # Declare your own width and height
imagePaths = [os.path.join(path,f) for f in os.listdir(path)]
faceSamples=[]
ids = []
for imagePath in imagePaths:
PIL_img = Image.open(imagePath).convert('L') # convert it to grayscale
img_numpy = np.array(PIL_img,'uint8')
id = int(os.path.split(imagePath)[-1].split(".")[1])
faces = detector.detectMultiScale(img_numpy)
for (x,y,w,h) in faces:
faceSamples.append(cv2.resize(img_numpy[y:y+h,x:x+w], (width_d, height_d)))
ids.append(id)
return faceSamples,ids
print ("\n [INFO] Training faces. It will take a few seconds. Wait ...")
faces,ids = getImagesAndLabels(path)
recognizer.train(faces, np.array(ids))
# Save the model into trainer/trainer.yml
recognizer.write('trainer/trainer.yml') # recognizer.save() worked on Mac, but not on Pi
# Print the numer of faces trained and end program
print("\n [INFO] {0} faces trained. Exiting Program".format(len(np.unique(ids))))
This is the recognizing phase
import cv2
import numpy as np
import os
recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer/trainer.yml')
cascadePath = "Cascades/haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath);
font = cv2.FONT_HERSHEY_SIMPLEX
#iniciate id counter
id =0
# names related to ids: example ==> Marcelo: id=1, etc
names = ['Mamdouh Alaa' , 'Dr.Ahmed Seddawy' , 'Dr.Ismail Abdulghaffar']
# Initialize and start realtime video capture
cam = cv2.VideoCapture(0)
cam.set(3, 1366) # set video widht
cam.set(4, 768) # set video height[enter image description here][1]
# Define min window size to be recognized as a face
minW = 0.1*cam.get(3)
minH = 0.1*cam.get(4)
while True:
ret, img =cam.read()
img = cv2.flip(img, 1)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
width_d, height_d = 150, 150
faces = faceCascade.detectMultiScale(
gray,
scaleFactor = 1.2,
minNeighbors = 5,
minSize = (int(minW), int(minH)),
)
for(x,y,w,h) in faces:
cv2.rectangle(img, (x,y), (x+w,y+h), (255,0,0), 2)
id, confidence = recognizer.predict(cv2.resize(gray[y:y+h,x:x+w], (width_d, height_d)))
# Check if confidence is less them 100 ==> "0" is perfect match
if (confidence < 100) :
id = names[id]
confidence = " {0}%".format(round(100 - confidence))
else:
id = "unknown person"
confidence = " {0}%".format(round(100 - confidence))
cv2.putText(img, str(id), (x+5,y-5), font, 1, (255,255,255), 2)
cv2.putText(img, str(confidence), (x+5,y+h-5), font, 1, (255,255,0), 1)
cv2.imshow('IFR',img)
k = cv2.waitKey(10) & 0xff # Press 'ESC' for exiting video
if k == 27:
break
# Do a bit of cleanup
print("\n [INFO] Exiting Program and cleanup stuff")
cam.release()
cv2.destroyAllWindows()

Convert Numpy Array to Video(mp4)

I have a video(mp4) that I read into a Numpy Array and edit. Now I want to convert the array back to a video
I already tried to use VideoWriter but the created mp4 was always corrupt
frame_array = []
with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess:
while cap.isOpened():
ret, image_np = cap.read()
if ret == True:
image_np_expanded = np.expand_dims(image_np, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8)
i=i+1
frame_array.append(image_np)
cv2.imwrite('pictures/'+ str(i) + '.jpg',image_np)
else:
break
print("Everything detected")
out = cv2.VideoWriter('video.mp4',cv2.VideoWriter_fourcc(*'DIVX'), 30, (640, 480))
for i in frame_array:
out.write(i)
out.release()
The created mp4 was always corrupt and couldnt be opened. Every saved Picture with cv2.imwrite is okay and viewable

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Tensorflow Object detection API: Print detected class as output - python

Classes will be encrypted in category_index variable.Use the below code snippet to get the detected class. print ([category_index.get(value) for index,value in enumerate(classes[0]) if scores[0,index] > 0.5])

Related

Tensorflow object detections in real time

How can i close video window with timer?

How to draw rectangle in opencv around fire detected object from keras model

OpenCv facial recognition app detection problem

Convert Numpy Array to Video(mp4)

Categories

Resources