I am trying to serve my face recognition model, implemented in Keras, through a Flask API. The call fails when I pass different camera URLs as the `url` parameter.
I am getting the following error:
TypeError: Cannot interpret feed_dict key as Tensor: Tensor Tensor("Placeholder_50:0", shape=(3, 3, 3, 32), dtype=float32) is not an element of this graph. - - [23/Nov/2022 13:39:49] "GET /api/recognise?url=rtsp://admin:inndata123# HTTP/1.1" 500 -
I found that the suggested fix is to create a new session for each thread, but I don't know where to place those lines in my code.
# running db and email functions in background and parallalized action and bbox dist loops
import json
import os
import pickle
import cv2
import imutils
import dlib
import torch
import time
import numpy as np
import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image, ImageFont, ImageDraw
from script.fx import prewhiten, l2_normalize
from keras.models import load_model
from scipy.spatial import distance
from mtcnn.mtcnn import MTCNN
from script.generate_data import generate_embeddings
import mysql.connector
from mysql.connector import (connection)
import smtplib
import mimetypes
from email.message import EmailMessage
message = EmailMessage()
import tensorflow as tf
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
from flask import Flask, jsonify, request,render_template,Response
app = Flask(__name__)
global graph
graph = tf.get_default_graph()
sess = tf.Session(graph=graph, config=session_conf)
model_path = './data/model/facenet_keras.h5'
font_path = './data/font/Calibri Regular.ttf'
embedding_path = './data/arrays/embeddings.npz'
vars_path = './data/arrays/vars.npz'
curr_time = datetime.datetime.now()
time_date = curr_time.strftime('%Y-%m-%d %H:%M:%S')
only_date= curr_time.strftime('%Y-%m-%d')
login_time = curr_time.replace(hour=8, minute=0, second=0, microsecond=0)
logout_time = curr_time.replace(hour=17, minute=15, second=0, microsecond=0)
if os.path.exists(embedding_path) == True:
print('Loadings embeddings...')
loaded_embeddings = np.load(embedding_path)
embeddings, names = loaded_embeddings['a'], loaded_embeddings['b']
loaded_vars = np.load(vars_path)
slope, intercept = loaded_vars['a'], loaded_vars['b']
print('Creatings embeddings...')
loaded_embeddings = np.load(embedding_path)
embeddings, names = loaded_embeddings['a'], loaded_embeddings['b']
loaded_vars = np.load(vars_path)
slope, intercept = loaded_vars['a'], loaded_vars['b']
frame_count = 0
frame_number = 0
bbox_centers = []
log_in = []
log_out = []
date_list = []
mins_lst = []
def recognise():
url = request.args.get('url')
if url!=str(0):
video_sources = cv2.VideoCapture(url)
detector = MTCNN()
model = load_model(model_path, compile=False)
graph = tf.get_default_graph()
def inner():
frame_count = 0
frame_number = 0
while 1:
start= time.time()
var, frame = video_sources.read()
if frame is not None:
small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
# Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
rgb_small_frame = small_frame[:, :, ::-1]
# frame = cv2.resize(frame, (1500, 1000))
if frame_count % 10 == 0 and rgb_small_frame is not None:
faces = detector.detect_faces(rgb_small_frame) # result
print('faces :',len(faces))
for result in faces:
x_face, y_face, w_face, h_face = result['box']
x_face = x_face * 4
y_face = y_face * 4
w_face = w_face * 4
h_face = h_face * 4
#face bbox tuples
#zone bbox tuples
zone_tuple1 = (950, 700)
zone_tuple2 = (2000, 1050)
# Margins for Face box
dw = 0.1 * w_face
dh = 0.2 * h_face
#center = (x_face + w_face // 2, y_face + h_face // 2)
#cv2.rectangle(frame, zone_tuple1, zone_tuple2, (255, 0, 0), 2)
#if (all(x > y for x, y in zip(face_tuple1, zone_tuple1)))==True and (all(x < y for x, y in zip(face_tuple2, zone_tuple2)))==True:
# radius=2
with graph.as_default():
dist = []
for i in range(len(embeddings)):
cv2.resize(frame[y_face:y_face + h_face, x_face:x_face + w_face], (160, 160)).reshape(
-1, 160,
embeddings[i].reshape(1, 128)))
dist = np.array(dist)
if os.path.exists(only_date + '.txt') == False:
f = open(only_date + '.txt', "a+")
if dist.min() > 1.20:
log = 'Unauthorized Entry'
emp_id = 'None'
f1 = open("unauthorised.txt", "a")
elif dist.min() <= 1:
emp_id = names[dist.argmin()]
if int(emp_id) not in log_in and curr_time >= login_time:
log = 'punch-in'
f2 = open(only_date + '.txt', "a")
if int(emp_id) in log_in and curr_time >= logout_time and int(emp_id) not in log_out:
# and center[0] > 750 and center[0] > 960:
log = 'punch-out'
f2 = open(only_date + '.txt', "a")
emp_id = 'None'
log = 'unidentified'
if emp_id != 'unauthorized' and emp_id != 'unidentified':
font_size = int(
slope[dist.argmin()] * ((w_face + 2 * dw) // 3) * 2 + intercept[dist.argmin()])
color = (0, 255, 0)
elif emp_id == 'unauthorized':
font_size = int(
slope[dist.argmin()] * ((w_face + 2 * dw) // 3) * 2 + intercept[dist.argmin()])
color = (0, 0, 255)
font_size = int(0.1974311 * ((w_face + 2 * dw) // 3) * 2 + 0.03397702412218706)
color = (0, 255, 0)
font = ImageFont.truetype(font_path, font_size)
size = font.getbbox(emp_id)
pt1=(x_face - int(np.floor(dw)), (y_face - int(np.floor(dh)))),
(x_face + w_face + int(np.ceil(dw))), (y_face + h_face + int(np.ceil(dh)))),
color=(0, 255, 0),
thickness=2) # Face Rectangle
pt1=(x_face - int(np.floor(dw)), y_face - int(np.floor(dh)) - size[1]),
pt2=(x_face + size[0], y_face - int(np.floor(dh))),
color=(0, 255, 0),
img = Image.fromarray(frame)
draw = ImageDraw.Draw(img)
draw.text((x_face - int(np.floor(dw)), y_face - int(np.floor(dh)) - size[1]), emp_id,
frame = np.array(img)
if emp_id == 'unauthorized':
frame_name = f'{emp_id}_{frame_number}.jpg'
cv2.resize(frame[y_face:y_face + h_face, x_face:x_face + w_face],
(250, 250)))
elif emp_id != 'unauthorised' and emp_id != 'unidentified':
frame_name = f'{emp_id}_{frame_number}.jpg'
cv2.resize(frame[y_face:y_face + h_face, x_face:x_face + w_face],
(250, 250)))
# add_attachment(frame_name)
frame_number += 1
end = time.time()
if log != 'unidentified':
data = {'emp_id': emp_id, 'date': time_date, 'log': log}
yield json.dumps(data) + "\n"
# cv2.imshow('Frame', cv2.resize(frame, (950, 950)))
if cv2.waitKey(15) & 255 == ord('q'):
return Response(inner(), mimetype='application/json')
if __name__=='__main__':
This is my face recognition model integrated into Flask.
Given below is the code that is available on the website as well as in the tutorial video: https://www.youtube.com/watch?v=wa2ARoUUdU8&t=118s
import cv2
from cvzone.HandTrackingModule import HandDetector
import numpy as np
import math
import time
# Capture hand images from the webcam and, whenever the "s" key is pressed,
# save the latest square, white-padded hand crop into `folder`.
cap = cv2.VideoCapture(0)
detector = HandDetector(maxHands=1)
offset = 20      # padding in pixels added around the detected hand box
imgSize = 300    # side length of the square output image
folder = "Data/C"
counter = 0
imgWhite = None  # latest padded crop; stays None until a hand has been seen
while True:
    success, img = cap.read()
    if not success:
        # No frame was delivered; passing None on to findHands would raise.
        print("Could not read a frame from the camera")
        break
    hands, img = detector.findHands(img)
    if hands:
        hand = hands[0]
        x, y, w, h = hand['bbox']
        # Clamp the padded box to the frame. A negative start index would wrap
        # around and yield an empty crop, which makes cv2.resize fail.
        frameH, frameW = img.shape[:2]
        x1, y1 = max(0, x - offset), max(0, y - offset)
        x2, y2 = min(frameW, x + w + offset), min(frameH, y + h + offset)
        imgCrop = img[y1:y2, x1:x2]
        if imgCrop.size != 0 and w > 0 and h > 0:
            canvas = np.ones((imgSize, imgSize, 3), np.uint8) * 255
            aspectRatio = h / w
            if aspectRatio > 1:
                # Taller than wide: fill the full height, centre horizontally.
                k = imgSize / h
                wCal = math.ceil(k * w)
                imgResize = cv2.resize(imgCrop, (wCal, imgSize))
                wGap = math.ceil((imgSize - wCal) / 2)
                canvas[:, wGap:wCal + wGap] = imgResize
            else:
                # Wider than tall (or square): fill the full width, centre vertically.
                k = imgSize / w
                hCal = math.ceil(k * h)
                imgResize = cv2.resize(imgCrop, (imgSize, hCal))
                hGap = math.ceil((imgSize - hCal) / 2)
                canvas[hGap:hCal + hGap, :] = imgResize
            imgWhite = canvas
            cv2.imshow("ImageCrop", imgCrop)
            cv2.imshow("ImageWhite", imgWhite)
    cv2.imshow("Image", img)
    key = cv2.waitKey(1)
    # Only save once a padded crop exists; otherwise there is nothing to write.
    if key == ord("s") and imgWhite is not None:
        counter += 1
        cv2.imwrite(f'{folder}/Image_{time.time()}.jpg', imgWhite)
cap.release()
cv2.destroyAllWindows()
There is a traceback error when running this code (probably a problem importing the cvzone module); a screenshot of the error is attached.
I tried reinstalling the packages with the same versions used in the video.
How do I deal with this problem?
My project uses object detection by pixel color to identify certain things. I would like the left mouse button to be clicked automatically whenever an identified object passes under my mouse cursor. How do I do that, preferably using mouse_event?
import time
import win32api
from win32gui import GetDC
from PIL import ImageGrab
import numpy as np
import overlay
def filter_detection_result(matrix, result):
    """Keep the entries of *matrix* that occur in *result* and zero the rest.

    Args:
        matrix: Array of pixel values.
        result: Array-like of the values to keep.

    Returns:
        An ``int`` array shaped like *matrix*: each cell whose value appears
        in *result* keeps that value, every other cell is 0.
    """
    mask = np.isin(matrix, result)
    # np.place(out, mask, result) would write result[0], result[1], ...
    # cyclically into the masked cells in scan order, so a cell could end up
    # holding a value other than its own pixel. np.where copies each matching
    # cell's own value instead.
    return np.where(mask, matrix, 0).astype(int)
def rgb_2_int(rgb):
    """Pack the first three channels of *rgb* into one integer.

    Args:
        rgb: Indexable with at least three integer-like items (R, G, B).

    Returns:
        ``(R << 16) + (G << 8) + B`` as a Python ``int``.
    """
    # Convert to Python ints before shifting: with NumPy >= 2 promotion rules a
    # uint8 scalar shifted by a Python int stays uint8, so `value << 8`
    # overflows to 0 and the packed colour comes out wrong.
    red, green, blue = int(rgb[0]), int(rgb[1]), int(rgb[2])
    return (red << 16) + (green << 8) + blue
def img_2_matrix(img):
    """Convert a 3-channel image into a square matrix of packed pixel integers.

    Reads the module-level ``radius``; the image must contain exactly
    ``(radius * 2) ** 2`` pixels of three channels each, otherwise the
    reshape raises ``ValueError``.

    Args:
        img: Anything ``np.asarray`` accepts (e.g. a PIL image).

    Returns:
        A ``(radius * 2, radius * 2)`` integer array where each cell is
        ``(c0 << 16) + (c1 << 8) + c2`` for that pixel's three channels.
    """
    side = radius * 2
    # Widen before shifting: 8-bit channel data cannot hold `value << 16`.
    channels = np.asarray(img).reshape(side ** 2, 3).astype(np.int64)
    # One vectorized pass instead of a Python-level call per pixel.
    packed = (channels[:, 0] << 16) + (channels[:, 1] << 8) + channels[:, 2]
    return packed.reshape(side, side)
# Grab the screen region `field_of_view`, look for pixel values listed in
# `config`, and ask the overlay to draw a box.
# NOTE(review): indentation was lost in this copy, so which statements belong
# inside the `if` below cannot be told from here - confirm against the original.
# `config`, `fov` and `_overlay` are not defined inside this function; they are
# expected to exist at module level when it runs.
def detection(field_of_view):
# Screenshot of the region, converted to one packed integer per pixel.
matrix = img_2_matrix(ImageGrab.grab(field_of_view))
# Values present both on screen and in `config`; np.intersect1d returns
# sorted unique values.
result = np.asarray(np.intersect1d(matrix, config))
# The threshold is on the number of distinct matching values, not on the
# number of matching pixels.
if result.size >= 120:
detected_matrix = filter_detection_result(matrix, result)
# Row/column indices of the non-zero cells; not used further in the visible code.
i, j = np.where(detected_matrix != 0)
# Draws using the module-level `fov`, not the `field_of_view` argument.
_overlay.create_box(fov, win32api.RGB(255, 255, 255))
_overlay = overlay.Overlay(GetDC(0))
radius = 20
config = np.loadtxt('config.txt', dtype=int, delimiter='\n')
print('[+] loaded')
except Exception as err:
while True:
x, y = win32api.GetCursorPos()
fov = (x - radius, y - radius, x + radius, y + radius)
I'm setting up a PPE detection module using OpenVINO on Ubuntu 18.04. The video input works well with my webcam (/dev/video0), but I would like to switch to an RTSP input. Whenever I put my RTSP URL in config.json, it doesn't work and prints: "Either wrong input path or empty line is found. Please check the conf.json file".
Here is the main.py
#!/usr/bin/env python3
from __future__ import print_function
import sys
import os
import cv2
import numpy as np
from argparse import ArgumentParser
import datetime
import json
from inference import Network
# Global vars
cpu_extension = ''
conf_modelLayers = ''
conf_modelWeights = ''
conf_safety_modelLayers = ''
conf_safety_modelWeights = ''
targetDevice = "CPU"
conf_batchSize = 1
conf_modelPersonLabel = 1
conf_inferConfidenceThreshold = 0.7
conf_inFrameViolationsThreshold = 19
conf_inFramePeopleThreshold = 5
use_safety_model = False
padding = 30
viol_wk = 0
acceptedDevices = ['CPU', 'GPU', 'MYRIAD', 'HETERO:FPGA,CPU', 'HDDL']
videos = []
name_of_videos = []
CONFIG_FILE = '../resources/config.json'
is_async_mode = True
class Video:
    """One configured input stream together with its running counters."""

    def __init__(self, idx, path):
        """Open the capture for *path* and reset all counters.

        Args:
            idx: Position of this input in the config; used in the window name.
            path: A camera index given as a digit string (e.g. ``"0"``), a
                path to an existing video file, or a stream URL such as
                ``"rtsp://user:password@host/stream"``.

        Prints a message and exits the process when *path* is none of these
        or when the capture cannot be opened.
        """
        if path.isnumeric():
            self.video = cv2.VideoCapture(int(path))
            self.name = "Cam " + str(idx)
        elif "://" in path or os.path.exists(path):
            # A stream URL is not a file on disk, so os.path.exists alone
            # rejects it. Open the configured path rather than a fixed address.
            self.video = cv2.VideoCapture(path)
            self.name = "Video " + str(idx)
        else:
            print("Either wrong input path or empty line is found. Please check the conf.json file")
            # Without a capture object the isOpened() check below would fail
            # with AttributeError, so stop here.
            sys.exit(1)
        if not self.video.isOpened():
            print("Couldn't open video: " + path)
            sys.exit(1)
        self.height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH))

        # Violation counters.
        self.currentViolationCount = 0
        self.currentViolationCountConfidence = 0
        self.prevViolationCount = 0
        self.totalViolations = 0

        # People counters.
        self.totalPeopleCount = 0
        self.currentPeopleCount = 0
        self.currentPeopleCountConfidence = 0
        self.prevPeopleCount = 0
        self.currentTotalPeopleCount = 0

        cv2.namedWindow(self.name, cv2.WINDOW_NORMAL)
        self.frame_start_time = datetime.datetime.now()
def get_args():
Parses the argument.
:return: None
global is_async_mode
parser = ArgumentParser()
parser.add_argument("-d", "--device",
help="Specify the target device to infer on; CPU, GPU,"
"FPGA, MYRIAD or HDDL is acceptable. Application will"
"look for a suitable plugin for device specified"
" (CPU by default)",
type=str, required=False)
parser.add_argument("-m", "--model",
help="Path to an .xml file with a trained model's"
" weights.",
required=True, type=str)
parser.add_argument("-sm", "--safety_model",
help="Path to an .xml file with a trained model's"
" weights.",
required=False, type=str, default=None)
parser.add_argument("-e", "--cpu_extension",
help="MKLDNN (CPU)-targeted custom layers. Absolute "
"path to a shared library with the kernels impl",
type=str, default=None)
parser.add_argument("-f", "--flag", help="sync or async", default="async", type=str)
args = parser.parse_args()
global conf_modelLayers, conf_modelWeights, conf_safety_modelLayers, conf_safety_modelWeights, \
targetDevice, cpu_extension, videos, use_safety_model
if args.model:
conf_modelLayers = args.model
conf_modelWeights = os.path.splitext(conf_modelLayers)[0] + ".bin"
if args.safety_model:
conf_safety_modelLayers = args.safety_model
conf_safety_modelWeights = os.path.splitext(conf_safety_modelLayers)[0] + ".bin"
use_safety_model = True
if args.device:
targetDevice = args.device
if "MULTI:" not in targetDevice:
if targetDevice not in acceptedDevices:
print("Selected device, %s not supported." % (targetDevice))
if args.cpu_extension:
cpu_extension = args.cpu_extension
if args.flag == "async":
is_async_mode = True
print('Application running in Async mode')
is_async_mode = False
print('Application running in Sync mode')
assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(CONFIG_FILE)
config = json.loads(open(CONFIG_FILE).read())
for idx, item in enumerate(config['inputs']):
vid = Video(idx, item['video'])
name_of_videos.append([idx, item['video']])
videos.append([idx, vid])
def detect_safety_hat(img):
Detection of the hat of the person.
:param img: Current frame
:return: Boolean value of the detected hat
lowH = 15
lowS = 65
lowV = 75
highH = 30
highS = 255
highV = 255
crop = 0
height = 15
perc = 8
hsv = np.zeros(1)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
except cv2.error as e:
print("%d %d %d" % (img.shape))
print("%d %d %d" % (img.shape))
threshold_img = cv2.inRange(hsv, (lowH, lowS, lowV), (highH, highS, highV))
x = 0
y = int(threshold_img.shape[0] * crop / 100)
w = int(threshold_img.shape[1])
h = int(threshold_img.shape[0] * height / 100)
img_cropped = threshold_img[y: y + h, x: x + w]
if cv2.countNonZero(threshold_img) < img_cropped.size * perc / 100:
return False
return True
def detect_safety_jacket(img):
    """
    Detection of the safety jacket of the person.

    :param img: Current frame (BGR image)
    :return: Boolean value of the detected jacket
    """
    lower_hsv = (0, 150, 42)
    upper_hsv = (11, 255, 255)
    crop_top_pct = 15      # band starts this far down the image, in percent
    band_height_pct = 40   # band height, in percent of the image height
    min_fill_pct = 23      # required fill, in percent of the band's size

    mask = cv2.inRange(cv2.cvtColor(img, cv2.COLOR_BGR2HSV), lower_hsv, upper_hsv)

    top = int(mask.shape[0] * crop_top_pct / 100)
    band_rows = int(mask.shape[0] * band_height_pct / 100)
    band = mask[top: top + band_rows, 0: int(mask.shape[1])]

    # NOTE(review): in-range pixels are counted over the whole mask while the
    # threshold is scaled by the band's size; kept exactly as found - confirm
    # this is intended.
    return cv2.countNonZero(mask) >= band.size * min_fill_pct / 100
def detect_workers(workers, frame):
    """
    Detection of the person with the safety guards.

    Draws a green box on *frame* for each worker with both hat and jacket
    detected, a red box otherwise, and adds every violation to the
    module-level ``viol_wk`` counter.

    :param workers: Iterable of (xmin, ymin, xmax, ymax) boxes in the current frame
    :param frame: Current frame
    :return: Total violation count of the person
    """
    global viol_wk
    green = (0, 255, 0)
    red = (0, 0, 255)
    violations = 0
    for xmin, ymin, xmax, ymax in workers:
        crop = frame[ymin:ymax, xmin:xmax]
        if 0 in crop.shape:
            # Empty crop: nothing to check or draw.
            continue
        # The jacket check only runs when a hat was found (short-circuit).
        if detect_safety_hat(crop) and detect_safety_jacket(crop):
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), green, 2)
            continue
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), red, 2)
        violations += 1
        viol_wk += 1
    return violations
def main():
Load the network and parse the output.
:return: None
global is_async_mode
nextReq = 1
currReq = 0
nextReq_s = 1
currReq_s = 0
prevVideo = None
vid_finished = [False] * len(videos)
min_FPS = min([videos[i][1].video.get(cv2.CAP_PROP_FPS) for i in range(len(videos))])
# Initialise the class
infer_network = Network()
infer_network_safety = Network()
# Load the network to IE plugin to get shape of input layer
plugin, (batch_size, channels, model_height, model_width) = \
infer_network.load_model(conf_modelLayers, targetDevice, 1, 1, 2, cpu_extension)
if use_safety_model:
batch_size_sm, channels_sm, model_height_sm, model_width_sm = \
infer_network_safety.load_model(conf_safety_modelLayers, targetDevice, 1, 1, 2, cpu_extension, plugin)[1]
while True:
for index, currVideo in videos:
# Read image from video/cam
vfps = int(round(currVideo.video.get(cv2.CAP_PROP_FPS)))
for i in range(0, int(round(vfps / min_FPS))):
ret, current_img = currVideo.video.read()
if not ret:
vid_finished[index] = True
if vid_finished[index]:
stream_end_frame = np.zeros((int(currVideo.height), int(currVideo.width), 1),
cv2.putText(stream_end_frame, "Input file {} has ended".format
(10, int(currVideo.height / 2)),
cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
cv2.imshow(currVideo.name, stream_end_frame)
# Transform image to person detection model input
rsImg = cv2.resize(current_img, (model_width, model_height))
rsImg = rsImg.transpose((2, 0, 1))
rsImg = rsImg.reshape((batch_size, channels, model_height, model_width))
infer_start_time = datetime.datetime.now()
# Infer current image
if is_async_mode:
infer_network.exec_net(nextReq, rsImg)
infer_network.exec_net(currReq, rsImg)
prevVideo = currVideo
previous_img = current_img
# Wait for previous request to end
if infer_network.wait(currReq) == 0:
infer_end_time = (datetime.datetime.now() - infer_start_time) * 1000
in_frame_workers = []
people = 0
violations = 0
hard_hat_detection = False
vest_detection = False
result = infer_network.get_output(currReq)
# Filter output
for obj in result[0][0]:
if obj[2] > conf_inferConfidenceThreshold:
xmin = int(obj[3] * prevVideo.width)
ymin = int(obj[4] * prevVideo.height)
xmax = int(obj[5] * prevVideo.width)
ymax = int(obj[6] * prevVideo.height)
xmin = int(xmin - padding) if (xmin - padding) > 0 else 0
ymin = int(ymin - padding) if (ymin - padding) > 0 else 0
xmax = int(xmax + padding) if (xmax + padding) < prevVideo.width else prevVideo.width
ymax = int(ymax + padding) if (ymax + padding) < prevVideo.height else prevVideo.height
cv2.rectangle(previous_img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
people += 1
in_frame_workers.append((xmin, ymin, xmax, ymax))
new_frame = previous_img[ymin:ymax, xmin:xmax]
if use_safety_model:
# Transform image to safety model input
in_frame_sm = cv2.resize(new_frame, (model_width_sm, model_height_sm))
in_frame_sm = in_frame_sm.transpose((2, 0, 1))
in_frame_sm = in_frame_sm.reshape(
(batch_size_sm, channels_sm, model_height_sm, model_width_sm))
infer_start_time_sm = datetime.datetime.now()
if is_async_mode:
infer_network_safety.exec_net(nextReq_s, in_frame_sm)
infer_network_safety.exec_net(currReq_s, in_frame_sm)
# Wait for the result
infer_end_time_sm = (datetime.datetime.now() - infer_start_time_sm) * 1000
result_sm = infer_network_safety.get_output(currReq_s)
# Filter output
hard_hat_detection = False
vest_detection = False
detection_list = []
for obj_sm in result_sm[0][0]:
if (obj_sm[2] > 0.4):
# Detect safety vest
if (int(obj_sm[1])) == 2:
xmin_sm = int(obj_sm[3] * (xmax - xmin))
ymin_sm = int(obj_sm[4] * (ymax - ymin))
xmax_sm = int(obj_sm[5] * (xmax - xmin))
ymax_sm = int(obj_sm[6] * (ymax - ymin))
if vest_detection == False:
[xmin_sm + xmin, ymin_sm + ymin, xmax_sm + xmin, ymax_sm + ymin])
vest_detection = True
# Detect hard-hat
if int(obj_sm[1]) == 4:
xmin_sm_v = int(obj_sm[3] * (xmax - xmin))
ymin_sm_v = int(obj_sm[4] * (ymax - ymin))
xmax_sm_v = int(obj_sm[5] * (xmax - xmin))
ymax_sm_v = int(obj_sm[6] * (ymax - ymin))
if hard_hat_detection == False:
detection_list.append([xmin_sm_v + xmin, ymin_sm_v + ymin, xmax_sm_v + xmin,
ymax_sm_v + ymin])
hard_hat_detection = True
if hard_hat_detection is False or vest_detection is False:
violations += 1
for _rect in detection_list:
cv2.rectangle(current_img, (_rect[0], _rect[1]), (_rect[2], _rect[3]), (0, 255, 0), 2)
if is_async_mode:
currReq_s, nextReq_s = nextReq_s, currReq_s
# Use OpenCV if worker-safety-model is not provided
violations = detect_workers(in_frame_workers, previous_img)
# Check if detected violations equals previous frames
if violations == prevVideo.currentViolationCount:
prevVideo.currentViolationCountConfidence += 1
# If frame threshold is reached, change validated count
if prevVideo.currentViolationCountConfidence == conf_inFrameViolationsThreshold:
# If another violation occurred, save image
if prevVideo.currentViolationCount > prevVideo.prevViolationCount:
prevVideo.totalViolations += (
prevVideo.currentViolationCount - prevVideo.prevViolationCount)
prevVideo.prevViolationCount = prevVideo.currentViolationCount
prevVideo.currentViolationCountConfidence = 0
prevVideo.currentViolationCount = violations
# Check if detected people count equals previous frames
if people == prevVideo.currentPeopleCount:
prevVideo.currentPeopleCountConfidence += 1
# If frame threshold is reached, change validated count
if prevVideo.currentPeopleCountConfidence == conf_inFrameViolationsThreshold:
prevVideo.currentTotalPeopleCount += (
prevVideo.currentPeopleCount - prevVideo.prevPeopleCount)
if prevVideo.currentTotalPeopleCount > prevVideo.prevPeopleCount:
prevVideo.totalPeopleCount += prevVideo.currentTotalPeopleCount - prevVideo.prevPeopleCount
prevVideo.prevPeopleCount = prevVideo.currentPeopleCount
prevVideo.currentPeopleCountConfidence = 0
prevVideo.currentPeopleCount = people
frame_end_time = datetime.datetime.now()
cv2.putText(previous_img, 'Total people count: ' + str(
prevVideo.totalPeopleCount), (10, prevVideo.height - 10),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'Current people count: ' + str(
(10, prevVideo.height - 40),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'Total violation count: ' + str(
prevVideo.totalViolations), (10, prevVideo.height - 70),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'FPS: %0.2fs' % (1 / (
frame_end_time - prevVideo.frame_start_time).total_seconds()),
(10, prevVideo.height - 100),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, "Inference time: N\A for async mode" if is_async_mode else \
"Inference time: {:.3f} ms".format((infer_end_time).total_seconds()),
(10, prevVideo.height - 130),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.imshow(prevVideo.name, previous_img)
prevVideo.frame_start_time = datetime.datetime.now()
# Swap
if is_async_mode:
currReq, nextReq = nextReq, currReq
previous_img = current_img
prevVideo = currVideo
if cv2.waitKey(1) == 27:
print("Attempting to stop input files")
if False not in vid_finished:
if __name__ == '__main__':
Here is the config file
"inputs": [
"video": "rtsp://xxx:xxx#"
This is caused by the line `if os.path.exists(path):`. That condition checks whether `path` points to an existing file. Since your RTSP stream is not a file, the check fails and you get this error.
For example, you can modify this condition to:
if os.path.exists(path) or path.startswith("rtsp"):
By the way, you hard-coded the RTSP stream address in the code, so it will not use the path from your configuration. You may want to replace the hard-coded address with `path`.
I'm creating a heatmap for a CNN as per this tutorial.
In the last part:
def create_patiens_cam(case, plane):
patient_id = case['id']
mri = case['mri']
folder_path = f'./CAMS/{plane}/{patient_id}/'
if os.path.isdir(folder_path):
os.makedirs(folder_path + 'slices/')
os.makedirs(folder_path + 'cams/')
params = list(mrnet.parameters())
weight_softmax = np.squeeze(params[-2].cpu().data.numpy())
num_slices = mri.shape[1]
global feature_blobs
feature_blobs = []
mri = mri.to(device)
logit = mrnet(mri)
size_upsample = (256, 256)
feature_conv = feature_blobs[0]
h_x = F.softmax(logit, dim=1).data.squeeze(0)
probs, idx = h_x.sort(0, True)
probs = probs.cpu().numpy()
idx = idx.cpu().numpy()
slice_cams = returnCAM(feature_blobs[-1], weight_softmax, idx[:1])
for s in tqdm_notebook(range(num_slices), leave=False):
slice_pil = (transforms
.ToPILImage()(mri.cpu()[0][s] / 255))
slice_pil.save(folder_path + f'slices/{s}.png',
dpi=(300, 300))
img = mri[0][s].cpu().numpy()
img = img.transpose(1, 2, 0)
heatmap = (cv2
cv2.resize(slice_cams[s], (256, 256)),
result = heatmap * 0.3 + img * 0.5
pil_img_cam = Image.fromarray(np.uint8(result))
pil_img_cam.save(folder_path + f'cams/{s}.png', dpi=(300, 300))
I have created a folder "CAMS" in my 'mrnet' folder. However, when I run this last piece of code (in a Jupyter notebook), I get no errors, but no PNGs are created. Does anyone have an idea what could be wrong, or where I could look to find out, given that I get no errors?
# -*- coding: utf-8 -*-
Created on Sat Mar 13 21:54:40 2021
#author: GlaDOS
import os
import io
import requests
from PIL import Image
from torchvision import models, transforms
from torch.autograd import Variable
from torch.nn import functional as F
import numpy as np
import cv2
import pdb
from matplotlib import pyplot as plt
import sys
import shutil
import torch
import model
from dataloader import MRDataset
from tqdm import tqdm_notebook
task = 'acl'
plane = 'sagittal'
prefix = 'sag'
model_name = [name for name in os.listdir('C:/Users/GlaDOS/mrnet/models/')
if (task in name) and
(plane in name) and
(prefix in name)][0]
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")
mrnet = torch.load(f'C:/Users/GlaDOS/mrnet/models/{model_name}')
mrnet = mrnet.to(device)
_ = mrnet.eval()
dataset = MRDataset('C:/Users/GlaDOS/mrnet/data/',
loader = torch.utils.data.DataLoader(dataset,
def returnCAM(feature_conv, weight_softmax, class_idx):
    """Build one upsampled class-activation map per (slice, class) pair.

    Args:
        feature_conv: 4-D array of shape (slices, channels, h, w).
            Assumes a NumPy array - TODO confirm against the forward hook.
        weight_softmax: Indexable by class index; each entry is dotted with
            the flattened (channels, h*w) features of a slice.
        class_idx: Iterable of class indices to produce maps for.

    Returns:
        A list of 256x256 ``uint8`` maps, ordered slice by slice and, within
        a slice, in the order of *class_idx*.
    """
    size_upsample = (256, 256)
    bz, nc, h, w = feature_conv.shape
    slice_cams = []
    for s in range(bz):
        # Flatten the spatial dimensions once per slice, not once per class.
        flat_features = feature_conv[s].reshape((nc, h * w))
        for idx in class_idx:
            cam = weight_softmax[idx].dot(flat_features).reshape(h, w)
            cam = cam - np.min(cam)
            peak = np.max(cam)
            # A constant map has peak 0 after the shift; dividing by it would
            # produce NaN and an undefined uint8 image, so emit zeros instead.
            cam_img = cam / peak if peak > 0 else np.zeros_like(cam)
            cam_img = np.uint8(255 * cam_img)
            slice_cams.append(cv2.resize(cam_img, size_upsample))
    return slice_cams
patients = []
for i, (image, label, _) in tqdm_notebook(enumerate(loader), total=len(loader)):
patient_data = {}
patient_data['mri'] = image
patient_data['label'] = label[0][0][1].item()
patient_data['id'] = '0' * (4 - len(str(i))) + str(i)
acl = list(filter(lambda d: d['label'] == 1, patients))
def create_patiens_cam(case, plane):
patient_id = case['id']
mri = case['mri']
folder_path = f'C:/Users/GlaDOS/mrnet/cams/{plane}/{patient_id}/'
if os.path.isdir(folder_path):
os.makedirs(folder_path + 'slices/')
os.makedirs(folder_path + 'cams/')
params = list(mrnet.parameters())
weight_softmax = np.squeeze(params[-2].cpu().data.numpy())
num_slices = mri.shape[1]
global feature_blobs
feature_blobs = []
mri = mri.to(device)
logit = mrnet(mri)
size_upsample = (256, 256)
feature_conv = feature_blobs[0]
h_x = F.softmax(logit, dim=1).data.squeeze(0)
probs, idx = h_x.sort(0, True)
probs = probs.cpu().numpy()
idx = idx.cpu().numpy()
slice_cams = returnCAM(feature_blobs[-1], weight_softmax, idx[:1])
for s in tqdm_notebook(range(num_slices), leave=False):
slice_pil = (transforms
.ToPILImage()(mri.cpu()[0][s] / 255))
slice_pil.save(folder_path + f'slices/{s}.png',
dpi=(300, 300))
img = mri[0][s].cpu().numpy()
img = img.transpose(1, 2, 0)
heatmap = (cv2
cv2.resize(slice_cams[s], (256, 256)),
result = heatmap * 0.3 + img * 0.5
pil_img_cam = Image.fromarray(np.uint8(result))
pil_img_cam.save(folder_path + f'cams/{s}.png', dpi=(300, 300))
Use seaborn:
import seaborn as sns
I wanted to find out how the video frame length was calculated in the below code.
[UPD] Before I was thinking it was done by Yolo, but later I realized it was OpenCV that dealt with number of frames in a video file.
Class definition of YOLO_v3 style detection model on image and video
import colorsys
import os
from timeit import default_timer as timer
import numpy as np
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw
from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import letterbox_image
import os
from keras.utils import multi_gpu_model
class YOLO(object):
_defaults = {
"model_path": 'model_data/yolo.h5',
"anchors_path": 'model_data/yolo_anchors.txt',
"classes_path": 'model_data/coco_classes.txt',
"score" : 0.3,
"iou" : 0.45,
"model_image_size" : (416, 416),
"gpu_num" : 1,
def get_defaults(cls, n):
    """Return the default registered for *n* in ``cls._defaults``.

    When *n* is not a known name, an "Unrecognized attribute name" message
    string is returned instead of raising.
    """
    if n not in cls._defaults:
        return "Unrecognized attribute name '" + n + "'"
    return cls._defaults[n]
def __init__(self, **kwargs):
self.__dict__.update(self._defaults) # set up default values
self.__dict__.update(kwargs) # and update with user overrides
self.class_names = self._get_class()
self.anchors = self._get_anchors()
self.sess = K.get_session()
self.boxes, self.scores, self.classes = self.generate()
def _get_class(self):
classes_path = os.path.expanduser(self.classes_path)
with open(classes_path) as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names
def _get_anchors(self):
anchors_path = os.path.expanduser(self.anchors_path)
with open(anchors_path) as f:
anchors = f.readline()
anchors = [float(x) for x in anchors.split(',')]
return np.array(anchors).reshape(-1, 2)
def generate(self):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'weights must be a .h5 file.'
# Load model, or construct model and load weights.
num_anchors = len(self.anchors)
num_classes = len(self.class_names)
is_tiny_version = num_anchors==6 # default setting
self.yolo_model = load_model(model_path, compile=False)
self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \
if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match
assert self.yolo_model.layers[-1].output_shape[-1] == \
num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
'Mismatch between model and given anchor and class sizes'
print('{} model, anchors, and classes loaded.'.format(model_path))
# Generate colors for drawing bounding boxes.
hsv_tuples = [(x / len(self.class_names), 1., 1.)
for x in range(len(self.class_names))]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(
map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
np.random.seed(10101) # Fixed seed for consistent colors across runs.
np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.
np.random.seed(None) # Reset seed to default.
# Generate output tensor targets for filtered bounding boxes.
self.input_image_shape = K.placeholder(shape=(2, ))
if self.gpu_num>=2:
self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
len(self.class_names), self.input_image_shape,
score_threshold=self.score, iou_threshold=self.iou)
return boxes, scores, classes
def detect_image(self, image):
start = timer()
if self.model_image_size != (None, None):
assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
new_image_size = (image.width - (image.width % 32),
image.height - (image.height % 32))
boxed_image = letterbox_image(image, new_image_size)
image_data = np.array(boxed_image, dtype='float32')
image_data /= 255.
image_data = np.expand_dims(image_data, 0) # Add batch dimension.
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
thickness = (image.size[0] + image.size[1]) // 300
for i, c in reversed(list(enumerate(out_classes))):
predicted_class = self.class_names[c]
box = out_boxes[i]
score = out_scores[i]
label = '{} {:.2f}'.format(predicted_class, score)
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
top, left, bottom, right = box
top = max(0, np.floor(top + 0.5).astype('int32'))
left = max(0, np.floor(left + 0.5).astype('int32'))
bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
print(label, (left, top), (right, bottom))
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
text_origin = np.array([left, top + 1])
# My kingdom for a good redistributable image drawing library.
for i in range(thickness):
[left + i, top + i, right - i, bottom - i],
[tuple(text_origin), tuple(text_origin + label_size)],
draw.text(text_origin, label, fill=(0, 0, 0), font=font)
del draw
end = timer()
print(end - start)
return image
def close_session(self):
def detect_video(yolo, video_path, output_path=""):
import cv2
video_path = './input.mp4'
vid = cv2.VideoCapture(video_path)
if not vid.isOpened():
raise IOError("Couldn't open webcam or video")
video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
video_fps = vid.get(cv2.CAP_PROP_FPS)
video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
isOutput = True if output_path != "" else False
if isOutput:
print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
accum_time = 0
curr_fps = 0
fps = "FPS: ??"
prev_time = timer()
while True:
return_value, frame = vid.read()
image = Image.fromarray(frame)
image = yolo.detect_image(image)
result = np.asarray(image)
curr_time = timer()
exec_time = curr_time - prev_time
prev_time = curr_time
accum_time = accum_time + exec_time
curr_fps = curr_fps + 1
if accum_time == 10 : mouseBrush(image)
if accum_time > 1:
accum_time = accum_time - 1
fps = "FPS: " + str(curr_fps)
curr_fps = 0
cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.50, color=(255, 0, 0), thickness=2)
cv2.namedWindow("result", cv2.WINDOW_NORMAL)
cv2.imshow("result", result)
if isOutput:
if cv2.waitKey(1) & 0xFF == ord('q'):
Actually, this code is just one part of the all Yolo3 model, but I think the part that deals with the number of video frames is included here.
If you mean the current FPS, this is the part that builds the current FPS string and draws it on the frame:
while True:
return_value, frame = vid.read()
image = Image.fromarray(frame)
image = yolo.detect_image(image)
result = np.asarray(image)
curr_time = timer()
exec_time = curr_time - prev_time
prev_time = curr_time
accum_time = accum_time + exec_time
curr_fps = curr_fps + 1
if accum_time > 1:
accum_time = accum_time - 1
fps = "FPS: " + str(curr_fps)
curr_fps = 0
cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.50, color=(255, 0, 0), thickness=2)
cv2.namedWindow("result", cv2.WINDOW_NORMAL)
cv2.imshow("result", result)
if curr_fps == 10: # Stops at 10th frame.
time.sleep(60) # Delay for 1 minute (60 seconds).
if isOutput:
if cv2.waitKey(1) & 0xFF == ord('q'):
I needed the frame number to control every 10th frame in the video file, and thanks to above comments, I figured out that the line I was looking for is:
curr_fps = curr_fps + 1
UPD: The following line returns the number of frames in a video file.
NumberOfFrame = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))