I wanted to find out how the video frame count was calculated in the code below.
[UPD] At first I thought this was done by YOLO, but later I realized it is OpenCV that deals with the number of frames in a video file.
"""
Class definition of YOLO_v3 style detection model on image and video
"""
import colorsys
import os
from timeit import default_timer as timer
import numpy as np
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw
from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import letterbox_image
import os
from keras.utils import multi_gpu_model
class YOLO(object):
_defaults = {
"model_path": 'model_data/yolo.h5',
"anchors_path": 'model_data/yolo_anchors.txt',
"classes_path": 'model_data/coco_classes.txt',
"score" : 0.3,
"iou" : 0.45,
"model_image_size" : (416, 416),
"gpu_num" : 1,
}
@classmethod
def get_defaults(cls, n):
if n in cls._defaults:
return cls._defaults[n]
else:
return "Unrecognized attribute name '" + n + "'"
def __init__(self, **kwargs):
self.__dict__.update(self._defaults) # set up default values
self.__dict__.update(kwargs) # and update with user overrides
self.class_names = self._get_class()
self.anchors = self._get_anchors()
self.sess = K.get_session()
self.boxes, self.scores, self.classes = self.generate()
def _get_class(self):
classes_path = os.path.expanduser(self.classes_path)
with open(classes_path) as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names
def _get_anchors(self):
anchors_path = os.path.expanduser(self.anchors_path)
with open(anchors_path) as f:
anchors = f.readline()
anchors = [float(x) for x in anchors.split(',')]
return np.array(anchors).reshape(-1, 2)
def generate(self):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'weights must be a .h5 file.'
# Load model, or construct model and load weights.
num_anchors = len(self.anchors)
num_classes = len(self.class_names)
is_tiny_version = num_anchors==6 # default setting
try:
self.yolo_model = load_model(model_path, compile=False)
except:
self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \
if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match
else:
assert self.yolo_model.layers[-1].output_shape[-1] == \
num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
'Mismatch between model and given anchor and class sizes'
print('{} model, anchors, and classes loaded.'.format(model_path))
# Generate colors for drawing bounding boxes.
hsv_tuples = [(x / len(self.class_names), 1., 1.)
for x in range(len(self.class_names))]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(
map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
self.colors))
np.random.seed(10101) # Fixed seed for consistent colors across runs.
np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.
np.random.seed(None) # Reset seed to default.
# Generate output tensor targets for filtered bounding boxes.
self.input_image_shape = K.placeholder(shape=(2, ))
if self.gpu_num>=2:
self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
len(self.class_names), self.input_image_shape,
score_threshold=self.score, iou_threshold=self.iou)
return boxes, scores, classes
def detect_image(self, image):
start = timer()
if self.model_image_size != (None, None):
assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
else:
new_image_size = (image.width - (image.width % 32),
image.height - (image.height % 32))
boxed_image = letterbox_image(image, new_image_size)
image_data = np.array(boxed_image, dtype='float32')
print(image_data.shape)
image_data /= 255.
image_data = np.expand_dims(image_data, 0) # Add batch dimension.
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
})
print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
thickness = (image.size[0] + image.size[1]) // 300
for i, c in reversed(list(enumerate(out_classes))):
predicted_class = self.class_names[c]
box = out_boxes[i]
score = out_scores[i]
label = '{} {:.2f}'.format(predicted_class, score)
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
top, left, bottom, right = box
top = max(0, np.floor(top + 0.5).astype('int32'))
left = max(0, np.floor(left + 0.5).astype('int32'))
bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
print(label, (left, top), (right, bottom))
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
else:
text_origin = np.array([left, top + 1])
# My kingdom for a good redistributable image drawing library.
for i in range(thickness):
draw.rectangle(
[left + i, top + i, right - i, bottom - i],
outline=self.colors[c])
draw.rectangle(
[tuple(text_origin), tuple(text_origin + label_size)],
fill=self.colors[c])
draw.text(text_origin, label, fill=(0, 0, 0), font=font)
del draw
end = timer()
print(end - start)
return image
def close_session(self):
self.sess.close()
def detect_video(yolo, video_path, output_path=""):
import cv2
video_path = './input.mp4'
vid = cv2.VideoCapture(video_path)
if not vid.isOpened():
raise IOError("Couldn't open webcam or video")
video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
video_fps = vid.get(cv2.CAP_PROP_FPS)
video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
isOutput = True if output_path != "" else False
if isOutput:
print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
accum_time = 0
curr_fps = 0
fps = "FPS: ??"
prev_time = timer()
while True:
return_value, frame = vid.read()
image = Image.fromarray(frame)
image = yolo.detect_image(image)
result = np.asarray(image)
curr_time = timer()
exec_time = curr_time - prev_time
prev_time = curr_time
accum_time = accum_time + exec_time
curr_fps = curr_fps + 1
if accum_time == 10 : mouseBrush(image)
if accum_time > 1:
accum_time = accum_time - 1
fps = "FPS: " + str(curr_fps)
curr_fps = 0
cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.50, color=(255, 0, 0), thickness=2)
cv2.namedWindow("result", cv2.WINDOW_NORMAL)
cv2.imshow("result", result)
if isOutput:
out.write(result)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
yolo.close_session()
Actually, this code is just one part of the whole YOLOv3 model, but I think the part that deals with the number of video frames is included here.
If you mean the current FPS, this is the part that builds the current FPS string:
while True:
return_value, frame = vid.read()
image = Image.fromarray(frame)
image = yolo.detect_image(image)
result = np.asarray(image)
curr_time = timer()
exec_time = curr_time - prev_time
prev_time = curr_time
accum_time = accum_time + exec_time
curr_fps = curr_fps + 1
if accum_time > 1:
accum_time = accum_time - 1
fps = "FPS: " + str(curr_fps)
curr_fps = 0
cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.50, color=(255, 0, 0), thickness=2)
cv2.namedWindow("result", cv2.WINDOW_NORMAL)
cv2.imshow("result", result)
if curr_fps == 10: # Stops at 10th frame.
time.sleep(60) # Delay for 1 minute (60 seconds).
if isOutput:
out.write(result)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
I needed the frame number so I could act on every 10th frame in the video file, and thanks to the comments above, I figured out that the line I was looking for is:
curr_fps = curr_fps + 1
UPD: The following line calculates the total number of frames in a video file:
NumberOfFrame = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
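For reference, here is a minimal standalone sketch (not part of yolo.py; './input.mp4' is just a placeholder path) showing how OpenCV exposes the frame count, the FPS, and the index of the current frame, which is enough to act on every 10th frame:
import cv2
vid = cv2.VideoCapture('./input.mp4')
total_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))   # number of frames in the file
fps = vid.get(cv2.CAP_PROP_FPS)
print(total_frames, fps)
while True:
    ret, frame = vid.read()
    if not ret:
        break
    frame_idx = int(vid.get(cv2.CAP_PROP_POS_FRAMES))   # frames read so far
    if frame_idx % 10 == 0:
        # act on every 10th frame here
        pass
vid.release()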
Related
I am trying to call my face recognition model, implemented in Keras, through a Flask API. I am unable to call the model using different cam URLs as a parameter.
I am getting the following error:
TypeError: Cannot interpret feed_dict key as Tensor: Tensor Tensor("Placeholder_50:0", shape=(3, 3, 3, 32), dtype=float32) is not an element of this graph.
127.0.0.1 - - [23/Nov/2022 13:39:49] "GET /api/recognise?url=rtsp://admin:inndata123#10.10.5.202:554/cam/realmonitor?channel=1&subtype=0 HTTP/1.1" 500 -
I found that creating a new session for each thread can fix this, but I don't have any idea where to place those lines in my code.
# running db and email functions in background and parallalized action and bbox dist loops
import json
import os
import pickle
import cv2
import imutils
import dlib
import torch
import time
import numpy as np
import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image, ImageFont, ImageDraw
from script.fx import prewhiten, l2_normalize
from keras.models import load_model
from scipy.spatial import distance
from mtcnn.mtcnn import MTCNN
from script.generate_data import generate_embeddings
import mysql.connector
from mysql.connector import (connection)
import smtplib
import mimetypes
from email.message import EmailMessage
message = EmailMessage()
import tensorflow as tf
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
inter_op_parallelism_threads=2)
from flask import Flask, jsonify, request,render_template,Response
app = Flask(__name__)
global graph
graph = tf.get_default_graph()
sess = tf.Session(graph=graph, config=session_conf)
model_path = './data/model/facenet_keras.h5'
font_path = './data/font/Calibri Regular.ttf'
embedding_path = './data/arrays/embeddings.npz'
vars_path = './data/arrays/vars.npz'
curr_time = datetime.datetime.now()
time_date = curr_time.strftime('%Y-%m-%d %H:%M:%S')
only_date= curr_time.strftime('%Y-%m-%d')
login_time = curr_time.replace(hour=8, minute=0, second=0, microsecond=0)
logout_time = curr_time.replace(hour=17, minute=15, second=0, microsecond=0)
if os.path.exists(embedding_path) == True:
print('Loadings embeddings...')
loaded_embeddings = np.load(embedding_path)
embeddings, names = loaded_embeddings['a'], loaded_embeddings['b']
loaded_vars = np.load(vars_path)
slope, intercept = loaded_vars['a'], loaded_vars['b']
else:
print('Creatings embeddings...')
generate_embeddings()
loaded_embeddings = np.load(embedding_path)
embeddings, names = loaded_embeddings['a'], loaded_embeddings['b']
loaded_vars = np.load(vars_path)
slope, intercept = loaded_vars['a'], loaded_vars['b']
location='IE'
cam_id='Entrance-Cam'
frame_count = 0
frame_number = 0
bbox_centers = []
log_in = []
log_out = []
date_list = []
mins_lst = []
@app.route('/api/recognise')
def recognise():
url = request.args.get('url')
if url!=str(0):
subtype=request.args.get('subtype')
url=url+'&'+'subtype='+subtype
print(url)
else:url=int(url)
video_sources = cv2.VideoCapture(url)
detector = MTCNN()
model = load_model(model_path, compile=False)
graph = tf.get_default_graph()
def inner():
frame_count = 0
frame_number = 0
while 1:
start= time.time()
var, frame = video_sources.read()
if frame is not None:
small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
# Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
rgb_small_frame = small_frame[:, :, ::-1]
# frame = cv2.resize(frame, (1500, 1000))
if frame_count % 10 == 0 and rgb_small_frame is not None:
faces = detector.detect_faces(rgb_small_frame) # result
#print(faces)
print('faces :',len(faces))
for result in faces:
x_face, y_face, w_face, h_face = result['box']
x_face = x_face * 4
y_face = y_face * 4
w_face = w_face * 4
h_face = h_face * 4
x_face2=w_face+x_face
y_face2=h_face+y_face
#face bbox tuples
face_tuple1=(x_face,y_face)
face_tuple2=(x_face2,y_face2)
#zone bbox tuples
zone_tuple1 = (950, 700)
zone_tuple2 = (2000, 1050)
# Margins for Face box
dw = 0.1 * w_face
dh = 0.2 * h_face
#center = (x_face + w_face // 2, y_face + h_face // 2)
#cv2.rectangle(frame, zone_tuple1, zone_tuple2, (255, 0, 0), 2)
#if (all(x > y for x, y in zip(face_tuple1, zone_tuple1)))==True and (all(x < y for x, y in zip(face_tuple2, zone_tuple2)))==True:
# radius=2
with graph.as_default():
dist = []
for i in range(len(embeddings)):
dist.append(distance.euclidean(l2_normalize(model.predict(prewhiten(
cv2.resize(frame[y_face:y_face + h_face, x_face:x_face + w_face], (160, 160)).reshape(
-1, 160,
160,
3)))),
embeddings[i].reshape(1, 128)))
dist = np.array(dist)
if os.path.exists(only_date + '.txt') == False:
f = open(only_date + '.txt', "a+")
log_in.clear()
log_out.clear()
else:
if dist.min() > 1.20:
log = 'Unauthorized Entry'
emp_id = 'None'
f1 = open("unauthorised.txt", "a")
f1.writelines(f"\n{cam_id},{time_date},{log}")
elif dist.min() <= 1:
emp_id = names[dist.argmin()]
if int(emp_id) not in log_in and curr_time >= login_time:
log = 'punch-in'
f2 = open(only_date + '.txt', "a")
f2.writelines(f"\n{cam_id},{emp_id},{time_date},{log}")
f2.close()
log_in.append(int(emp_id))
print(log_in)
if int(emp_id) in log_in and curr_time >= logout_time and int(emp_id) not in log_out:
# and center[0] > 750 and center[0] > 960:
log = 'punch-out'
f2 = open(only_date + '.txt', "a")
f2.writelines(f"\n{cam_id},{emp_id},{time_date},{log}")
f2.close()
log_out.append(int(emp_id))
else:
emp_id = 'None'
log = 'unidentified'
if emp_id != 'unauthorized' and emp_id != 'unidentified':
font_size = int(
slope[dist.argmin()] * ((w_face + 2 * dw) // 3) * 2 + intercept[dist.argmin()])
color = (0, 255, 0)
elif emp_id == 'unauthorized':
font_size = int(
slope[dist.argmin()] * ((w_face + 2 * dw) // 3) * 2 + intercept[dist.argmin()])
color = (0, 0, 255)
else:
font_size = int(0.1974311 * ((w_face + 2 * dw) // 3) * 2 + 0.03397702412218706)
color = (0, 255, 0)
font = ImageFont.truetype(font_path, font_size)
size = font.getbbox(emp_id)
cv2.rectangle(frame,
pt1=(x_face - int(np.floor(dw)), (y_face - int(np.floor(dh)))),
pt2=(
(x_face + w_face + int(np.ceil(dw))), (y_face + h_face + int(np.ceil(dh)))),
color=(0, 255, 0),
thickness=2) # Face Rectangle
cv2.rectangle(frame,
pt1=(x_face - int(np.floor(dw)), y_face - int(np.floor(dh)) - size[1]),
pt2=(x_face + size[0], y_face - int(np.floor(dh))),
color=(0, 255, 0),
thickness=-1)
img = Image.fromarray(frame)
draw = ImageDraw.Draw(img)
draw.text((x_face - int(np.floor(dw)), y_face - int(np.floor(dh)) - size[1]), emp_id,
font=font,
fill=color)
frame = np.array(img)
if emp_id == 'unauthorized':
frame_name = f'{emp_id}_{frame_number}.jpg'
cv2.imwrite(f'data/unauthorized_faces/{frame_name}',
cv2.resize(frame[y_face:y_face + h_face, x_face:x_face + w_face],
(250, 250)))
elif emp_id != 'unauthorised' and emp_id != 'unidentified':
frame_name = f'{emp_id}_{frame_number}.jpg'
cv2.imwrite(f'data/detected_faces/{frame_name}',
cv2.resize(frame[y_face:y_face + h_face, x_face:x_face + w_face],
(250, 250)))
# add_attachment(frame_name)
frame_number += 1
end = time.time()
print(end-start)
print(emp_id)
if log != 'unidentified':
data = {'emp_id': emp_id, 'date': time_date, 'log': log}
yield json.dumps(data) + "\n"
# cv2.imshow('Frame', cv2.resize(frame, (950, 950)))
if cv2.waitKey(15) & 255 == ord('q'):
break
else:
continue
return Response(inner(), mimetype='application/json')
if __name__=='__main__':
app.run(host="0.0.0.0",threaded=True)
This is my face recognition model integrated with Flask.
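A minimal sketch of where those graph/session lines usually go in a TF1-style Keras + Flask setup. The names mirror the objects already created at the top of the script above; this shows the standard pattern, not a tested drop-in for this exact code:
import tensorflow as tf
from keras import backend as K
from keras.models import load_model

graph = tf.get_default_graph()
sess = tf.Session(graph=graph)
K.set_session(sess)                      # register the session once, before loading the model
model = load_model('./data/model/facenet_keras.h5', compile=False)

def embed(face_batch):
    # Called from any Flask worker thread: re-enter the same graph/session before predict.
    with graph.as_default():
        with sess.as_default():
            return model.predict(face_batch)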
I'm setting up a PPE Detection module using OpenVINO on Ubuntu 18.04. The video input works well with my webcam (/dev/video0), but I want to change it to an RTSP input. Whenever I put my RTSP URL inside config.json, it doesn't work and shows me "Either wrong input path or empty line is found. Please check the conf.json file".
Here is the main.py
#!/usr/bin/env python3
from __future__ import print_function
import sys
import os
import cv2
import numpy as np
from argparse import ArgumentParser
import datetime
import json
from inference import Network
# Global vars
cpu_extension = ''
conf_modelLayers = ''
conf_modelWeights = ''
conf_safety_modelLayers = ''
conf_safety_modelWeights = ''
targetDevice = "CPU"
conf_batchSize = 1
conf_modelPersonLabel = 1
conf_inferConfidenceThreshold = 0.7
conf_inFrameViolationsThreshold = 19
conf_inFramePeopleThreshold = 5
use_safety_model = False
padding = 30
viol_wk = 0
acceptedDevices = ['CPU', 'GPU', 'MYRIAD', 'HETERO:FPGA,CPU', 'HDDL']
videos = []
name_of_videos = []
CONFIG_FILE = '../resources/config.json'
is_async_mode = True
class Video:
def __init__(self, idx, path):
if path.isnumeric():
self.video = cv2.VideoCapture(int(path))
self.name = "Cam " + str(idx)
else:
if os.path.exists(path):
self.video = cv2.VideoCapture("rtsp://edwin:Passw0rd#192.168.0.144:554/cam/realmonitor?channel=1&subtype=1")
self.name = "Video " + str(idx)
else:
print("Either wrong input path or empty line is found. Please check the conf.json file")
exit(21)
if not self.video.isOpened():
print("Couldn't open video: " + path)
sys.exit(20)
self.height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
self.width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH))
self.currentViolationCount = 0
self.currentViolationCountConfidence = 0
self.prevViolationCount = 0
self.totalViolations = 0
self.totalPeopleCount = 0
self.currentPeopleCount = 0
self.currentPeopleCountConfidence = 0
self.prevPeopleCount = 0
self.currentTotalPeopleCount = 0
cv2.namedWindow(self.name, cv2.WINDOW_NORMAL)
self.frame_start_time = datetime.datetime.now()
def get_args():
"""
Parses the argument.
:return: None
"""
global is_async_mode
parser = ArgumentParser()
parser.add_argument("-d", "--device",
help="Specify the target device to infer on; CPU, GPU,"
"FPGA, MYRIAD or HDDL is acceptable. Application will"
"look for a suitable plugin for device specified"
" (CPU by default)",
type=str, required=False)
parser.add_argument("-m", "--model",
help="Path to an .xml file with a trained model's"
" weights.",
required=True, type=str)
parser.add_argument("-sm", "--safety_model",
help="Path to an .xml file with a trained model's"
" weights.",
required=False, type=str, default=None)
parser.add_argument("-e", "--cpu_extension",
help="MKLDNN (CPU)-targeted custom layers. Absolute "
"path to a shared library with the kernels impl",
type=str, default=None)
parser.add_argument("-f", "--flag", help="sync or async", default="async", type=str)
args = parser.parse_args()
global conf_modelLayers, conf_modelWeights, conf_safety_modelLayers, conf_safety_modelWeights, \
targetDevice, cpu_extension, videos, use_safety_model
if args.model:
conf_modelLayers = args.model
conf_modelWeights = os.path.splitext(conf_modelLayers)[0] + ".bin"
if args.safety_model:
conf_safety_modelLayers = args.safety_model
conf_safety_modelWeights = os.path.splitext(conf_safety_modelLayers)[0] + ".bin"
use_safety_model = True
if args.device:
targetDevice = args.device
if "MULTI:" not in targetDevice:
if targetDevice not in acceptedDevices:
print("Selected device, %s not supported." % (targetDevice))
sys.exit(12)
if args.cpu_extension:
cpu_extension = args.cpu_extension
if args.flag == "async":
is_async_mode = True
print('Application running in Async mode')
else:
is_async_mode = False
print('Application running in Sync mode')
assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(CONFIG_FILE)
config = json.loads(open(CONFIG_FILE).read())
for idx, item in enumerate(config['inputs']):
vid = Video(idx, item['video'])
name_of_videos.append([idx, item['video']])
videos.append([idx, vid])
def detect_safety_hat(img):
"""
Detection of the hat of the person.
:param img: Current frame
:return: Boolean value of the detected hat
"""
lowH = 15
lowS = 65
lowV = 75
highH = 30
highS = 255
highV = 255
crop = 0
height = 15
perc = 8
hsv = np.zeros(1)
try:
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
except cv2.error as e:
print("%d %d %d" % (img.shape))
print("%d %d %d" % (img.shape))
print(e)
threshold_img = cv2.inRange(hsv, (lowH, lowS, lowV), (highH, highS, highV))
x = 0
y = int(threshold_img.shape[0] * crop / 100)
w = int(threshold_img.shape[1])
h = int(threshold_img.shape[0] * height / 100)
img_cropped = threshold_img[y: y + h, x: x + w]
if cv2.countNonZero(threshold_img) < img_cropped.size * perc / 100:
return False
return True
def detect_safety_jacket(img):
"""
Detection of the safety jacket of the person.
:param img: Current frame
:return: Boolean value of the detected jacket
"""
lowH = 0
lowS = 150
lowV = 42
highH = 11
highS = 255
highV = 255
crop = 15
height = 40
perc = 23
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
threshold_img = cv2.inRange(hsv, (lowH, lowS, lowV), (highH, highS, highV))
x = 0
y = int(threshold_img.shape[0] * crop / 100)
w = int(threshold_img.shape[1])
h = int(threshold_img.shape[0] * height / 100)
img_cropped = threshold_img[y: y + h, x: x + w]
if cv2.countNonZero(threshold_img) < img_cropped.size * perc / 100:
return False
return True
def detect_workers(workers, frame):
"""
Detection of the person with the safety guards.
:param workers: Total number of the person in the current frame
:param frame: Current frame
:return: Total violation count of the person
"""
violations = 0
global viol_wk
for worker in workers:
xmin, ymin, xmax, ymax = worker
crop = frame[ymin:ymax, xmin:xmax]
if 0 not in crop.shape:
if detect_safety_hat(crop):
if detect_safety_jacket(crop):
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
(0, 255, 0), 2)
else:
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
(0, 0, 255), 2)
violations += 1
viol_wk += 1
else:
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
violations += 1
viol_wk += 1
return violations
def main():
"""
Load the network and parse the output.
:return: None
"""
get_args()
global is_async_mode
nextReq = 1
currReq = 0
nextReq_s = 1
currReq_s = 0
prevVideo = None
vid_finished = [False] * len(videos)
min_FPS = min([videos[i][1].video.get(cv2.CAP_PROP_FPS) for i in range(len(videos))])
# Initialise the class
infer_network = Network()
infer_network_safety = Network()
# Load the network to IE plugin to get shape of input layer
plugin, (batch_size, channels, model_height, model_width) = \
infer_network.load_model(conf_modelLayers, targetDevice, 1, 1, 2, cpu_extension)
if use_safety_model:
batch_size_sm, channels_sm, model_height_sm, model_width_sm = \
infer_network_safety.load_model(conf_safety_modelLayers, targetDevice, 1, 1, 2, cpu_extension, plugin)[1]
while True:
for index, currVideo in videos:
# Read image from video/cam
vfps = int(round(currVideo.video.get(cv2.CAP_PROP_FPS)))
for i in range(0, int(round(vfps / min_FPS))):
ret, current_img = currVideo.video.read()
if not ret:
vid_finished[index] = True
break
if vid_finished[index]:
stream_end_frame = np.zeros((int(currVideo.height), int(currVideo.width), 1),
dtype='uint8')
cv2.putText(stream_end_frame, "Input file {} has ended".format
(name_of_videos[index][1].split('/')[-1]),
(10, int(currVideo.height / 2)),
cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
cv2.imshow(currVideo.name, stream_end_frame)
continue
# Transform image to person detection model input
rsImg = cv2.resize(current_img, (model_width, model_height))
rsImg = rsImg.transpose((2, 0, 1))
rsImg = rsImg.reshape((batch_size, channels, model_height, model_width))
infer_start_time = datetime.datetime.now()
# Infer current image
if is_async_mode:
infer_network.exec_net(nextReq, rsImg)
else:
infer_network.exec_net(currReq, rsImg)
prevVideo = currVideo
previous_img = current_img
# Wait for previous request to end
if infer_network.wait(currReq) == 0:
infer_end_time = (datetime.datetime.now() - infer_start_time) * 1000
in_frame_workers = []
people = 0
violations = 0
hard_hat_detection = False
vest_detection = False
result = infer_network.get_output(currReq)
# Filter output
for obj in result[0][0]:
if obj[2] > conf_inferConfidenceThreshold:
xmin = int(obj[3] * prevVideo.width)
ymin = int(obj[4] * prevVideo.height)
xmax = int(obj[5] * prevVideo.width)
ymax = int(obj[6] * prevVideo.height)
xmin = int(xmin - padding) if (xmin - padding) > 0 else 0
ymin = int(ymin - padding) if (ymin - padding) > 0 else 0
xmax = int(xmax + padding) if (xmax + padding) < prevVideo.width else prevVideo.width
ymax = int(ymax + padding) if (ymax + padding) < prevVideo.height else prevVideo.height
cv2.rectangle(previous_img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
people += 1
in_frame_workers.append((xmin, ymin, xmax, ymax))
new_frame = previous_img[ymin:ymax, xmin:xmax]
if use_safety_model:
# Transform image to safety model input
in_frame_sm = cv2.resize(new_frame, (model_width_sm, model_height_sm))
in_frame_sm = in_frame_sm.transpose((2, 0, 1))
in_frame_sm = in_frame_sm.reshape(
(batch_size_sm, channels_sm, model_height_sm, model_width_sm))
infer_start_time_sm = datetime.datetime.now()
if is_async_mode:
infer_network_safety.exec_net(nextReq_s, in_frame_sm)
else:
infer_network_safety.exec_net(currReq_s, in_frame_sm)
# Wait for the result
infer_network_safety.wait(currReq_s)
infer_end_time_sm = (datetime.datetime.now() - infer_start_time_sm) * 1000
result_sm = infer_network_safety.get_output(currReq_s)
# Filter output
hard_hat_detection = False
vest_detection = False
detection_list = []
for obj_sm in result_sm[0][0]:
if (obj_sm[2] > 0.4):
# Detect safety vest
if (int(obj_sm[1])) == 2:
xmin_sm = int(obj_sm[3] * (xmax - xmin))
ymin_sm = int(obj_sm[4] * (ymax - ymin))
xmax_sm = int(obj_sm[5] * (xmax - xmin))
ymax_sm = int(obj_sm[6] * (ymax - ymin))
if vest_detection == False:
detection_list.append(
[xmin_sm + xmin, ymin_sm + ymin, xmax_sm + xmin, ymax_sm + ymin])
vest_detection = True
# Detect hard-hat
if int(obj_sm[1]) == 4:
xmin_sm_v = int(obj_sm[3] * (xmax - xmin))
ymin_sm_v = int(obj_sm[4] * (ymax - ymin))
xmax_sm_v = int(obj_sm[5] * (xmax - xmin))
ymax_sm_v = int(obj_sm[6] * (ymax - ymin))
if hard_hat_detection == False:
detection_list.append([xmin_sm_v + xmin, ymin_sm_v + ymin, xmax_sm_v + xmin,
ymax_sm_v + ymin])
hard_hat_detection = True
if hard_hat_detection is False or vest_detection is False:
violations += 1
for _rect in detection_list:
cv2.rectangle(current_img, (_rect[0], _rect[1]), (_rect[2], _rect[3]), (0, 255, 0), 2)
if is_async_mode:
currReq_s, nextReq_s = nextReq_s, currReq_s
# Use OpenCV if worker-safety-model is not provided
else:
violations = detect_workers(in_frame_workers, previous_img)
# Check if detected violations equals previous frames
if violations == prevVideo.currentViolationCount:
prevVideo.currentViolationCountConfidence += 1
# If frame threshold is reached, change validated count
if prevVideo.currentViolationCountConfidence == conf_inFrameViolationsThreshold:
# If another violation occurred, save image
if prevVideo.currentViolationCount > prevVideo.prevViolationCount:
prevVideo.totalViolations += (
prevVideo.currentViolationCount - prevVideo.prevViolationCount)
prevVideo.prevViolationCount = prevVideo.currentViolationCount
else:
prevVideo.currentViolationCountConfidence = 0
prevVideo.currentViolationCount = violations
# Check if detected people count equals previous frames
if people == prevVideo.currentPeopleCount:
prevVideo.currentPeopleCountConfidence += 1
# If frame threshold is reached, change validated count
if prevVideo.currentPeopleCountConfidence == conf_inFrameViolationsThreshold:
prevVideo.currentTotalPeopleCount += (
prevVideo.currentPeopleCount - prevVideo.prevPeopleCount)
if prevVideo.currentTotalPeopleCount > prevVideo.prevPeopleCount:
prevVideo.totalPeopleCount += prevVideo.currentTotalPeopleCount - prevVideo.prevPeopleCount
prevVideo.prevPeopleCount = prevVideo.currentPeopleCount
else:
prevVideo.currentPeopleCountConfidence = 0
prevVideo.currentPeopleCount = people
frame_end_time = datetime.datetime.now()
cv2.putText(previous_img, 'Total people count: ' + str(
prevVideo.totalPeopleCount), (10, prevVideo.height - 10),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'Current people count: ' + str(
prevVideo.currentTotalPeopleCount),
(10, prevVideo.height - 40),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'Total violation count: ' + str(
prevVideo.totalViolations), (10, prevVideo.height - 70),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, 'FPS: %0.2fs' % (1 / (
frame_end_time - prevVideo.frame_start_time).total_seconds()),
(10, prevVideo.height - 100),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.putText(previous_img, "Inference time: N\A for async mode" if is_async_mode else \
"Inference time: {:.3f} ms".format((infer_end_time).total_seconds()),
(10, prevVideo.height - 130),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.imshow(prevVideo.name, previous_img)
prevVideo.frame_start_time = datetime.datetime.now()
# Swap
if is_async_mode:
currReq, nextReq = nextReq, currReq
previous_img = current_img
prevVideo = currVideo
if cv2.waitKey(1) == 27:
print("Attempting to stop input files")
infer_network.clean()
infer_network_safety.clean()
cv2.destroyAllWindows()
return
if False not in vid_finished:
infer_network.clean()
infer_network_safety.clean()
cv2.destroyAllWindows()
break
if __name__ == '__main__':
main()
Here is the config file
{
"inputs": [
{
"video": "rtsp://xxx:xxx#192.168.0.144:554/cam/realmonitor?channel=1&subtype=1"
}
]
}
This is because of the line if os.path.exists(path):. This condition checks whether path points to an existing file. Since your RTSP stream is not a file, it leads to your error.
For example, you can modify this condition to:
if os.path.exists(path) or path.startswith("rtsp"):
By the way, you hard-coded the RTSP stream address within the code, so it will not use your configured path. You may want to replace the hard-coded URL with path.
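Putting both fixes together, the constructor branch might look like this (a sketch; only the path handling changes, the rest of the Video class stays as it is):
import os
import sys
import cv2

class Video:
    def __init__(self, idx, path):
        if path.isnumeric():
            self.video = cv2.VideoCapture(int(path))   # local webcam index
            self.name = "Cam " + str(idx)
        elif os.path.exists(path) or path.startswith("rtsp"):
            self.video = cv2.VideoCapture(path)        # use the configured path, not a hard-coded URL
            self.name = "Video " + str(idx)
        else:
            print("Either wrong input path or empty line is found. Please check the conf.json file")
            sys.exit(21)
        if not self.video.isOpened():
            print("Couldn't open video: " + path)
            sys.exit(20)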
So I am working with YOLOv4 to process video frames for object detection of one class: Human. Every time a human is detected in a frame, it prints a line in the terminal, "Number of Humans detected:", with the count for that particular frame. Now I want the code to run as it is, but instead of printing that output for every frame, it should print the output for the frame being processed at the first 1-minute mark and thereafter at every 3-minute mark until the video is fully processed. So for a 5-minute video, I would want the statement printed at the following video timestamps: 1:00 and 4:00. For an 8-minute video it would be 1:00, 4:00, 7:00, and so on. I tried using the schedule module, but it seems to just schedule the entire code to run after 1 minute.
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
import datetime
import schedule
import time
from time import sleep
file = "test2"
input = "C:/Users/asmita.nandi/Downloads/" + file + ".mp4"
output = "C:/Users/asmita.nandi/Downloads/" + file + ".avi"
net = cv2.dnn.readNet("C:/Users/asmita.nandi/Downloads/custom-yolov4-tiny_human-608 (1).cfg", "C:/Users/asmita.nandi/Downloads/custom-yolov4-tiny-detector_human.weights")
labelsPath = "C:/Users/asmita.nandi/Downloads/human_label.txt"
def event(input,output,net,labelsPath):
LABELS = open(labelsPath).read().strip().split("\n")
np.random.seed(1)
cmap = plt.get_cmap('tab20b')
colors = [cmap(i)[:3] for i in np.linspace(0, 1, 6)]
CONF_THRESH, NMS_THRESH = 0.25, 0.25
vs = cv2.VideoCapture(input)
fp = vs.get(cv2.CAP_PROP_FPS)
writer = None
W = None
H = None
totalFrames = 0
TotalHuman = 0
while True:
frame = vs.read()
frame = frame[1] if input else frame
if input is not None and frame is None:
break
(H, W) = frame.shape[:2]
print(H,W)
if W is None or H is None:
(H, W) = frame.shape[:2]
if output is not None and writer is None:
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
writer = cv2.VideoWriter(output, fourcc,fp,(W,H), True)
ln = net.getLayerNames()
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (608,608), swapRB=True, crop=False)
net.setInput(blob)
start = time.time()
layerOutputs = net.forward(ln)
print(layerOutputs)
end = time.time()
boxes = []
confidences = []
classIDs = []
(H, W) = frame.shape[:2]
# loop over each of the layer outputs
for output in layerOutputs:
# loop over each of the detections
for detection in output:
scores = detection[5:]
#print(detection)
classID = np.argmax(scores)
confidence = scores[classID]
if confidence > CONF_THRESH:
#print("Box")
box = detection[0:4] * np.array([W, H, W, H])
(centerX, centerY, width, height) = box.astype("int")
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
classIDs.append(classID)
idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESH,NMS_THRESH)
ObjectCount = {}
if len(idxs) > 0:
for i in idxs.flatten():
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
color = colors[classIDs[i]]
color = [i * 255 for i in color]
class_name = LABELS[classIDs[i]]
cv2.rectangle(frame,(x,y),(x+w,y+h),color,2)
# if class_name == "Human":
cv2.putText(frame, class_name,(x, y-10),0, 0.5,color,2)
obj, conf = LABELS[classIDs[i]], confidences[i]
if obj not in ObjectCount.keys():
ObjectCount[obj] = 1
else:
ObjectCount[obj] += 1
allvalues=[]
allvalues.append(ObjectCount[obj])
print("Number of Humans detected ", max(allvalues))
if writer is not None:
writer.write(frame)
# show the output frame
#cv2_imshow(frame)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
# increment the total number of frames processed thus far and
# then update the FPS counter
totalFrames += 1
fps.update()
# stop the timer and display FPS information
#for (objectID, centroid) in objects.items():
#print(objectID, centroids)
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print(totalFrames)
#print(info)
# check to see if we need to release the video writer pointer
if writer is not None:
writer.release()
# if we are not using a video file, stop the camera video stream
if not input:
vs.stop()
# otherwise, release the video file pointer
else:
vs.release()
# close any open windows
cv2.destroyAllWindows()
schedule.every(1).minutes.do(event(input,output,net,labelsPath))
while 1:
schedule.run_pending()
time.sleep(1)
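One way to get timestamp-based printing without the schedule module is to derive the video timestamp from a frame counter and the file's FPS, and only print when the timestamp crosses 1:00, 4:00, 7:00, and so on. A minimal sketch; frame_idx and next_report_s are names I've made up, and the detection step is left as a placeholder where the existing YOLOv4 code would go:
import cv2

vs = cv2.VideoCapture("C:/Users/asmita.nandi/Downloads/test2.mp4")   # same file as `input` above
fps_value = vs.get(cv2.CAP_PROP_FPS)
frame_idx = 0
next_report_s = 60                      # first report at 1:00, then every 180 s

while True:
    ret, frame = vs.read()
    if not ret:
        break
    frame_idx += 1
    timestamp_s = frame_idx / fps_value  # position of this frame in the video, in seconds
    # ... run the YOLOv4 detection on `frame` and compute the human count here ...
    if timestamp_s >= next_report_s:
        print("Number of Humans detected at %d:%02d" % (int(timestamp_s // 60), int(timestamp_s % 60)))
        next_report_s += 180
vs.release()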
I understand that it would be much simpler to just train YOLOv4 accordingly, but:
I have limited computational resources and was hoping I could combine pre-trained models I found online, saving time, or train a model and combine it with other models available online.
If I have one '.weights' file of a custom object detector that detects traffic signs and another '.weights' file of a detector that detects pedestrians, is there a way to combine these models so that, when run on a video/image (or in real-time capture), they detect pedestrians and traffic signs simultaneously?
By combining I mean either editing the '.weights' files somehow to achieve this, or editing the Python code (while running the detector) to get this done (or any other way).
If that is not possible, is there any way to make them run in sequence, efficiently?
Yes, it is possible to do that. Create two different Python files and load one model in each. Then create a new file and initialise both of them there. Take the continuous video/image feed as input; the first file will give the output of detected traffic signs. Use this output as an input to your pedestrian problem.
import numpy as np
import pandas as pd
import cv2
import time
margin = 20
import detection_cls
run = detection_cls.Ppe_Detection_1()
class Ppe_Detection():
def __init__(self):
self.weightfile = 'yolov4_pretrained.weights'
self.cfgfile = 'cfg/yolov4_pretrained.cfg'
self.PpeNet = cv2.dnn.readNet(self.weightfile,self.cfgfile)
self.classes = self.get_classes()
layer_names = self.PpeNet.getLayerNames()
self.output_layers = [layer_names[i - 1] for i in self.PpeNet.getUnconnectedOutLayers()]
# = [self.PpeNet.getLayerNames()[(i[0] - 1)] for i in self.PpeNet.getUnconnectedOutLayers()]
def get_classes(self):
# self.classes = []
with open("coco.names","r") as f:
self.classes_val = [line.strip() for line in f.readlines()]
return self.classes_val
def detection(self,img):
start = time.perf_counter()
height,width,channels = img.shape
# Detecting objects
blob = cv2.dnn.blobFromImage(img,0.00392,(416,416),(0,0,0),True,crop=False)
self.PpeNet.setInput(blob)
outs = self.PpeNet.forward(self.output_layers)
time_took = time.perf_counter() - start
fps = str(int(1/time_took))
# getting the list
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
# print(detection)
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.4:
# object detected
center_x = int(detection[0]*width)
center_y = int(detection[1]*height)
w = int(detection[2]*width)
h = int(detection[3]*height)
# Rectangle Coordinates
x = int(center_x-w/2)
y = int(center_y-h/2)
boxes.append([x,y,w,h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes,confidences,0.4,0.4)
info = []
if len(indexes) > 0:
for i in indexes.flatten():
x,y = boxes[i][0], boxes[i][1]
w,h = boxes[i][2], boxes[i][3]
conf = confidences[i]
if x<0:
x = 0
if y < 0:
y = 0
type = '{}'.format(self.classes[class_ids[i]])
info.append([x,y,w,h,type,conf])
new_frame_time = time.time()
font = cv2.FONT_HERSHEY_PLAIN
for i in range(len(boxes)):
for i in indexes:
x,y,w,h = boxes[i]
label = str(self.classes[class_ids[i]])
color = (0,0,145)
# cv2.rectangle(img,(x,y),(x+w,y+h),color,2)
cv2.putText(img,fps,(10,30),font,3,color,3)
cv2.putText(img,label,(x,y+30),font,3,color,3)
# print(boxes,confidences,class_ids)
# print("opened")
# print(info)
return info
def cropping_detection(self):
count = 0
cropped_frame = []
value_img = []
cap = cv2.VideoCapture(0)
while cap.isOpened():
r,f = cap.read()
# counts +=1
detection = ppe.detection(f)
try:
x,y,w,h,cls,conf = detection[0]
except Exception as e:
pass
# for i in range(len(detection)):
# print(detection[i])
try:
if cls=='person':
print("Person detected")
cropped_img = f[y-margin:y+h+margin,x-margin:x+w+margin]
# print(cropped_img)
# value = ppe.detection(cropped_img)
final = run.detection(cropped_img)
print('final',final)
# print(value)
# print(cropped_img)
# value_img.append(value)
# cropped_frame.append(cropped_img)
# cv2.imwrite("data/frame%d.jpg" % count, cropped_img)
cv2.imshow("Hi",cropped_img)
if cv2.waitKey(1) & 0xff==ord('q'):
break
# count += 1
else:
pass
except Exception as e:
print("In except part")
# cap.release()
# cv2.destroyAllWindows()
# print(cropped_img)
return value,cropped_img
ppe = Ppe_Detection()
ppe.cropping_detection()
# ppe.get_classes()
# ppe.run_video()
# take_photo()
# inference_image()
The detection_cls module that I've imported here can be found below.
import numpy as np
import pandas as pd
import cv2
import time
class Ppe_Detection():
def __init__(self):
self.weightfile = 'backup/yolov3_best.weights'
self.cfgfile = 'yolov3.cfg'
self.PpeNet = cv2.dnn.readNet(self.weightfile,self.cfgfile)
self.classes = self.get_classes()
layer_names = self.PpeNet.getLayerNames()
self.output_layers = [layer_names[i - 1] for i in self.PpeNet.getUnconnectedOutLayers()]
# = [self.PpeNet.getLayerNames()[(i[0] - 1)] for i in self.PpeNet.getUnconnectedOutLayers()]
def get_classes(self):
# self.classes = []
with open("obj.names","r") as f:
self.classes_val = [line.strip() for line in f.readlines()]
return self.classes_val
def print_all(self):
print(self.classes)
def detection(self,img):
start = time.perf_counter()
height,width,channels = img.shape
# Detecting objects
blob = cv2.dnn.blobFromImage(img,0.00392,(416,416),(0,0,0),True,crop=False)
self.PpeNet.setInput(blob)
outs = self.PpeNet.forward(self.output_layers)
time_took = time.perf_counter() - start
fps = str(int(1/time_took))
# getting the list
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.4:
# object detected
center_x = int(detection[0]*width)
center_y = int(detection[1]*height)
w = int(detection[2]*width)
h = int(detection[3]*height)
# Rectangle Coordinates
x = int(center_x-w/2)
y = int(center_y-h/2)
boxes.append([x,y,w,h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes,confidences,0.4,0.4)
info = []
if len(indexes) > 0:
for i in indexes.flatten():
x,y = boxes[i][0], boxes[i][1]
w,h = boxes[i][2], boxes[i][3]
conf = confidences[i]
if x<0:
x = 0
if y < 0:
y = 0
type = '{}'.format(self.classes[class_ids[i]])
info.append([x,y,w,h,type,conf])
new_frame_time = time.time()
font = cv2.FONT_HERSHEY_PLAIN
for i in range(len(boxes)):
for i in indexes:
x,y,w,h = boxes[i]
label = str(self.classes[class_ids[i]])
color = (0,255,145)
cv2.rectangle(img,(x,y),(x+w,y+h),color,2)
cv2.putText(img,fps,(10,30),font,3,color,3)
cv2.putText(img,label,(x,y+30),font,3,color,3)
# print(boxes,confidences,class_ids)
# print("opened")
print(info)
return info
ppe = Ppe_Detection()
ppe.get_classes()
ppe.print_all()
#############
def inference_image():
img = cv2.imread("sumit_off.jpg")
ppe.detection(img)
cv2.imshow("Image",img)
cv2.waitKey(0)
cv2.destroyAllWindows()
def run_video():
cap = cv2.VideoCapture(0)
while cap.isOpened():
r,f = cap.read()
try:
info = ppe.detection(f)
except Exception as e:
print("______",e)
cv2.imshow("image",f)
if cv2.waitKey(1) & 0xFF == ord("q") :
break
cap.release()
cv2.destroyAllWindows()
run_video()
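If you want both sets of detections drawn simultaneously rather than cascaded, another option is to run both networks on every frame and merge their outputs. A rough sketch, written against the first file above, where ppe and run are the two detector instances; it assumes both detection() methods return [x, y, w, h, label, conf] lists as shown:
import cv2

cap = cv2.VideoCapture(0)
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    detections = ppe.detection(frame) + run.detection(frame)   # merge both result lists
    for x, y, w, h, label, conf in detections:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(frame, label, (x, y - 5), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)
    cv2.imshow("combined", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()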
I am using YOLOv5. I want to change my webcam input to a LAN camera.
class LoadStreams: # multiple IP or RTSP cameras
def __init__(self, sources='streams.txt', img_size=640):
self.mode = 'images'
self.img_size = img_size
if os.path.isfile(sources):
with open(sources, 'r') as f:
sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
else:
sources = [sources]
n = len(sources)
self.imgs = [None] * n
self.sources = sources
for i, s in enumerate(sources):
# Start the thread to read frames from the video stream
print('%g/%g: %s... ' % (i + 1, n, s), end='')
cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
assert cap.isOpened(), 'Failed to open %s' % s
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) % 100
_, self.imgs[i] = cap.read() # guarantee first frame
thread = Thread(target=self.update, args=([i, cap]), daemon=True)
print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
thread.start()
print('') # newline
# check for common shapes
s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
def update(self, index, cap):
# Read next stream frame in a daemon thread
n = 0
while cap.isOpened():
n += 1
# _, self.imgs[index] = cap.read()
cap.grab()
if n == 4: # read every 4th frame
_, self.imgs[index] = cap.retrieve()
n = 0
time.sleep(0.01) # wait time
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
img0 = self.imgs.copy()
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
# Letterbox
img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
# Stack
img = np.stack(img, 0)
# Convert
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return self.sources, img, img0, None
def __len__(self):
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
This code returns self.sources, img, img0, None.
if webcam:
view_img = True
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz)
print((dataset))
I use dataset like this:
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
How can I use for path, img, im0s, vid_cap in dataset: with my own source?
Here is my LAN camera code:
def livecame():
vimba = Vimba()
vimba.startup()
system = vimba.system()
system.run_feature_command("GeVDiscoveryAllOnce")
time.sleep(0.1)
camera_ids = vimba.camera_ids()
# for cam_id in camera_ids:
# print("Camera found: ", cam_id)
print(camera_ids[0])
c0 = vimba.camera(camera_ids[0])
c0.open()
pixel_format = c0.feature("PixelFormat")
pixel_format.value = "BayerBG8"
try:
c0.StreamBytesPerSecond = 100000000
except:
pass
frame = c0.new_frame()
frame.announce()
c0.start_capture()
try:
frame.queue_for_capture()
success = True
except:
success = False
c0.run_feature_command("AcquisitionStart")
c0.run_feature_command("AcquisitionStop")
frame.wait_for_capture(1000)
frame_data = frame.buffer_data()
k = cv2.waitKey(1)
if k == 0x1b:
cv2.destroyAllWindows()
if success:
img = np.ndarray(buffer=frame_data,
dtype=np.uint8,
shape=(frame.data.height, frame.data.width, 1))
img = cv2.cvtColor(img, cv2.COLOR_BAYER_BG2RGB)
img0 = img.copy()
img = img.tolist()
img = [letterbox(x, new_shape=(800,400), auto= True)[0] for x in img0]
#img = np.ascontiguousarray(img)
img = np.stack(img, 0)
#img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return ['0'], img, img0
But when I use dataset = new_file.livecame(), I get the error
ValueError: not enough values to unpack (expected 3, got 1)
at for path, img, im0s, vid_cap in dataset:.
How can I unpack multiple variables in the for loop?
In Python OpenCV, one way is simply to use zip.
for component in zip(contours, hierarchy):
cntr = component[0]
hier = component[1]
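For the YOLOv5 loop specifically, the unpacking error also goes away if the LAN camera source is wrapped in a generator that yields the same four values as LoadStreams (path, img, img0, vid_cap). A minimal sketch using cv2.VideoCapture as a stand-in for the Vimba capture code; livecam_dataset is a made-up name and a plain resize stands in for letterbox():
import cv2
import numpy as np

def livecam_dataset(source=0, img_size=640):
    # Yield (path, img, img0, vid_cap) tuples so the detect loop
    # `for path, img, im0s, vid_cap in dataset:` can unpack them.
    cap = cv2.VideoCapture(source)
    assert cap.isOpened(), 'Failed to open %s' % source
    while True:
        ok, img0 = cap.read()
        if not ok:
            break
        img = cv2.resize(img0, (img_size, img_size))     # stand-in for letterbox()
        img = img[:, :, ::-1].transpose(2, 0, 1)         # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)
        yield str(source), img, img0, cap
    cap.release()

# usage:
# for path, img, im0s, vid_cap in livecam_dataset(0):
#     ...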