# Import packages
import os
import argparse
import cv2
import numpy as np
import sys
import time
from threading import Thread
import importlib.util
# Define VideoStream class to handle streaming of video from webcam in separate processing thread
# Source - Adrian Rosebrock, PyImageSearch: https://www.pyimagesearch.com/2015/12/28/increasing-raspberry-pi-fps-with-python-and-opencv/
class VideoStream:
"""Camera object that controls video streaming from the Picamera"""
def __init__(self,resolution=(640,480),framerate=30):
# Initialize the PiCamera and the camera image stream
self.stream = cv2.VideoCapture(0)
ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
ret = self.stream.set(3,resolution[0])
ret = self.stream.set(4,resolution[1])
# Read first frame from the stream
(self.grabbed, self.frame) = self.stream.read()
# Variable to control when the camera is stopped
self.stopped = False
def start(self):
# Start the thread that reads frames from the video stream
Thread(target=self.update,args=()).start()
return self
def update(self):
# Keep looping indefinitely until the thread is stopped
while True:
# If the camera is stopped, stop the thread
if self.stopped:
# Close camera resources
self.stream.release()
return
# Otherwise, grab the next frame from the stream
(self.grabbed, self.frame) = self.stream.read()
def read(self):
# Return the most recent frame
return self.frame
def stop(self):
# Indicate that the camera and thread should be stopped
self.stopped = True
# Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
default='/home/pi/TensorFlow')
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
default='edgetpu.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
default=0.4)
parser.add_argument('--resolution', help='Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.',
default='640x480')
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
default=True,
#required=True,
action='store_true')
args = parser.parse_args()
MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
resW, resH = args.resolution.split('x')
imW, imH = int(resW), int(resH)
use_TPU = args.edgetpu
# Import TensorFlow libraries
# If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
# If using Coral Edge TPU, import the load_delegate library
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
from tflite_runtime.interpreter import Interpreter
if use_TPU:
from tflite_runtime.interpreter import load_delegate
else:
from tensorflow.lite.python.interpreter import Interpreter
if use_TPU:
from tensorflow.lite.python.interpreter import load_delegate
GRAPH_NAME = 'edgetpu.tflite'
# Get path to current working directory
CWD_PATH = os.getcwd()
# Path to .tflite file, which contains the model that is used for object detection
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
# Load the label map
with open(PATH_TO_LABELS, 'r') as f:
labels = [line.strip() for line in f.readlines()]
# Have to do a weird fix for label map if using the COCO "starter model" from
# https://www.tensorflow.org/lite/models/object_detection/overview
# First label is '???', which has to be removed.
if labels[0] == '???':
del(labels[0])
# Load the Tensorflow Lite model.
# If using Edge TPU, use special load_delegate argument
if use_TPU:
interpreter = Interpreter(model_path=PATH_TO_CKPT,
experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
print(PATH_TO_CKPT)
else:
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
# Check output layer name to determine if this model was created with TF2 or TF1,
# because outputs are ordered differently for TF2 and TF1 models
outname = output_details[0]['name']
if ('StatefulPartitionedCall' in outname): # This is a TF2 model
boxes_idx, classes_idx, scores_idx = 1, 3, 0
else: # This is a TF1 model
boxes_idx, classes_idx, scores_idx = 0, 1, 2
# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
# Initialize video stream
videostream = VideoStream(resolution=(imW,imH),framerate=30).start()
time.sleep(1)
#for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
while True:
# Start timer (for calculating frame rate)
t1 = cv2.getTickCount()
# Grab frame from video stream
frame1 = videostream.read()
frame1 = cv2.flip(frame1,0)
# Acquire frame and resize to expected shape [1xHxWx3]
frame = frame1.copy()
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_resized = cv2.resize(frame_rgb, (width, height))
input_data = np.expand_dims(frame_resized, axis=0)
# Normalize pixel values if using a floating model (i.e. if model is non-quantized)
if floating_model:
input_data = (np.float32(input_data) - input_mean) / input_std
# Perform the actual detection by running the model with the image as input
interpreter.set_tensor(input_details[0]['index'],input_data)
interpreter.invoke()
# Retrieve detection results
boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0] # Bounding box coordinates of detected objects
classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0] # Class index of detected objects
scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0] # Confidence of detected objects
# Loop over all detections and draw detection box if confidence is above minimum threshold
for i in range(len(scores)):
if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
# Get bounding box coordinates and draw box
# Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
ymin = int(max(1,(boxes[i][0] * imH)))
xmin = int(max(1,(boxes[i][1] * imW)))
ymax = int(min(imH,(boxes[i][2] * imH)))
xmax = int(min(imW,(boxes[i][3] * imW)))
cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)
# Draw label
object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text
# Draw circle in center
xcenter = xmin + (int(round((xmax - xmin) / 2)))
ycenter = ymin + (int(round((ymax - ymin) / 2)))
cv2.circle(frame, (xcenter, ycenter), 5, (0,0,255), thickness=-1)
# Print info
print('Object ' + str(i) + ': ' + object_name + ' at (' + str(xcenter) + ', ' + str(ycenter) + ')')
# Draw framerate in corner of frame
cv2.putText(frame,'FPS: {0:.2f}'.format(frame_rate_calc),(30,50),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,0),2,cv2.LINE_AA)
# All the results have been drawn on the frame, so it's time to display it.
cv2.imshow('Object detector', frame)
# Press 'q' to quit
if cv2.waitKey(1) == ord('q'):
break
# Clean up
cv2.destroyAllWindows()
videostream.stop()
I would like to try something in this code but i can't limit the detection to 1 at a time
The object detection and the model itself are good but i want to limit the result to 1 instead of multiple
this is the whole code i get from the internet
i also tried to add (parse.addArgument --maxResult) but i don't know how to run it in the tflite_runtime
i am new to the codes in python and in using RPi
Related
The code is for Real time number Plate Recognition using YOLOv5 and easyocr. I am trying to build the flask server and to embed the live streaming webcam to the HTML Page.
The model is trained with YOLOv5 which is used for number plate localization.
The flask server is not showing the OpenCV webcam frame to HTML page for Real Time Number Plate detection.
import torch
import cv2
import time
import numpy as np
import easyocr
from flask import Flask, render_template, Response
app = Flask(name, template_folder='yolov5/templates')
def generate_frames():
# DEFINING GLOBAL VARIABLE
EASY_OCR = easyocr.Reader(['en']) # initiating easyocr
OCR_TH = 0.2
print(f"[INFO] Loading model... ")
# loading the custom trained model
model = torch.hub.load('.', 'custom', source='local', path='runs/train/Model5/weights/best.pt',
force_reload=True) ### The repo is stored locally
classes = model.names # class names in string format
# -------------------------------------- function to run detection ---------------------------------------------------------
def detectx(frame, model):
frame = [frame]
print(f"[INFO] Detecting. . . ")
results = model(frame)
# results.show()
# print( results.xyxyn[0])
# print(results.xyxyn[0][:, -1])
# print(results.xyxyn[0][:, :-1])
labels, cordinates = results.xyxyn[0][:, -1], results.xyxyn[0][:, :-1]
return labels, cordinates
# ------------------------------------ to plot the BBox and results --------------------------------------------------------
def plot_boxes(results, frame, classes):
"""
--> This function takes results, frame and classes
--> results: contains labels and coordinates predicted by model on the given frame
--> classes: contains the strting labels
"""
labels, cord = results
n = len(labels)
x_shape, y_shape = frame.shape[1], frame.shape[0]
print(f"[INFO] Total {n} detections. . . ")
print(f"[INFO] Looping through all detections. . . ")
# looping through the detections
for i in range(n):
row = cord[i]
if row[4] >= 0.55: ### threshold value for detection. We are discarding everything below this value
print(f"[INFO] Extracting BBox coordinates. . . ")
x1, y1, x2, y2 = int(row[0] * x_shape), int(row[1] * y_shape), int(row[2] * x_shape), int(
row[3] * y_shape) ## BBOx coordniates
text_d = classes[int(labels[i])]
# cv2.imwrite("./output/dp.jpg",frame[int(y1):int(y2), int(x1):int(x2)])
coords = [x1, y1, x2, y2]
plate_num = recognize_plate_easyocr(img=frame, coords=coords, reader=EASY_OCR, region_threshold=OCR_TH)
print("Detected number is: " + plate_num + "\n")
# if text_d == 'mask':
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) # BBox
cv2.rectangle(frame, (x1, y1 - 20), (x2, y1), (0, 255, 0), -1) # for text label background
cv2.putText(frame, f"{plate_num}", (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
# cv2.imwrite("./output/np.jpg",frame[int(y1)-25:int(y2)+25, int(x1)-25:int(x2)+25])
return frame
# ---------------------------- function to recognize license plate --------------------------------------
# function to recognize license plate numbers using Tesseract OCR
def recognize_plate_easyocr(img, coords, reader, region_threshold):
# separate coordinates from box
xmin, ymin, xmax, ymax = coords
# get the subimage that makes up the bounded region and take an additional 5 pixels on each side
# nplate = img[int(ymin)-5:int(ymax)+5, int(xmin)-5:int(xmax)+5]
nplate = img[int(ymin):int(ymax), int(xmin):int(xmax)] ### cropping the number plate from the whole image
ocr_result = reader.readtext(nplate)
text = filter_text(region=nplate, ocr_result=ocr_result, region_threshold=region_threshold)
if len(text) == 1:
text = text[0].upper()
return text
# to filter out wrong detections
def filter_text(region, ocr_result, region_threshold):
rectangle_size = region.shape[0] * region.shape[1]
plate = []
print(ocr_result)
for result in ocr_result:
length = np.sum(np.subtract(result[0][1], result[0][0]))
height = np.sum(np.subtract(result[0][2], result[0][1]))
if length * height / rectangle_size > region_threshold:
plate.append(result[1])
return plate
# --------------- for detection on video --------------------
print(f"[INFO] Working with video: {0}")
# reading the video
cap = cv2.VideoCapture(0)
vid_out = "webcam_facemask_result.mp4"
if vid_out != None: # creating the video writer if video output path is given
# by default VideoCapture returns float instead of int
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*'mp4v') # (*'XVID')
out = cv2.VideoWriter(vid_out, codec, fps, (width, height))
# assert cap.isOpened()
frame_no = 1
cv2.namedWindow("vid_out", cv2.WINDOW_NORMAL)
while True:
# start_time = time.time()
ret, frame = cap.read()
if ret and frame_no % 1 == 0:
print(f"[INFO] Working with frame {frame_no} ")
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results = detectx(frame, model=model)
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
frame = plot_boxes(results, frame, classes=classes)
cv2.imshow("vid_out", frame)
if vid_out:
print(f"[INFO] Saving output video. . . ")
out.write(frame)
if cv2.waitKey(5) & 0xFF == 27:
break
frame_no += 1
ret, buffer = cv2.imencode('.jpg', frame)
frame = buffer.tobytes()
yield (b'--frame\r\n'
b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
# print(f"[INFO] Cleaning up. . . ")
# # releaseing the writer
# out.release()
#
# # closing all windows
# cv2.destroyAllWindows()
#app.route('/')
def index():
return render_template('index.html')
#app.route('/video')
def video():
return Response(generate_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')
if name == "main": app.run(debug=True)
I have had Tensorflow Lite running on a Raspberry Pi for a while to do some object detection, I have tried it on a few test models with no problems at all. Recently I tried to make my own model and I ran into this error. How can I fix this and does anyone know what is wrong with it?
This is my code for running Tensorflow Lite on my Pi
import os
import argparse
import cv2
import numpy as np
import sys
import time
from threading import Thread
import importlib.util
class VideoStream:
"""Camera object that controls video streaming from the Picamera"""
def __init__(self,resolution=(640,480),framerate=30):
self.stream = cv2.VideoCapture(0)
ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
ret = self.stream.set(3,resolution[0])
ret = self.stream.set(4,resolution[1])
(self.grabbed, self.frame) = self.stream.read()
self.stopped = False
def start(self):
Thread(target=self.update,args=()).start()
return self
def update(self):
while True:
if self.stopped:
self.stream.release()
return
(self.grabbed, self.frame) = self.stream.read()
def read(self):
return self.frame
def stop(self):
self.stopped = True
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
default='detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
default=0.5)
parser.add_argument('--resolution', help='Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.',
default='1280x720')
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
action='store_true')
args = parser.parse_args()
MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
resW, resH = args.resolution.split('x')
imW, imH = int(resW), int(resH)
use_TPU = args.edgetpu
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
from tflite_runtime.interpreter import Interpreter
if use_TPU:
from tflite_runtime.interpreter import load_delegate
else:
from tensorflow.lite.python.interpreter import Interpreter
if use_TPU:
from tensorflow.lite.python.interpreter import load_delegate
if use_TPU:
if (GRAPH_NAME == 'detect.tflite'):
GRAPH_NAME = 'edgetpu.tflite'
CWD_PATH = os.getcwd()
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)
PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
with open(PATH_TO_LABELS, 'r') as f:
labels = [line.strip() for line in f.readlines()]
if labels[0] == '???':
del(labels[0])
if use_TPU:
interpreter = Interpreter(model_path=PATH_TO_CKPT,
experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
print(PATH_TO_CKPT)
else:
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
frame_rate_calc = 1
freq = cv2.getTickFrequency()
videostream = VideoStream(resolution=(imW,imH),framerate=30).start()
time.sleep(1)
while True:
t1 = cv2.getTickCount()
frame1 = videostream.read()
frame = frame1.copy()
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_resized = cv2.resize(frame_rgb, (width, height))
input_data = np.expand_dims(frame_resized, axis=0)
if floating_model:
input_data = (np.float32(input_data) - input_mean) / input_std
interpreter.set_tensor(input_details[0]['index'],input_data)
interpreter.invoke()
boxes = interpreter.get_tensor(output_details[0]['index'])[0]
classes = interpreter.get_tensor(output_details[1]['index'])[0]
scores = interpreter.get_tensor(output_details[2]['index'])[0]
for i in range(len(scores)):
if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
ymin = int(max(1,(boxes[i][0] * imH)))
xmin = int(max(1,(boxes[i][1] * imW)))
ymax = int(min(imH,(boxes[i][2] * imH)))
xmax = int(min(imW,(boxes[i][3] * imW)))
cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)
object_name = labels[int(classes[i])]
label = '%s: %d%%' % (object_name, int(scores[i]*100))
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
label_ymin = max(ymin, labelSize[1] + 10)
cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED)
cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
cv2.putText(frame,'FPS: {0:.2f}'.format(frame_rate_calc),(30,50),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,0),2,cv2.LINE_AA)
cv2.imshow('Matthew De La Rosa: AP Research 2021-2022', frame)
t2 = cv2.getTickCount()
time1 = (t2-t1)/freq
frame_rate_calc= 1/time1
if cv2.waitKey(1) == ord('q'):
break
cv2.destroyAllWindows()
videostream.stop()
This is my error
Traceback (most recent call last):
File "TFLite_detection_webcam.py", line 137, in <module>
for i in range(len(scores)):
TypeError: object of type 'numpy.float32' has no len()
boxes = interpreter.get_tensor(output_details[1]['index'])[0]
classes = interpreter.get_tensor(output_details[3]['index'])[0]
scores = interpreter.get_tensor(output_details[0]['index'])[0]
hope this will fix your problem
score is of type numpy.float32 and so cannot be passed to len() but can instead be passed directly to range() if it can be converted to an int
for i in range(int(scores)):
if this is not possible it may be that scores is supposed to be a list and the issue lies earlier in the code
So I create a Neural Network(CNN) that can predict in real-time using opencv the gender of a person, everything works perfect, but, when I run the code OpenCv has so much lag, my webcam is not that bad, here is my code
'''
Real-time Face Gender Recognition using Conv-Nueral Network (CNN) and Cv2
Here we predict the save model that it is train
'''
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
import numpy as np
import cv2
import os
import cvlib as cv
import imutils
# load the model
model = load_model('gender_detection.model')
# open webcams and initiate the camara
webcam = cv2.VideoCapture(0, cv2.CAP_DSHOW)
classes = ['hombre', 'mujer']
# loop through frames
while webcam.isOpened():
# read frame from webcam
status, frame = webcam.read()
#webcam.set(cv2.CAP_PROP_FPS, 1000)
frame = cv2.flip(frame, 1)
# apply face detection
face, confidence = cv.detect_face(frame) # this detects that there is a face in the camara, cvlib does, but not if it is a man that detects the neural network
# loop through detected faces
for idx, f in enumerate(face):
# get corner points of face rectangle
# this only will draw a rectangle when the cvlib detects the face with the vars giving up there
startX, startY = f[0], f[1]
endX, endY = f[2], f[3]
# draw the rectangle over the face
cv2.rectangle(frame, (startX, startY), (endX, endY), (0,255,0), 2)
# crop the detected face region
face_crop = np.copy(frame[startY:endY, startX:endX])
if face_crop.shape[0] < 10 or face_crop.shape[1] < 10:
continue
# preprocessing for gender detection model
face_crop = cv2.resize(face_crop, (96,96))
face_crop = face_crop.astype("float") / 255.0
face_crop = img_to_array(face_crop)
face_crop = np.expand_dims(face_crop, axis=0)
# apply gender detection face with the model
conf = model.predict(face_crop)[0]
# get label with max acc
idx = np.argmax(conf)
label = classes[idx]
label = "{}: {:.2f}".format(label, conf[idx] * 100)
Y = startY - 10 if startY - 10 > 10 else startY + 10
# write label and confidence above the face rectangle
cv2.putText(frame, label, (startX, Y), cv2.FONT_HERSHEY_SIMPLEX,
0.7, (0,255,0), 2)
# display output
cv2.imshow("Gender Detection", frame)
# press "Q" to stop
if cv2.waitKey(1) & 0xFF == ord('q'):
break
#realese resources
webcam.release()
cv2.destroyAllWindows()
and I also tried to use cv2.CAP_PROB_FPS but that only helps a little bit, not much.
I've had this same problem using openCV video capture with text-detection. It's not the webcam quality, but the fact that openCV can only show you frames as fast as you can process them in your gender detection. The solution that worked for me is to use multi-threading.
You can create a thread for the OpenCV video capture, then another thread for your image processing. The caveat: you can't magically make your image processing happen quicker without making changes the image process itself. It takes as long as it takes. What you can do is allow openCV to work on its own and send the frames to an exchange class, then allow the image processing to grab a frame and work at its own pace as CV2 continues as normal.
Here is a (shortened) version of my class for OCR image processing. You can see that in start() i'm creating a thread pointed at the ocr() process. This is where your gender identification process can go.
class OCR:
# def __init__(self, exchange: VideoStream, language=None):
def __init__(self):
self.exchange = None
# init stuff for OCR not relevant to my example, but note that it
# takes a VideoStream class called exchange which is where this class
# grabs frames to process
def start(self):
Thread(target=self.ocr, args=()).start()
return self
def set_exchange(self, video_stream):
self.exchange = video_stream
def ocr(self):
while not self.stopped:
if self.exchange is not None:
frame = self.exchange.frame
# # # OCR stuff goes here
Now here is the VideoStream class that grabs frames at its own pace, in a different thread. The image processing class (OCR) can then take these frames at it's own pace and the two don't affect each other's performance.
class VideoStream:
"""Class for CV2 video capture. The start() method will create a new
thread to read the video stream"""
def __init__(self, src=0):
self.stream = cv2.VideoCapture(src)
(self.grabbed, self.frame) = self.stream.read()
# self._boxes = None
self.stopped = False
def start(self):
Thread(target=self.get, args=()).start()
return self
def get(self):
while not self.stopped:
(self.grabbed, self.frame) = self.stream.read()
def get_video_dimensions(self):
width = self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)
height = self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)
return int(width), int(height)
def stop_process(self):
self.stopped = True
Then you can do your CV2 imshow loop as you normally would.
exchange = VideoStream(0).start()
ocr = OCR().start()
ocr.set_exchange(exchange)
while True: # Begins a loop for the real-time OCR display
pressed_key = cv2.waitKey(1) & 0xFF
if pressed_key == ord('q'):
stop_stream_ocr(exchange, ocr)
break
frame = exchange.frame
cv2.imshow("Video Get Frame", frame)
cps1.increment()
Note that you can't really control what thread CV2 decides to use in its internal workings, but this approach will allow you to display your webcam at its natural fps and while image processing happens in the background.
I have a 1080p webcam (Logitech v-u0032) and I am creating program that saves still from it's feed every X seconds, however images have black bars on sides and resolution is seemingly lower than still taken with Windows built in camera app, both output file that has 1920x1080 pixels but difference in actual resolution is very visible
I've searched for ways to change the resolution but OpenCV seems to always upscale to new resolution
import cv2
import numpy as np
import datetime
import time
import os
cv2.namedWindow("Unregistered Hypercam")
vc = cv2.VideoCapture(0)
vc.set(3, 1920) # Set the horizontal resolution
vc.set(4, 1080) # Set the vertical resolution
vc.set(5, 10) # Set the framerate
if vc.isOpened():
rval, frame = vc.read()
else:
rval = False
lastSave = time.time()
def mfold(name):
try:
os.mkdir(name)
except OSError:
x = 0
else:
print ("Successfully created the directory %s " % fold)
while rval:
cv2.imshow("Unregistered Hypercam", frame)
rval, frame = vc.read()
now = datetime.datetime.now()
if time.time() > lastSave + 10:
lastSave = time.time()
fold = '\\snapshots\\' # This will create folder C:\snapshots
mfold(fold)
cv2.imwrite(fold + str(now.year) + '.' + str(now.month) + '.' + str(now.day) + ' ' + str(now.hour) + 'h_' + str(now.minute) + 'm_' + str(now.second) + 's.png', frame)
key = cv2.waitKey(20)
if key == 27:
break
cv2.destroyWindow("Unregistered Hypercam")
Images are blurry and unsharp with black bars and are nowhere similar to images taken with Windows camera app
Here's an example widget to save webcam/streams into video file or screenshot images. It keeps the original resolution of the stream. I used one of my IP camera streams instead of a webcam but it should work the same with a webcam. Currently it opens a stream and saves it as a video but you can modify it to take periodic screenshots. Also to modify the resolution, you can manually resize the frame while keeping aspect ratio with this function (it currently is not used in the widget).
Adjust resolution of frame while maintaining aspect ratio
# Resizes a image and maintains aspect ratio
def maintain_aspect_ratio_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
# Grab the image size and initialize dimensions
dim = None
(h, w) = image.shape[:2]
# Return original image if no need to resize
if width is None and height is None:
return image
# We are resizing height if width is none
if width is None:
# Calculate the ratio of the height and construct the dimensions
r = height / float(h)
dim = (int(w * r), height)
# We are resizing width if height is none
else:
# Calculate the ratio of the 0idth and construct the dimensions
r = width / float(w)
dim = (width, int(h * r))
# Return the resized image
return cv2.resize(image, dim, interpolation=inter)
You can use it like this to adjust width or height
rval, frame = vc.read()
resize_width = maintain_aspect_ratio_resize(frame, width=500)
resize_height = maintain_aspect_ratio_resize(frame, height=500)
Stream and save video widget
from threading import Thread
import cv2
class VideoWriterObject(object):
def __init__(self, src=0):
# Create a VideoCapture object
self.capture = cv2.VideoCapture(src)
# Default resolutions of the frame are obtained (system dependent)
self.frame_width = int(self.capture.get(3))
self.frame_height = int(self.capture.get(4))
# Set up codec and output video settings
self.codec = cv2.VideoWriter_fourcc('M','J','P','G')
self.output_video = cv2.VideoWriter('output.avi', self.codec, 30, (self.frame_width, self.frame_height))
# Start the thread to read frames from the video stream
self.thread = Thread(target=self.update, args=())
self.thread.daemon = True
self.thread.start()
def update(self):
# Read the next frame from the stream in a different thread
while True:
if self.capture.isOpened():
(self.status, self.frame) = self.capture.read()
def show_frame(self):
# Display frames in main program
if self.status:
cv2.imshow('frame', self.frame)
# Press Q on keyboard to stop recording
key = cv2.waitKey(1)
if key == ord('q'):
self.capture.release()
self.output_video.release()
cv2.destroyAllWindows()
exit(1)
def save_frame(self):
# Save obtained frame into video output file
self.output_video.write(self.frame)
if __name__ == '__main__':
video_stream_widget = VideoWriterObject(0)
while True:
try:
video_stream_widget.show_frame()
video_stream_widget.save_frame()
except AttributeError:
pass
My code can detect an object now, which is human and other objects. But the issue that i faced now is i want to count the moving human entering a classroom. I would like to count in and out so that i can know the number of people in a room. How can do this? something like this https://www.youtube.com/watch?v=aEcBnD80nL
How can i make that detected object in rectangle to count in and out when they pass through the red and blue line?
import os
import cv2
import numpy as np
from picamera.array import PiRGBArray
from picamera import PiCamera
import tensorflow as tf
import argparse
import sys
# Set up camera constants
IM_WIDTH = 1280
IM_HEIGHT = 720
#IM_WIDTH = 640 Use smaller resolution for
#IM_HEIGHT = 480 slightly faster framerate
# Select camera type (if user enters --usbcam when calling this script,
# a USB webcam will be used)
camera_type = 'picamera'
parser = argparse.ArgumentParser()
parser.add_argument('--usbcam', help='Use a USB webcam instead of picamera',
action='store_true')
args = parser.parse_args()
if args.usbcam:
camera_type = 'usb'
# This is needed since the working directory is the object_detection folder.
sys.path.append('..')
# Import utilites
from utils import label_map_util
from utils import visualization_utils as vis_util
# Name of the directory containing the object detection module we're using
MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'
# Grab path to current working directory
CWD_PATH = os.getcwd()
# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,'data','mscoco_label_map.pbtxt')
# Number of classes the object detector can identify
NUM_CLASSES = 90
## Load the label map.
# Label maps map indices to category names, so that when the convolution
# network predicts `5`, we know that this corresponds to `airplane`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map,
max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
sess = tf.Session(graph=detection_graph)
# Define input and output tensors (i.e. data) for the object detection
classifier
# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represents level of confidence for each of the objects.
# The score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes =
detection_graph.get_tensor_by_name('detection_classes:0')
# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
font = cv2.FONT_HERSHEY_SIMPLEX
# Initialize camera and perform object detection.
# The camera has to be set up and used differently depending on if it's a
# Picamera or USB webcam.
# I know this is ugly, but I basically copy+pasted the code for the object
# detection loop twice, and made one work for Picamera and the other work
# for USB.
### Picamera ###
if camera_type == 'picamera':
# Initialize Picamera and grab reference to the raw capture
camera = PiCamera()
camera.resolution = (IM_WIDTH,IM_HEIGHT)
camera.framerate = 10
rawCapture = PiRGBArray(camera, size=(IM_WIDTH,IM_HEIGHT))
rawCapture.truncate(0)
for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
t1 = cv2.getTickCount()
# Acquire frame and expand frame dimensions to have shape: [1, None, None, 3]
# i.e. a single-column array, where each item in the column has the pixel RGB value
frame = frame1.array
frame.setflags(write=1)
frame_expanded = np.expand_dims(frame, axis=0)
# Perform the actual detection by running the model with the image as input
(boxes, scores, classes, num) = sess.run(
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: frame_expanded})
# Draw the results of the detection (aka 'visulaize the results')
vis_util.visualize_boxes_and_labels_on_image_array(
frame,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8,
min_score_thresh=0.40)
# Blue line
cv2.line(frame, (IM_WIDTH // 2, 0), (IM_WIDTH // 2 , IM_WIDTH), (250, 0, 1), 2)
# Red line
cv2.line(frame, (IM_WIDTH // 2 - 50, 0), (IM_WIDTH // 2 - 50, IM_WIDTH), (0, 0, 255), 2)
# FPS Text
cv2.putText(frame,"FPS: {0:.2f}".format(frame_rate_calc),(30,50),font,1,(255,255,0),2,cv2.LINE_AA)
# All the results have been drawn on the frame, so it's time to display it.
cv2.imshow('Object detector', frame)
t2 = cv2.getTickCount()
time1 = (t2-t1)/freq
frame_rate_calc = 1/time1
# Press 'q' to quit
if cv2.waitKey(1) == ord('q'):
break
rawCapture.truncate(0)
camera.close()