How to convert RasPi Python Code to JetsonNano compatible python code - python

I'm trying to create a stereo vision camera on my jetson nano with 2 raspi cameras. However, I can find a lot of information and code online regarding RasPi but not jetson nano. So for example let's say I have these 2 python programs, the first for starting both cameras on Jetson nano and the second for starting both cameras on RasPi. I'm quite new to all this, so it would be great to get some advice on how I could get started on this. Thanks!
Jetson (taken from JetsonHacks):
# MIT License
# Copyright (c) 2019,2020 JetsonHacks
# See license
# A very simple code snippet
# Using two CSI cameras (such as the Raspberry Pi Version 2) connected to a
# NVIDIA Jetson Nano Developer Kit (Rev B01) using OpenCV
# Drivers for the camera and OpenCV are included in the base image in JetPack 4.3+
# This script will open a window and place the camera stream from each camera in a window
# arranged horizontally.
# The camera streams are each read in their own thread, as when done sequentially there
# is a noticeable lag
# For better performance, the next step would be to experiment with having the window display
# in a separate thread
import cv2
import threading
import numpy as np
# gstreamer_pipeline returns a GStreamer pipeline for capturing from the CSI camera
# Flip the image by setting the flip_method (most common values: 0 and 2)
# display_width and display_height determine the size of each camera pane in the window on the screen
# Module-level placeholders for the two camera objects. start_cameras() below
# binds its own local variables with these names, so these stay None.
left_camera = None
right_camera = None
class CSI_Camera:
    """Threaded wrapper around an OpenCV capture fed by a GStreamer pipeline.

    A background thread keeps calling ``video_capture.read()`` and stores the
    newest result; ``read()`` hands out a copy of that frame under a lock.
    """

    def __init__(self):
        # OpenCV video capture element
        self.video_capture = None
        # The last captured image from the camera
        self.frame = None
        self.grabbed = False
        # The thread where the video capture runs
        self.read_thread = None
        self.read_lock = threading.Lock()
        self.running = False

    def open(self, gstreamer_pipeline_string):
        """Create the capture for the given pipeline and grab a first frame.

        If creating the capture raises RuntimeError, ``video_capture`` is left
        as None and a message is printed.
        """
        try:
            self.video_capture = cv2.VideoCapture(
                gstreamer_pipeline_string, cv2.CAP_GSTREAMER
            )
        except RuntimeError:
            self.video_capture = None
            print("Unable to open camera")
            print("Pipeline: " + gstreamer_pipeline_string)
            return
        # Grab the first frame to start the video capturing
        self.grabbed, self.frame = self.video_capture.read()

    def start(self):
        """Start the reader thread; return self, or None if already running."""
        if self.running:
            print('Video capturing is already running')
            return None
        # create a thread to read the camera image
        if self.video_capture is not None:
            self.running = True
            self.read_thread = threading.Thread(target=self.updateCamera)
            self.read_thread.start()
        return self

    def stop(self):
        """Ask the reader thread to finish and wait for it (no-op if none)."""
        self.running = False
        if self.read_thread is not None:
            self.read_thread.join()
            self.read_thread = None

    def updateCamera(self):
        """Reader-thread body: keep storing the latest frame while running."""
        while self.running:
            try:
                grabbed, frame = self.video_capture.read()
            except RuntimeError:
                print("Could not read image from camera")
                # Leave the loop instead of retrying forever on a broken capture
                self.running = False
                break
            with self.read_lock:
                self.grabbed = grabbed
                self.frame = frame

    def read(self):
        """Return ``(grabbed, frame_copy)``; frame is None if none was captured."""
        with self.read_lock:
            # self.frame is None until a read succeeds, so guard the copy
            frame = None if self.frame is None else self.frame.copy()
            grabbed = self.grabbed
        return grabbed, frame

    def release(self):
        """Stop the reader thread, then release the capture."""
        # The thread must be finished before the capture it reads from is
        # released, otherwise it would call read() on a released/None capture.
        self.running = False
        if self.read_thread is not None:
            self.read_thread.join()
            self.read_thread = None
        if self.video_capture is not None:
            self.video_capture.release()
            self.video_capture = None
# Currently there are setting frame rate on CSI Camera on Nano through gstreamer
# Here we directly select sensor_mode 3 (1280x720, 59.9999 fps)
def gstreamer_pipeline(
    sensor_id=0,
    sensor_mode=3,
    capture_width=1280,
    capture_height=720,
    display_width=1280,
    display_height=720,
    framerate=30,
    flip_method=0,
):
    """Return the GStreamer pipeline string for capturing from a CSI camera.

    The capture size and frame rate go into the NVMM caps after the source;
    flip_method and the display size go into the nvvidconv stage that
    follows. The pipeline ends in a BGR appsink.
    """
    source = "nvarguscamerasrc sensor-id=%d sensor-mode=%d" % (
        sensor_id,
        sensor_mode,
    )
    capture_caps = (
        "video/x-raw(memory:NVMM), "
        "width=(int)%d, height=(int)%d, "
        "format=(string)NV12, framerate=(fraction)%d/1"
        % (capture_width, capture_height, framerate)
    )
    converter = "nvvidconv flip-method=%d" % (flip_method,)
    display_caps = (
        "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx"
        % (display_width, display_height)
    )
    stages = [
        source,
        capture_caps,
        converter,
        display_caps,
        "videoconvert",
        "video/x-raw, format=(string)BGR",
        "appsink",
    ]
    return " ! ".join(stages)
def start_cameras():
    """Open both CSI cameras and show them side by side until ESC is pressed.

    Raises:
        SystemExit: if either camera could not be opened.
    """
    left_camera = CSI_Camera()
    left_camera.open(
        gstreamer_pipeline(
            sensor_id=0,
            sensor_mode=3,
            flip_method=0,
            display_height=540,
            display_width=960,
        )
    )
    left_camera.start()

    right_camera = CSI_Camera()
    right_camera.open(
        gstreamer_pipeline(
            sensor_id=1,
            sensor_mode=3,
            flip_method=0,
            display_height=540,
            display_width=960,
        )
    )
    right_camera.start()

    cameras = (left_camera, right_camera)
    cv2.namedWindow("CSI Cameras", cv2.WINDOW_AUTOSIZE)
    try:
        if any(
            cam.video_capture is None or not cam.video_capture.isOpened()
            for cam in cameras
        ):
            # Cameras did not open, or no camera attached
            print("Unable to open any cameras")
            # The exception has to be raised; merely constructing it (as the
            # code did before) lets execution fall through into the loop.
            raise SystemExit(0)

        while cv2.getWindowProperty("CSI Cameras", 0) >= 0:
            _, left_image = left_camera.read()
            _, right_image = right_camera.read()
            camera_images = np.hstack((left_image, right_image))
            cv2.imshow("CSI Cameras", camera_images)

            # waitKey also gives the GUI time to redraw the window
            keyCode = cv2.waitKey(30) & 0xFF
            # Stop the program on the ESC key
            if keyCode == 27:
                break
    finally:
        # Runs on normal exit and on the failure path alike
        for cam in cameras:
            if cam.read_thread is not None:
                cam.stop()
            cam.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    start_cameras()
RasPi (from https://github.com/realizator/stereopi-tutorial/blob/master/1_test.py):
# Copyright (C) 2019 Eugene Pomazov, <stereopi.com>, virt2real team
#
# This file is part of StereoPi tutorial scripts.
#
# StereoPi tutorial is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# StereoPi tutorial is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with StereoPi tutorial.
# If not, see <http://www.gnu.org/licenses/>.
#
# Most of this code is updated version of 3dberry.org project by virt2real
#
# Thanks to Adrian and http://pyimagesearch.com, as there are lot of
# code in this tutorial was taken from his lessons.
#
import picamera
from picamera import PiCamera
import time
import cv2
import numpy as np
import os
from datetime import datetime
# Where the last frame is written when the user quits
filename = './scenes/photo.png'

# Requested camera resolution
cam_width = 1280
cam_height = 480

# Scale factor applied to every captured frame
scale_ratio = 0.5

# Round the resolution up: width to a multiple of 32, height to a multiple of 16
cam_width = ((cam_width + 31) // 32) * 32
cam_height = ((cam_height + 15) // 16) * 16
print("Used camera resolution: " + str(cam_width) + " x " + str(cam_height))

# Size of the scaled frame and the 4-channel buffer that receives it
img_width = int(cam_width * scale_ratio)
img_height = int(cam_height * scale_ratio)
capture = np.zeros((img_height, img_width, 4), dtype=np.uint8)
print("Scaled image resolution: " + str(img_width) + " x " + str(img_height))

# Set up the camera in side-by-side stereo mode
camera = PiCamera(stereo_mode='side-by-side', stereo_decimate=False)
camera.resolution = (cam_width, cam_height)
camera.framerate = 20
camera.hflip = True

t2 = datetime.now()
counter = 0
avgtime = 0

# Show frames as they arrive; 'q' prints timing stats, saves the frame, quits
frame_stream = camera.capture_continuous(
    capture,
    format="bgra",
    use_video_port=True,
    resize=(img_width, img_height),
)
for frame in frame_stream:
    counter += 1
    t1 = datetime.now()
    # Time from the end of the previous waitKey to the arrival of this frame
    avgtime += (t1 - t2).total_seconds()
    cv2.imshow("pair", frame)
    key = cv2.waitKey(1) & 0xFF
    t2 = datetime.now()

    if key != ord("q"):
        continue

    # 'q' pressed: report the averages and save the last image
    avgtime = avgtime / counter
    print("Average time between frames: " + str(avgtime))
    print("Average FPS: " + str(1 / avgtime))
    if not os.path.isdir("./scenes"):
        os.makedirs("./scenes")
    cv2.imwrite(filename, frame)
    break

Related

How to retrieve class_name in detect.py example of TensorFlowLite?

I'm using the detect.py script from the official TensorFlow Lite examples for the Raspberry Pi. I want to print the class name of the detected object. For example, if the object recognition model detects a surgical mask, it should print "surgical_mask" on the command line. I was able to print the detection_result, which shows these results on the command line:
detections {
bounding_box {
origin_x: 135
origin_y: 14
width: 478
height: 457
}
classes {
index: 0
score: 0.875
class_name: "surgical_mask"
}
}
I do not know how to print the "class_name"
Here is the code for the detect.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Main script to run the object detection routine."""
import argparse
import sys
import time
import cv2
from tflite_support.task import core
from tflite_support.task import processor
from tflite_support.task import vision
import utils
def run(model: str, camera_id: int, width: int, height: int, num_threads: int,
        enable_edgetpu: bool) -> None:
    """Continuously run inference on images acquired from the camera.

    The capture device and any OpenCV windows are released on every exit
    path, including the sys.exit() raised when a frame cannot be read.

    Args:
      model: Name of the TFLite object detection model.
      camera_id: The camera id to be passed to OpenCV.
      width: The width of the frame captured from the camera.
      height: The height of the frame captured from the camera.
      num_threads: The number of CPU threads to run the model.
      enable_edgetpu: True/False whether the model is a EdgeTPU model.

    Raises:
      SystemExit: if a frame cannot be read from the camera.
    """
    # Variables to calculate FPS
    counter, fps = 0, 0
    start_time = time.time()

    # Start capturing video input from the camera
    cap = cv2.VideoCapture(camera_id)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

    # Visualization parameters
    row_size = 20  # pixels
    left_margin = 24  # pixels
    text_color = (0, 0, 255)  # red
    font_size = 1
    font_thickness = 1
    fps_avg_frame_count = 10

    try:
        # Initialize the object detection model
        base_options = core.BaseOptions(
            file_name=model, use_coral=enable_edgetpu, num_threads=num_threads)
        detection_options = processor.DetectionOptions(
            max_results=3, score_threshold=0.3)
        options = vision.ObjectDetectorOptions(
            base_options=base_options, detection_options=detection_options)
        detector = vision.ObjectDetector.create_from_options(options)

        # Continuously capture images from the camera and run inference
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                sys.exit(
                    'ERROR: Unable to read from webcam. Please verify your webcam settings.'
                )

            counter += 1
            image = cv2.flip(image, 1)

            # Convert the image from BGR to RGB as required by the TFLite model.
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Create a TensorImage object from the RGB image.
            input_tensor = vision.TensorImage.create_from_array(rgb_image)

            # Run object detection estimation using the model.
            detection_result = detector.detect(input_tensor)

            # Draw the detection results on the input image
            image = utils.visualize(image, detection_result)

            # Recompute the FPS once every fps_avg_frame_count frames
            if counter % fps_avg_frame_count == 0:
                end_time = time.time()
                fps = fps_avg_frame_count / (end_time - start_time)
                start_time = time.time()

            # Show the FPS
            fps_text = 'FPS = {:.1f}'.format(fps)
            text_location = (left_margin, row_size)
            cv2.putText(image, fps_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                        font_size, text_color, font_thickness)
            print(detection_result)

            # Stop the program if the ESC key is pressed.
            if cv2.waitKey(1) == 27:
                break
            cv2.imshow('object_detector', image)
    finally:
        cap.release()
        cv2.destroyAllWindows()
def main():
    """Parse the command-line options and hand them to run()."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--model',
        default='efficientdet_lite0.tflite',
        required=False,
        help='Path of the object detection model.')

    # All integer options share the same shape: (flag, help text, default)
    int_options = (
        ('--cameraId', 'Id of camera.', 0),
        ('--frameWidth', 'Width of frame to capture from camera.', 640),
        ('--frameHeight', 'Height of frame to capture from camera.', 480),
        ('--numThreads', 'Number of CPU threads to run the model.', 4),
    )
    for flag, help_text, default_value in int_options:
        parser.add_argument(
            flag, help=help_text, required=False, type=int,
            default=default_value)

    parser.add_argument(
        '--enableEdgeTPU',
        action='store_true',
        default=False,
        required=False,
        help='Whether to run the model on EdgeTPU.')

    parsed = parser.parse_args()
    run(
        parsed.model,
        int(parsed.cameraId),
        parsed.frameWidth,
        parsed.frameHeight,
        int(parsed.numThreads),
        bool(parsed.enableEdgeTPU),
    )


if __name__ == '__main__':
    main()
You can get the number of detections result by doing this
Num_of_detections = len(detection_result)
Then use a for loop to go through the results:
print(detection_result.detections[x].classes[0].class_name)
I am new to stackoverflow but i have a script that will do the task.
detectionIntermediate = detection_result.detections
lengthOfResult = len(detectionIntermediate)
if lengthOfResult > 0:
    print("Detection Results: ")
    # Read the label straight from the result object instead of splitting its
    # printed form and picking a hard-coded line index, which breaks as soon
    # as the printed layout changes.
    # NOTE(review): attribute names follow the printed output shown in the
    # question (classes / class_name) - confirm for your tflite_support version.
    formattedClassOutput = detectionIntermediate[0].classes[0].class_name
    print(formattedClassOutput)

yolov5 reduce the frame rate while using the camera as a source?

I am working on a project to predict pedestrian paths from their trajectory history. I am following this paper and trying to run it and generate the annotations that are missing for it:
https://github.com/JunweiLiang/Multiverse/blob/master/SimAug
I used YOLOv5 to solve this, but the source runs at 30 fps and processing all of those frames takes a long time. I want to lower the frame rate so that, for example, only 2 frames per second are taken and passed through the pipeline:
(detection --> tracking) --> segmentation --> prediction and so on.
so basically here is what i did
import numpy as np
import cv2
import datetime
import queue
from threading import Thread
# global variables
stop_thread = False  # set to True to ask the capture thread to finish


def start_capture_thread(cap, queue):
    """Read frames from *cap* and put them on *queue* until asked to stop.

    The loop ends when the module-level ``stop_thread`` flag becomes true or
    when ``cap.read()`` reports that no frame was grabbed, so a failed read
    is never pushed onto the queue.

    Args:
        cap: object with a ``read()`` method returning ``(grabbed, frame)``.
        queue: object with a ``put(frame)`` method that receives each frame.
    """
    # continuously read frames from the camera
    while not stop_thread:
        grabbed, img = cap.read()
        if not grabbed:
            # Nothing was read (e.g. capture released or stream ended)
            break
        queue.put(img)
def main():
    """Capture webcam frames on a background thread and save every 5th one.

    Frames are written to ``Images/frame<i>.jpg`` where ``i`` is the index of
    the frame in the captured stream. ESC (checked after each saved frame)
    ends the program.
    """
    global stop_thread
    import os  # local import: only needed to create the output directory

    # create display window (waitKey needs a window to receive key presses)
    cv2.namedWindow("webcam", cv2.WINDOW_NORMAL)

    # initialize webcam capture object
    cap = cv2.VideoCapture(0)

    # retrieve properties of the capture object
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    print('* Capture width:', cap_width)
    print('* Capture height:', cap_height)
    print('* Capture FPS:', cap_fps)

    # cv2.imwrite returns False without raising when the directory is
    # missing, so make sure it exists up front
    os.makedirs('Images', exist_ok=True)

    # create a queue and start the capture thread that fills it
    frames_queue = queue.Queue(maxsize=0)
    t = Thread(target=start_capture_thread, args=(cap, frames_queue,), daemon=True)  # a daemon thread is killed when the application exits
    t.start()

    i = 0
    while True:
        # Block briefly for the next frame instead of spinning on empty()
        try:
            img = frames_queue.get(timeout=0.1)
        except queue.Empty:
            if not t.is_alive():
                break  # the capture thread is gone; no more frames will come
            continue

        if img is None:
            break  # the capture failed to deliver a frame

        # keep one frame out of five, drop the rest
        if i % 5 == 0:
            cv2.imwrite('Images/frame{:d}.jpg'.format(i), img)
            # TODO: process the image here if needed

            # wait 1ms for ESC to be pressed
            key = cv2.waitKey(1)
            if key == 27:
                break
        i += 1

    # ask the capture thread to finish before the capture is released
    stop_thread = True
    t.join(timeout=1.0)

    # release resources
    cv2.destroyAllWindows()
    cap.release()


if __name__ == "__main__":
    main()
and it works so i want to use this in the track.py in yolov5 instead of the
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
but i can't modify it as it used in the code like this
for frame_idx, (path, img, im0s, vid_cap, s) in enumerate(dataset):
but I couldn't fully understand this line,
so I wanted to modify the LoadStreams class here https://github.com/ultralytics/yolov5/blob/master/utils/datasets.py but I am stuck at this point.

Unable to save image from Raspi camera -> cv2.imwrite (Using Jetson Nano)

I am trying to save images from the Raspi cameras connected to my Jetson nano. My Code is below. However, the code shows that it is saving the files, but no matter which method I try I cannot find the images. Thanks for your help. I've included a smaller snippet of just the while loop itself so it will be easier for you all to refer to.
While loop:
while True:
    _, left_image = left_camera.read()
    _, right_image = right_camera.read()
    camera_images = np.hstack((left_image, right_image))
    cv2.imshow("CSI Cameras", camera_images)
    t1 = datetime.now()
    cntdwn_timer = countdown - int((t1 - t2).total_seconds())
    # If countdown is zero - let's record next image
    if cntdwn_timer == -1:
        counter += 1
        filename = './scenes/scene_' + str(counter) + 'x' + '_' + '.png'
        target = '/home/aryan/CSI-Camera/{}'.format(filename)
        # cv2.imwrite does not raise when it cannot write (for example when
        # the target directory is missing); it returns False, so check it
        if cv2.imwrite(target, camera_images):
            print(' monkey' + filename)
        else:
            print('Could not write ' + target + ' - does the directory exist?')
        t2 = datetime.now()
        time.sleep(1)
        cntdwn_timer = 0  # To avoid "-1" timer display
    # waitKey also gives the GUI time to redraw the window
    keyCode = cv2.waitKey(30) & 0xFF
    # Stop the program on the ESC key
    if keyCode == 27:
        break
left_camera.stop()
left_camera.release()
right_camera.stop()
right_camera.release()
cv2.destroyAllWindows()
import cv2
import threading
import numpy as np
import time
from datetime import datetime
# gstreamer_pipeline returns a GStreamer pipeline for capturing from the CSI camera
# Flip the image by setting the flip_method (most common values: 0 and 2)
# display_width and display_height determine the size of each camera pane in the window on the screen
# Module-level placeholders; start_cameras() below binds its own local
# variables with these names, so these stay None.
left_camera = None
right_camera = None
#PiCam
# Photo session settings
# NOTE(review): total_photos and font are not referenced anywhere else in the
# code shown here; only countdown is read by the capture loop.
total_photos = 30 # Number of images to take
countdown = 5 # Interval for count-down timer, seconds
font=cv2.FONT_HERSHEY_SIMPLEX # Countdown timer font
class CSI_Camera:
    """Threaded wrapper around an OpenCV capture fed by a GStreamer pipeline.

    A background thread keeps calling ``video_capture.read()`` and stores the
    newest result; ``read()`` hands out a copy of that frame under a lock.
    """

    def __init__(self):
        # OpenCV video capture element
        self.video_capture = None
        # The last captured image from the camera
        self.frame = None
        self.grabbed = False
        # The thread where the video capture runs
        self.read_thread = None
        self.read_lock = threading.Lock()
        self.running = False

    def open(self, gstreamer_pipeline_string):
        """Create the capture for the given pipeline and grab a first frame.

        If creating the capture raises RuntimeError, ``video_capture`` is left
        as None and a message is printed.
        """
        try:
            self.video_capture = cv2.VideoCapture(
                gstreamer_pipeline_string, cv2.CAP_GSTREAMER
            )
        except RuntimeError:
            self.video_capture = None
            print("Unable to open camera")
            print("Pipeline: " + gstreamer_pipeline_string)
            return
        # Grab the first frame to start the video capturing
        self.grabbed, self.frame = self.video_capture.read()

    def start(self):
        """Start the reader thread; return self, or None if already running."""
        if self.running:
            print('Video capturing is already running')
            return None
        # create a thread to read the camera image
        if self.video_capture is not None:
            self.running = True
            self.read_thread = threading.Thread(target=self.updateCamera)
            self.read_thread.start()
        return self

    def stop(self):
        """Ask the reader thread to finish and wait for it (no-op if none)."""
        self.running = False
        if self.read_thread is not None:
            self.read_thread.join()
            self.read_thread = None

    def updateCamera(self):
        """Reader-thread body: keep storing the latest frame while running."""
        while self.running:
            try:
                grabbed, frame = self.video_capture.read()
            except RuntimeError:
                print("Could not read image from camera")
                # Leave the loop instead of retrying forever on a broken capture
                self.running = False
                break
            with self.read_lock:
                self.grabbed = grabbed
                self.frame = frame

    def read(self):
        """Return ``(grabbed, frame_copy)``; frame is None if none was captured."""
        with self.read_lock:
            # self.frame is None until a read succeeds, so guard the copy
            frame = None if self.frame is None else self.frame.copy()
            grabbed = self.grabbed
        return grabbed, frame

    def release(self):
        """Stop the reader thread, then release the capture."""
        # The thread must be finished before the capture it reads from is
        # released, otherwise it would call read() on a released/None capture.
        self.running = False
        if self.read_thread is not None:
            self.read_thread.join()
            self.read_thread = None
        if self.video_capture is not None:
            self.video_capture.release()
            self.video_capture = None
# Currently there are setting frame rate on CSI Camera on Nano through gstreamer
# Here we directly select sensor_mode 3 (1280x720, 59.9999 fps)
def gstreamer_pipeline(
    sensor_id=0,
    sensor_mode=3,
    capture_width=1280,
    capture_height=720,
    display_width=1280,
    display_height=720,
    framerate=30,
    flip_method=0,
):
    """Return the GStreamer pipeline string for capturing from a CSI camera.

    The capture size and frame rate go into the NVMM caps after the source;
    flip_method and the display size go into the nvvidconv stage that
    follows. The pipeline ends in a BGR appsink.
    """
    source = "nvarguscamerasrc sensor-id=%d sensor-mode=%d" % (
        sensor_id,
        sensor_mode,
    )
    capture_caps = (
        "video/x-raw(memory:NVMM), "
        "width=(int)%d, height=(int)%d, "
        "format=(string)NV12, framerate=(fraction)%d/1"
        % (capture_width, capture_height, framerate)
    )
    converter = "nvvidconv flip-method=%d" % (flip_method,)
    display_caps = (
        "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx"
        % (display_width, display_height)
    )
    stages = [
        source,
        capture_caps,
        converter,
        display_caps,
        "videoconvert",
        "video/x-raw, format=(string)BGR",
        "appsink",
    ]
    return " ! ".join(stages)
def start_cameras():
    """Show both CSI cameras side by side and save a stitched image per countdown.

    Each time the countdown (module-level ``countdown`` seconds) runs out,
    the current side-by-side image is written below /home/aryan/CSI-Camera.
    ESC ends the session.

    Raises:
        SystemExit: if either camera could not be opened.
    """
    import os  # local import: only needed to create the output directory

    # One title for both namedWindow and imshow; using two different titles
    # leaves an empty extra window on screen.
    window_title = "CSI Cameras"

    left_camera = CSI_Camera()
    left_camera.open(
        gstreamer_pipeline(
            sensor_id=0,
            sensor_mode=3,
            flip_method=0,
            display_height=540,
            display_width=960,
        )
    )
    left_camera.start()

    right_camera = CSI_Camera()
    right_camera.open(
        gstreamer_pipeline(
            sensor_id=1,
            sensor_mode=3,
            flip_method=0,
            display_height=540,
            display_width=960,
        )
    )
    right_camera.start()

    cameras = (left_camera, right_camera)
    cv2.namedWindow(window_title, cv2.WINDOW_AUTOSIZE)
    try:
        if any(
            cam.video_capture is None or not cam.video_capture.isOpened()
            for cam in cameras
        ):
            # Cameras did not open, or no camera attached
            print("Unable to open any cameras")
            # The exception has to be raised; merely constructing it lets
            # execution fall through into the capture loop.
            raise SystemExit(0)

        counter = 0
        t2 = datetime.now()
        while True:
            _, left_image = left_camera.read()
            _, right_image = right_camera.read()
            camera_images = np.hstack((left_image, right_image))
            cv2.imshow(window_title, camera_images)

            t1 = datetime.now()
            cntdwn_timer = countdown - int((t1 - t2).total_seconds())
            # If countdown is zero - let's record next image
            if cntdwn_timer == -1:
                counter += 1
                filename = './scenes/scene_' + str(counter) + 'x' + '_' + '.png'
                target = '/home/aryan/CSI-Camera/{}'.format(filename)
                # cv2.imwrite returns False without raising when the target
                # directory is missing, so create it and check the result
                os.makedirs(os.path.dirname(target), exist_ok=True)
                if cv2.imwrite(target, camera_images):
                    print(' monkey' + filename)
                else:
                    print('Could not write ' + target)
                t2 = datetime.now()
                time.sleep(1)
                cntdwn_timer = 0  # To avoid "-1" timer display

            # waitKey also gives the GUI time to redraw the window
            keyCode = cv2.waitKey(30) & 0xFF
            # Stop the program on the ESC key
            if keyCode == 27:
                break
    finally:
        # Runs on normal exit and on the failure path alike
        for cam in cameras:
            if cam.read_thread is not None:
                cam.stop()
            cam.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    start_cameras()
Put a breakpoint on the line where you want to save the image and inspect the image you want to save:
Does it have data inside?
Does camera_images.shape have the size you expect?
What is the return value of the cv2.imwrite function?
Does the path you are trying to write to really exist?
Did you append .png or .jpg to the file name?

Speed up the creation of a circular ROI on a 4K 25 fps video in python opencv

I am working with a 2h-long 4K MP4 video shot at 25 fps. I am using openCV in python 3. From the video, I need to extract 3 circular ROIs.
Because of the large number of frames (212831), processing the whole video frame by frame with the code below takes over 24h on a 64 GB RAM workstation. Is there a way to speed up the processing or a workaround that does something slightly different to a similar effect?
Here is the code.
import numpy as np
import cv2
import sys
import time
# name of source video and paths
video = 'C0023_2hanalysis'
input_vidpath = 'path_to_video/' + video + '.MP4'
output_vidpath = 'path_to_video/' + video + '-withROI.MP4'
codec = 'avc1'

# ROI centre (x = column, y = row, in pixels) and radius of the circle
x = 1188
y = 1204
radius = 75

# set parameters for output video as identical to input
fps = 25.0
scaling = 1.0  # output vs input frame size

## import video
cap = cv2.VideoCapture(input_vidpath)
if not cap.isOpened():
    sys.exit('Video file cannot be read! Please check input_vidpath to ensure it is correctly pointing to the video file')

## Video writer class to output video
fourcc = cv2.VideoWriter_fourcc(*codec)  # the 4-char codec name packed into a fourcc code

# Ask the capture for the frame size. Reading frames just to look at their
# shape consumed the first two frames, which then never reached the output.
o_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
o_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# VideoWriter wants (width, height)
output_framesize = (int(o_width * scaling), int(o_height * scaling))
# 0x7634706d is the 'mp4v' fourcc; the fourcc built from codec above is not used here
out = cv2.VideoWriter(filename=output_vidpath, fourcc=0x7634706d, fps=fps, frameSize=output_framesize, isColor=True)

# The ROI never moves, so the mask is built once instead of once per frame:
# 255 inside the filled circle, 0 everywhere else.
mask = np.zeros((o_height, o_width), np.uint8)
cv2.circle(mask, (x, y), radius, 255, thickness=-1)

## apply ROI frame by frame and write the result to the output video
start = time.time()
f = -1
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        break  # no more frames
    # Keep the pixels under the mask, black out the rest
    out.write(cv2.bitwise_and(frame, frame, mask=mask))
    # No cv2.waitKey() in the loop: there is no window to serve, and the call
    # only adds a delay to every frame.
    f += 1
    if f % 1000 == 0:
        print(f)

## When everything done, release the capture
cap.release()
out.release()
cv2.destroyAllWindows()
cv2.waitKey(1)

## End time and duration
end = time.time()
duration = end - start
print("--- %s seconds ---" % duration)
This is a common mistake. You shouldn't call waitKey(1) when you want to process the frames as fast as possible. That function basically adds a short sleep after each frame, and that sleep is much longer than the processing time.
You just need to remove it; you can still kill the process if you want to stop in the middle.
import numpy as np
import cv2
import sys
import time
# name of source video and paths
video = 'C0023_2hanalysis'
input_vidpath = 'path_to_video/' + video + '.MP4'
output_vidpath = 'path_to_video/' + video + '-withROI.MP4'
codec = 'avc1'

# ROI centre (x = column, y = row, in pixels) and radius of the circle
x = 1188
y = 1204
radius = 75

# set parameters for output video as identical to input
fps = 25.0
scaling = 1.0  # output vs input frame size

## import video
cap = cv2.VideoCapture(input_vidpath)
if not cap.isOpened():
    sys.exit('Video file cannot be read! Please check input_vidpath to ensure it is correctly pointing to the video file')

## Video writer class to output video
fourcc = cv2.VideoWriter_fourcc(*codec)  # the 4-char codec name packed into a fourcc code

# Ask the capture for the frame size. Reading frames just to look at their
# shape consumed the first two frames, which then never reached the output.
o_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
o_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# VideoWriter wants (width, height)
output_framesize = (int(o_width * scaling), int(o_height * scaling))
# 0x7634706d is the 'mp4v' fourcc; the fourcc built from codec above is not used here
out = cv2.VideoWriter(filename=output_vidpath, fourcc=0x7634706d, fps=fps, frameSize=output_framesize, isColor=True)

# The ROI never moves, so the mask is built once instead of once per frame:
# 255 inside the filled circle, 0 everywhere else.
mask = np.zeros((o_height, o_width), np.uint8)
cv2.circle(mask, (x, y), radius, 255, thickness=-1)

## apply ROI frame by frame and write the result to the output video
start = time.time()
f = -1
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        break  # no more frames
    # Keep the pixels under the mask, black out the rest
    out.write(cv2.bitwise_and(frame, frame, mask=mask))
    f += 1
    if f % 1000 == 0:
        print(f)

## When everything done, release the capture
cap.release()
out.release()
cv2.destroyAllWindows()
cv2.waitKey(1)

## End time and duration
end = time.time()
duration = end - start
print("--- %s seconds ---" % duration)

How to pass image grabbed from camera to tf.read_file

I have a function which grabs an image from a camera:
img = cam.read()
Now I want to read that image with tf.read_file,
but when I do img_raw = tf.read_file(img), it doesn't work.
This is the code for the camera class:
"""camera.py
This code implements the Camera class, which encapsulates code to
handle IP CAM, USB webcam or the Jetson onboard camera. The Camera
class is further extend to take either a video or an image file as
input.
"""
import time
import logging
import threading
import numpy as np
import cv2
def open_cam_rtsp(uri, width, height, latency):
    """Open an RTSP URI (IP CAM) through a GStreamer pipeline."""
    elements = [
        'rtspsrc location={} latency={}'.format(uri, latency),
        'rtph264depay',
        'h264parse',
        'omxh264dec',
        'nvvidconv',
        'video/x-raw, width=(int){}, height=(int){}, '
        'format=(string)BGRx'.format(width, height),
        'videoconvert',
        'appsink',
    ]
    pipeline = ' ! '.join(elements)
    return cv2.VideoCapture(pipeline, cv2.CAP_GSTREAMER)
def open_cam_usb(dev, width, height):
    """Open a USB webcam through a GStreamer pipeline.

    The pipeline is used so that width and height can be requested;
    otherwise ``cv2.VideoCapture(dev)`` alone would do.
    """
    elements = [
        'v4l2src device=/dev/video{}'.format(dev),
        'video/x-raw, width=(int){}, height=(int){}, '
        'format=(string)RGB'.format(width, height),
        'videoconvert',
        'appsink',
    ]
    pipeline = ' ! '.join(elements)
    return cv2.VideoCapture(pipeline, cv2.CAP_GSTREAMER)
def open_cam_onboard(width, height):
    """Open the Jetson onboard camera through a GStreamer pipeline.

    On versions of L4T prior to 28.1, you might need to add
    'flip-method=2' to the nvvidconv element.
    """
    elements = [
        'nvcamerasrc',
        'video/x-raw(memory:NVMM), '
        'width=(int)2592, height=(int)1458, '
        'format=(string)I420, framerate=(fraction)30/1',
        'nvvidconv',
        'video/x-raw, width=(int){}, height=(int){}, '
        'format=(string)BGRx'.format(width, height),
        'videoconvert',
        'appsink',
    ]
    pipeline = ' ! '.join(elements)
    return cv2.VideoCapture(pipeline, cv2.CAP_GSTREAMER)
def grab_img(cam):
    """Sub-thread body: keep ``cam.img_handle`` refreshed with the latest image.

    Runs until ``cam.thread_running`` is set to False or the capture returns
    no image; ``cam.thread_running`` is always False when this returns.
    """
    while cam.thread_running:
        if not cam.args.use_image:
            _, cam.img_handle = cam.cap.read()
            if cam.img_handle is None:
                logging.warning('grab_img(): cap.read() returns None...')
                break
            continue
        # Still-image mode: the same image is reused, nothing to update
        assert cam.img_handle is not None, 'img_handle is empty in use_image case!'
        time.sleep(0.01)  # yield CPU to other threads
    cam.thread_running = False
class Camera():
    """Reads images from one of these video sources, chosen by ``args``:
        1. Video file
        2. Image (jpg, png, etc.) file, repeating indefinitely
        3. RTSP (IP CAM)
        4. USB webcam
        5. Jetson onboard camera
    """

    def __init__(self, args):
        self.args = args
        self.cap = None
        self.thread = None
        self.img_handle = None
        self.img_width = 0
        self.img_height = 0
        self.is_opened = False
        self.thread_running = False

    def open(self):
        """Open the source selected by the command line arguments."""
        assert self.cap is None, 'Camera is already opened!'
        opts = self.args
        if opts.use_file:
            # image width/height settings are ignored for files
            self.cap = cv2.VideoCapture(opts.filename)
        elif opts.use_image:
            # 'OK' marks "no capture object"; the still image is served directly
            self.cap = 'OK'
            self.img_handle = cv2.imread(opts.filename)
            if self.img_handle is not None:
                self.is_opened = True
                self.img_height, self.img_width, _ = self.img_handle.shape
            return
        elif opts.use_rtsp:
            self.cap = open_cam_rtsp(
                opts.rtsp_uri,
                opts.image_width,
                opts.image_height,
                opts.rtsp_latency,
            )
        elif opts.use_usb:
            self.cap = open_cam_usb(
                opts.video_dev, opts.image_width, opts.image_height)
        else:
            # by default, use the jetson onboard camera
            self.cap = open_cam_onboard(opts.image_width, opts.image_height)

        if not self.cap.isOpened():
            return
        # Try to grab the 1st image and determine width and height
        _, first = self.cap.read()
        if first is None:
            return
        self.img_height, self.img_width, _ = first.shape
        self.is_opened = True

    def start(self):
        """Launch the grab_img sub-thread."""
        assert not self.thread_running
        self.thread_running = True
        self.thread = threading.Thread(target=grab_img, args=(self,))
        self.thread.start()

    def stop(self):
        """Signal the sub-thread to finish and wait for it."""
        self.thread_running = False
        self.thread.join()

    def read(self):
        """Return the current image (a copy when serving a still image)."""
        return np.copy(self.img_handle) if self.args.use_image else self.img_handle

    def release(self):
        """Release the capture object, if there is a real one."""
        assert not self.thread_running
        if self.cap == 'OK':
            return
        self.cap.release()
So in another python script I want to use this camera class in order to grab a image and load it with tf.read_file:
from utils.camera import Camera
import numpy as np
import cv2
import tensorflow as tf
# Open the camera, start its grab thread and fetch the current image
cam = Camera(args)
cam.open()
cam.start()
img = cam.read()
# This is the failing call: read_file expects a string, but img is an array
img_raw = tf.read_file(img)
No need to call tf.read_file(img).
# Camera.read() returns the image itself, not a (ret_val, image) pair
img = cam.read()
# NOTE(review): read() can return None before the grab thread has stored a
# frame - check for that before converting.
img = tf.convert_to_tensor(img, dtype=tf.float32)
img = tf.image.resize(img, (HEIGHT, WIDTH))
# Add the batch dimension and scale pixel values to [0, 1]
images = tf.expand_dims(img, axis=0) / 255.0
Results = model.predict(images)

Categories

Resources