How to retrieve class_name in the detect.py example of TensorFlow Lite? - python

I'm using the detect.py script from the official TensorFlow Lite examples for the Raspberry Pi. I want to print the class name of the detected object. For example, if the object detection model detects a surgical mask, it should print "surgical_mask" in the command line. I was able to print the detection_result, which shows these results in the command line:
detections {
bounding_box {
origin_x: 135
origin_y: 14
width: 478
height: 457
}
classes {
index: 0
score: 0.875
class_name: "surgical_mask"
}
}
What I do not know is how to print just the "class_name".
Here is the code for detect.py:
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Main script to run the object detection routine."""
import argparse
import sys
import time
import cv2
from tflite_support.task import core
from tflite_support.task import processor
from tflite_support.task import vision
import utils
def run(model: str, camera_id: int, width: int, height: int, num_threads: int,
enable_edgetpu: bool) -> None:
"""Continuously run inference on images acquired from the camera.
Args:
model: Name of the TFLite object detection model.
camera_id: The camera id to be passed to OpenCV.
width: The width of the frame captured from the camera.
height: The height of the frame captured from the camera.
num_threads: The number of CPU threads to run the model.
enable_edgetpu: True/False whether the model is an EdgeTPU model.
"""
# Variables to calculate FPS
counter, fps = 0, 0
start_time = time.time()
# Start capturing video input from the camera
cap = cv2.VideoCapture(camera_id)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
# Visualization parameters
row_size = 20 # pixels
left_margin = 24 # pixels
text_color = (0, 0, 255) # red
font_size = 1
font_thickness = 1
fps_avg_frame_count = 10
# Initialize the object detection model
base_options = core.BaseOptions(
file_name=model, use_coral=enable_edgetpu, num_threads=num_threads)
detection_options = processor.DetectionOptions(
max_results=3, score_threshold=0.3)
options = vision.ObjectDetectorOptions(
base_options=base_options, detection_options=detection_options)
detector = vision.ObjectDetector.create_from_options(options)
# Continuously capture images from the camera and run inference
while cap.isOpened():
success, image = cap.read()
if not success:
sys.exit(
'ERROR: Unable to read from webcam. Please verify your webcam settings.'
)
counter += 1
image = cv2.flip(image, 1)
# Convert the image from BGR to RGB as required by the TFLite model.
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Create a TensorImage object from the RGB image.
input_tensor = vision.TensorImage.create_from_array(rgb_image)
# Run object detection estimation using the model.
detection_result = detector.detect(input_tensor)
# Draw keypoints and edges on input image
image = utils.visualize(image, detection_result)
# Calculate the FPS
if counter % fps_avg_frame_count == 0:
end_time = time.time()
fps = fps_avg_frame_count / (end_time - start_time)
start_time = time.time()
# Show the FPS
fps_text = 'FPS = {:.1f}'.format(fps)
text_location = (left_margin, row_size)
cv2.putText(image, fps_text, text_location, cv2.FONT_HERSHEY_PLAIN,
font_size, text_color, font_thickness)
print(detection_result)
# Stop the program if the ESC key is pressed.
if cv2.waitKey(1) == 27:
break
cv2.imshow('object_detector', image)
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
'--model',
help='Path of the object detection model.',
required=False,
default='efficientdet_lite0.tflite')
parser.add_argument(
'--cameraId', help='Id of camera.', required=False, type=int, default=0)
parser.add_argument(
'--frameWidth',
help='Width of frame to capture from camera.',
required=False,
type=int,
default=640)
parser.add_argument(
'--frameHeight',
help='Height of frame to capture from camera.',
required=False,
type=int,
default=480)
parser.add_argument(
'--numThreads',
help='Number of CPU threads to run the model.',
required=False,
type=int,
default=4)
parser.add_argument(
'--enableEdgeTPU',
help='Whether to run the model on EdgeTPU.',
action='store_true',
required=False,
default=False)
args = parser.parse_args()
run(args.model, int(args.cameraId), args.frameWidth, args.frameHeight,
int(args.numThreads), bool(args.enableEdgeTPU))
if __name__ == '__main__':
main()

You can get the number of detections like this:
num_of_detections = len(detection_result.detections)
Then use a for loop to iterate through the results, printing the class name of the detection at index x:
print(detection_result.detections[x].classes[0].class_name)
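Put together, a minimal sketch inside detect.py's while loop, right after detection_result = detector.detect(input_tensor) (assuming the tflite_support version whose result repr is shown in the question, where each detection exposes .classes):

for detection in detection_result.detections:
    top = detection.classes[0]  # first entry is the top-scoring class
    print(top.class_name, top.score)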

I am new to Stack Overflow, but I have a script that will do the task.
detectionIntermediate = detection_result.detections
lengthOfResult = len(detectionIntermediate)
if lengthOfResult > 0:
    print("Detection Results: ")
    # print(detectionIntermediate)
    # str() gives the repr shown at the top of the question; split it into
    # lines and pull out the one holding the class name.
    li = str(detectionIntermediate).split('\n')
    detectedClassData = li[9]  # line has the form 'class_name: "door"'
    formattedClassOutput = detectedClassData.replace("class_name: ", "")
    formattedClassOutput = formattedClassOutput.replace('"', '')
    print(formattedClassOutput)

Related

How to load a TF Lite Model into Python from a file

I've followed the End-to-End image classification tutorial for TensorFlow Lite and have created and saved my model as '/path/to/model.tflite'.
What I haven't been able to figure out is how to load it.
I'm looking for some kind of syntax that is similar to this:
from tflite_model_maker import image_classifier
from tflite_model_maker.image_classifier import DataLoader
model = image_classifier.Load('/path/to/model.tflite')
I'm sure I'm missing something obvious here. This is definitely not the first place I've looked at. This seems to be the best place for me to find what I need, but the syntax used confuses me.
What do I want to be able to do with the model?
test = DataLoader.from_folder('/path/to/testImages')
loss, accuracy = model.evaluate(test)
# A helper function that returns 'red'/'black' depending on whether its two
# input parameters match or not.
def get_label_color(val1, val2):
if val1 == val2:
return 'black'
else:
return 'red'
# Then plot 100 test images and their predicted labels.
# If a prediction result is different from the label provided in the "test"
# dataset, we will highlight it in red.
test_data = test  # the DataLoader created above
plt.figure(figsize=(20, 20))
predicts = model.predict_top_k(test_data)
for i, (image, label) in enumerate(test_data.gen_dataset().unbatch().take(100)):
ax = plt.subplot(10, 10, i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(image.numpy(), cmap=plt.cm.gray)
predict_label = predicts[i][0][0]
color = get_label_color(predict_label,
test_data.index_to_label[label.numpy()])
ax.xaxis.label.set_color(color)
plt.xlabel('Predicted: %s' % predict_label)
plt.show()
From the syntax above it seems the model isn't just a file but a type/class/method, depending on which name is most suitable for Python.
Feels like this should only take one line of code but I haven't been able to find it anywhere.
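For the loading itself, a .tflite file isn't loaded back into a Model Maker model object; it's loaded into TensorFlow's TFLite Interpreter for inference. (Model Maker's model object does have an evaluate_tflite method for an exported file, but that requires still having the original model object in memory.) A minimal sketch, where the path and the dummy input are placeholders:

import numpy as np
import tensorflow as tf

# Load the flatbuffer into the TFLite interpreter and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path='/path/to/model.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Feed one dummy image shaped like the model's input tensor.
dummy = np.zeros(input_details[0]['shape'], dtype=input_details[0]['dtype'])
interpreter.set_tensor(input_details[0]['index'], dummy)
interpreter.invoke()
scores = interpreter.get_tensor(output_details[0]['index'])
print(scores)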
Managed to do a simple version of it. Displaying the images as a stream using cv2 doesn't work for me on Windows the way it does on the Pi, so instead I created a webpage in the same directory as this script. This generates an image with the bounding box, using a specified tflite model. It is in no way ideal.
It uses a webcam to get the image and saves the image to the directory the script is run in. It then renames the file so it can be viewed by the webpage I set up to view it.
The majority of this code comes from the TFLite Object Detection Raspberry Pi sample.
import time, os, sys  # sys is used for sys.exit in the capture loop
from PIL import Image
from tflite_support import metadata
import platform
from typing import List, NamedTuple
import json
import cv2 as cv2
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
Interpreter = tf.lite.Interpreter
load_delegate = tf.lite.experimental.load_delegate
class ObjectDetectorOptions(NamedTuple):
"""A config to initialize an object detector."""
enable_edgetpu: bool = False
"""Enable the model to run on EdgeTPU."""
label_allow_list: List[str] = None
"""The optional allow list of labels."""
label_deny_list: List[str] = None
"""The optional deny list of labels."""
max_results: int = -1
"""The maximum number of top-scored detection results to return."""
num_threads: int = 1
"""The number of CPU threads to be used."""
score_threshold: float = 0.0
"""The score threshold of detection results to return."""
class Rect(NamedTuple):
"""A rectangle in 2D space."""
left: float
top: float
right: float
bottom: float
class Category(NamedTuple):
"""A result of a classification task."""
label: str
score: float
index: int
class Detection(NamedTuple):
"""A detected object as the result of an ObjectDetector."""
bounding_box: Rect
categories: List[Category]
def edgetpu_lib_name():
"""Returns the library name of EdgeTPU in the current platform."""
return {
'Darwin': 'libedgetpu.1.dylib',
'Linux': 'libedgetpu.so.1',
'Windows': 'edgetpu.dll',
}.get(platform.system(), None)
class ObjectDetector:
"""A wrapper class for a TFLite object detection model."""
_OUTPUT_LOCATION_NAME = 'location'
_OUTPUT_CATEGORY_NAME = 'category'
_OUTPUT_SCORE_NAME = 'score'
_OUTPUT_NUMBER_NAME = 'number of detections'
def __init__(
self,
model_path: str,
options: ObjectDetectorOptions = ObjectDetectorOptions()
) -> None:
"""Initialize a TFLite object detection model.
Args:
model_path: Path to the TFLite model.
options: The config to initialize an object detector. (Optional)
Raises:
ValueError: If the TFLite model is invalid.
OSError: If the current OS isn't supported by EdgeTPU.
"""
# Load metadata from model.
displayer = metadata.MetadataDisplayer.with_model_file(model_path)
# Save model metadata for preprocessing later.
model_metadata = json.loads(displayer.get_metadata_json())
process_units = model_metadata['subgraph_metadata'][0]['input_tensor_metadata'][0]['process_units']
mean = 0.0
std = 1.0
for option in process_units:
if option['options_type'] == 'NormalizationOptions':
mean = option['options']['mean'][0]
std = option['options']['std'][0]
self._mean = mean
self._std = std
# Load label list from metadata.
file_name = displayer.get_packed_associated_file_list()[0]
label_map_file = displayer.get_associated_file_buffer(file_name).decode()
label_list = list(filter(lambda x: len(x) > 0, label_map_file.splitlines()))
self._label_list = label_list
# Initialize TFLite model.
if options.enable_edgetpu:
if edgetpu_lib_name() is None:
raise OSError("The current OS isn't supported by Coral EdgeTPU.")
interpreter = Interpreter(
model_path=model_path,
experimental_delegates=[load_delegate(edgetpu_lib_name())],
num_threads=options.num_threads)
else:
interpreter = Interpreter(
model_path=model_path, num_threads=options.num_threads)
interpreter.allocate_tensors()
input_detail = interpreter.get_input_details()[0]
# From TensorFlow 2.6, the order of the outputs becomes undefined.
# Therefore we need to sort the tensor indices of TFLite outputs to know
# exactly the meaning of each output tensor. For example, if
# output indices are [601, 599, 598, 600], tensor names and indices aligned
# are:
# - location: 598
# - category: 599
# - score: 600
# - detection_count: 601
# because of the op's ports of TFLITE_DETECTION_POST_PROCESS
# (https://github.com/tensorflow/tensorflow/blob/a4fe268ea084e7d323133ed7b986e0ae259a2bc7/tensorflow/lite/kernels/detection_postprocess.cc#L47-L50).
sorted_output_indices = sorted(
[output['index'] for output in interpreter.get_output_details()])
self._output_indices = {
self._OUTPUT_LOCATION_NAME: sorted_output_indices[0],
self._OUTPUT_CATEGORY_NAME: sorted_output_indices[1],
self._OUTPUT_SCORE_NAME: sorted_output_indices[2],
self._OUTPUT_NUMBER_NAME: sorted_output_indices[3],
}
self._input_size = input_detail['shape'][2], input_detail['shape'][1]
self._is_quantized_input = input_detail['dtype'] == np.uint8
self._interpreter = interpreter
self._options = options
def detect(self, input_image: np.ndarray) -> List[Detection]:
"""Run detection on an input image.
Args:
input_image: A [height, width, 3] RGB image. Note that height and width
can be anything since the image will be immediately resized according
to the needs of the model within this function.
Returns:
A list of Detection entities.
"""
image_height, image_width, _ = input_image.shape
input_tensor = self._preprocess(input_image)
self._set_input_tensor(input_tensor)
self._interpreter.invoke()
# Get all output details
boxes = self._get_output_tensor(self._OUTPUT_LOCATION_NAME)
classes = self._get_output_tensor(self._OUTPUT_CATEGORY_NAME)
scores = self._get_output_tensor(self._OUTPUT_SCORE_NAME)
count = int(self._get_output_tensor(self._OUTPUT_NUMBER_NAME))
return self._postprocess(boxes, classes, scores, count, image_width,
image_height)
def _preprocess(self, input_image: np.ndarray) -> np.ndarray:
"""Preprocess the input image as required by the TFLite model."""
# Resize the input
input_tensor = cv2.resize(input_image, self._input_size)
# Normalize the input if it's a float model (aka. not quantized)
if not self._is_quantized_input:
input_tensor = (np.float32(input_tensor) - self._mean) / self._std
# Add batch dimension
input_tensor = np.expand_dims(input_tensor, axis=0)
return input_tensor
def _set_input_tensor(self, image):
"""Sets the input tensor."""
tensor_index = self._interpreter.get_input_details()[0]['index']
input_tensor = self._interpreter.tensor(tensor_index)()[0]
input_tensor[:, :] = image
def _get_output_tensor(self, name):
"""Returns the output tensor at the given index."""
output_index = self._output_indices[name]
tensor = np.squeeze(self._interpreter.get_tensor(output_index))
return tensor
def _postprocess(self, boxes: np.ndarray, classes: np.ndarray,
scores: np.ndarray, count: int, image_width: int,
image_height: int) -> List[Detection]:
"""Post-process the output of TFLite model into a list of Detection objects.
Args:
boxes: Bounding boxes of detected objects from the TFLite model.
classes: Class index of the detected objects from the TFLite model.
scores: Confidence scores of the detected objects from the TFLite model.
count: Number of detected objects from the TFLite model.
image_width: Width of the input image.
image_height: Height of the input image.
Returns:
A list of Detection objects detected by the TFLite model.
"""
results = []
# Parse the model output into a list of Detection entities.
for i in range(count):
if scores[i] >= self._options.score_threshold:
y_min, x_min, y_max, x_max = boxes[i]
bounding_box = Rect(
top=int(y_min * image_height),
left=int(x_min * image_width),
bottom=int(y_max * image_height),
right=int(x_max * image_width))
class_id = int(classes[i])
category = Category(
score=scores[i],
label=self._label_list[class_id], # 0 is reserved for background
index=class_id)
result = Detection(bounding_box=bounding_box, categories=[category])
results.append(result)
# Sort detection results by score, descending
sorted_results = sorted(
results,
key=lambda detection: detection.categories[0].score,
reverse=True)
# Filter out detections in deny list
filtered_results = sorted_results
if self._options.label_deny_list is not None:
filtered_results = list(
filter(
lambda detection: detection.categories[0].label not in self.
_options.label_deny_list, filtered_results))
# Keep only detections in allow list
if self._options.label_allow_list is not None:
filtered_results = list(
filter(
lambda detection: detection.categories[0].label in self._options.
label_allow_list, filtered_results))
# Return at most max_results detections.
if self._options.max_results > 0:
result_count = min(len(filtered_results), self._options.max_results)
filtered_results = filtered_results[:result_count]
return filtered_results
_MARGIN = 10 # pixels
_ROW_SIZE = 10 # pixels
_FONT_SIZE = 1
_FONT_THICKNESS = 1
_TEXT_COLOR = (0, 0, 255) # red
def visualize(
image: np.ndarray,
detections: List[Detection],
) -> np.ndarray:
"""Draws bounding boxes on the input image and return it.
Args:
image: The input RGB image.
detections: The list of all "Detection" entities to be visualized.
Returns:
Image with bounding boxes.
"""
for detection in detections:
# Draw bounding_box
start_point = detection.bounding_box.left, detection.bounding_box.top
end_point = detection.bounding_box.right, detection.bounding_box.bottom
cv2.rectangle(image, start_point, end_point, _TEXT_COLOR, 3)
# Draw label and score
category = detection.categories[0]
class_name = category.label
probability = round(category.score, 2)
result_text = class_name + ' (' + str(probability) + ')'
text_location = (_MARGIN + detection.bounding_box.left,
_MARGIN + _ROW_SIZE + detection.bounding_box.top)
cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
_FONT_SIZE, _TEXT_COLOR, _FONT_THICKNESS)
return image
# ---------------------------------- #
# This is where the custom code starts
# ---------------------------------- #
# Load the TFLite model
TFLITE_MODEL_PATH='object.tflite'
DETECTION_THRESHOLD = 0.5 # 50% threshold required before identifying
options = ObjectDetectorOptions(
num_threads=4,
score_threshold=DETECTION_THRESHOLD,
)
# Close camera if already open
try:
    cap.release()
except NameError:
    pass  # cap doesn't exist yet, nothing to release
detector = ObjectDetector(model_path=TFLITE_MODEL_PATH, options=options)
cap = cv2.VideoCapture(0) #webcam
counter = 0 # Stores how many times the model has run
while cap.isOpened():
success, image = cap.read()
if not success:
sys.exit(
'ERROR: Unable to read from webcam. Please verify your webcam settings.'
)
image = cv2.flip(image, 1)
# Convert the image from BGR to RGB as required by the TFLite model.
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#image.thumbnail((512, 512), Image.ANTIALIAS)
image_np = np.asarray(image)
# Run object detection estimation using the model.
detections = detector.detect(image_np)
# Draw keypoints and edges on input image
image_np = visualize(image_np, detections)
if counter == 10: # <- Change this to decide how many iterations
cap.release()
break
image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
plt.imsave('tmp.jpg',image_np) # Saves the image
os.replace("tmp.jpg", "web.jpg",) # Renames it for the webpage
counter += 1
print(counter)
cap.release()
Here's the HTML for the page, placed in the same directory as the Python file. I saved it as index.html and opened it in the browser while running the Python script above.
<!DOCTYPE html>
<html>
<head>
<title>Object Detection</title>
</head>
<body>
<h1>Object Detection</h1>
<p>This displays images saved during detection process</p>
<canvas id="x" width="700px" height="500px"></canvas>
<script>
var newImage = new Image();
newImage.src = "web.jpg";
var canvas = document.getElementById("x");
var context = canvas.getContext("2d");
newImage.onload = function() {
context.drawImage(newImage, 0, 0);
console.log("trigger")
setTimeout(timedRefresh, 1000);
};
function timedRefresh() {
// just change src attribute, will always trigger the onload callback
try {
newImage.src = ("web.jpg#" + new Date().getTime());
}catch(e){
console.log(e);
}
}
setTimeout(timedRefresh, 100);
</script>
</body>
</html>
It's incredibly slow, not ideal in many ways, and probably breaks many good coding conventions. It was only used locally; I would definitely not use this in a production environment nor recommend it. I just needed a quick proof of concept, and it worked for that.

How to convert RasPi Python code to Jetson Nano compatible Python code

I'm trying to create a stereo vision camera on my Jetson Nano with 2 RasPi cameras. However, I can find a lot of information and code online regarding the RasPi, but not the Jetson Nano. So, for example, let's say I have these two Python programs, the first for starting both cameras on the Jetson Nano and the second for starting both cameras on the RasPi. I'm quite new to all this, so it would be great to get some advice on how I could get started. Thanks!
Jetson (taken from JetsonHacks):
# MIT License
# Copyright (c) 2019,2020 JetsonHacks
# See license
# A very simple code snippet
# Using two CSI cameras (such as the Raspberry Pi Version 2) connected to a
# NVIDIA Jetson Nano Developer Kit (Rev B01) using OpenCV
# Drivers for the camera and OpenCV are included in the base image in JetPack 4.3+
# This script will open a window and place the camera stream from each camera in a window
# arranged horizontally.
# The camera streams are each read in their own thread, as when done sequentially there
# is a noticeable lag
# For better performance, the next step would be to experiment with having the window display
# in a separate thread
import cv2
import threading
import numpy as np
# gstreamer_pipeline returns a GStreamer pipeline for capturing from the CSI camera
# Flip the image by setting the flip_method (most common values: 0 and 2)
# display_width and display_height determine the size of each camera pane in the window on the screen
left_camera = None
right_camera = None
class CSI_Camera:
def __init__ (self) :
# Initialize instance variables
# OpenCV video capture element
self.video_capture = None
# The last captured image from the camera
self.frame = None
self.grabbed = False
# The thread where the video capture runs
self.read_thread = None
self.read_lock = threading.Lock()
self.running = False
def open(self, gstreamer_pipeline_string):
try:
self.video_capture = cv2.VideoCapture(
gstreamer_pipeline_string, cv2.CAP_GSTREAMER
)
except RuntimeError:
self.video_capture = None
print("Unable to open camera")
print("Pipeline: " + gstreamer_pipeline_string)
return
# Grab the first frame to start the video capturing
self.grabbed, self.frame = self.video_capture.read()
def start(self):
if self.running:
print('Video capturing is already running')
return None
# create a thread to read the camera image
if self.video_capture != None:
self.running=True
self.read_thread = threading.Thread(target=self.updateCamera)
self.read_thread.start()
return self
def stop(self):
self.running=False
self.read_thread.join()
def updateCamera(self):
# This is the thread to read images from the camera
while self.running:
try:
grabbed, frame = self.video_capture.read()
with self.read_lock:
self.grabbed=grabbed
self.frame=frame
except RuntimeError:
print("Could not read image from camera")
# FIX ME - stop and cleanup thread
# Something bad happened
def read(self):
with self.read_lock:
frame = self.frame.copy()
grabbed=self.grabbed
return grabbed, frame
def release(self):
if self.video_capture != None:
self.video_capture.release()
self.video_capture = None
# Now kill the thread
if self.read_thread != None:
self.read_thread.join()
# Frame rate on the CSI camera on the Nano is currently set through the gstreamer pipeline
# Here we directly select sensor_mode 3 (1280x720, 59.9999 fps)
def gstreamer_pipeline(
sensor_id=0,
sensor_mode=3,
capture_width=1280,
capture_height=720,
display_width=1280,
display_height=720,
framerate=30,
flip_method=0,
):
return (
"nvarguscamerasrc sensor-id=%d sensor-mode=%d ! "
"video/x-raw(memory:NVMM), "
"width=(int)%d, height=(int)%d, "
"format=(string)NV12, framerate=(fraction)%d/1 ! "
"nvvidconv flip-method=%d ! "
"video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
"videoconvert ! "
"video/x-raw, format=(string)BGR ! appsink"
% (
sensor_id,
sensor_mode,
capture_width,
capture_height,
framerate,
flip_method,
display_width,
display_height,
)
)
def start_cameras():
left_camera = CSI_Camera()
left_camera.open(
gstreamer_pipeline(
sensor_id=0,
sensor_mode=3,
flip_method=0,
display_height=540,
display_width=960,
)
)
left_camera.start()
right_camera = CSI_Camera()
right_camera.open(
gstreamer_pipeline(
sensor_id=1,
sensor_mode=3,
flip_method=0,
display_height=540,
display_width=960,
)
)
right_camera.start()
cv2.namedWindow("CSI Cameras", cv2.WINDOW_AUTOSIZE)
if (
not left_camera.video_capture.isOpened()
or not right_camera.video_capture.isOpened()
):
# Cameras did not open, or no camera attached
print("Unable to open any cameras")
# TODO: Proper Cleanup
raise SystemExit(0)
while cv2.getWindowProperty("CSI Cameras", 0) >= 0 :
_ , left_image=left_camera.read()
_ , right_image=right_camera.read()
camera_images = np.hstack((left_image, right_image))
cv2.imshow("CSI Cameras", camera_images)
# This also acts as a frame delay: waitKey blocks for up to 30 ms
keyCode = cv2.waitKey(30) & 0xFF
# Stop the program on the ESC key
if keyCode == 27:
break
left_camera.stop()
left_camera.release()
right_camera.stop()
right_camera.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
start_cameras()
RasPi (from https://github.com/realizator/stereopi-tutorial/blob/master/1_test.py):
# Copyright (C) 2019 Eugene Pomazov, <stereopi.com>, virt2real team
#
# This file is part of StereoPi tutorial scripts.
#
# StereoPi tutorial is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# StereoPi tutorial is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with StereoPi tutorial.
# If not, see <http://www.gnu.org/licenses/>.
#
# Most of this code is updated version of 3dberry.org project by virt2real
#
# Thanks to Adrian and http://pyimagesearch.com, as there are lot of
# code in this tutorial was taken from his lessons.
#
import picamera
from picamera import PiCamera
import time
import cv2
import numpy as np
import os
from datetime import datetime
# File for captured image
filename = './scenes/photo.png'
# Camera settings
cam_width = 1280
cam_height = 480
# Final image capture settings
scale_ratio = 0.5
# Camera resolution: height must be divisible by 16, and width by 32
cam_width = int((cam_width+31)/32)*32
cam_height = int((cam_height+15)/16)*16
print ("Used camera resolution: "+str(cam_width)+" x "+str(cam_height))
# Buffer for captured image settings
img_width = int (cam_width * scale_ratio)
img_height = int (cam_height * scale_ratio)
capture = np.zeros((img_height, img_width, 4), dtype=np.uint8)
print ("Scaled image resolution: "+str(img_width)+" x "+str(img_height))
# Initialize the camera
camera = PiCamera(stereo_mode='side-by-side',stereo_decimate=False)
camera.resolution=(cam_width, cam_height)
camera.framerate = 20
camera.hflip = True
t2 = datetime.now()
counter = 0
avgtime = 0
# Capture frames from the camera
for frame in camera.capture_continuous(capture, format="bgra", use_video_port=True, resize=(img_width,img_height)):
counter+=1
t1 = datetime.now()
timediff = t1-t2
avgtime = avgtime + (timediff.total_seconds())
cv2.imshow("pair", frame)
key = cv2.waitKey(1) & 0xFF
t2 = datetime.now()
# if the `q` key was pressed, break from the loop and save last image
if key == ord("q") :
avgtime = avgtime/counter
print ("Average time between frames: " + str(avgtime))
print ("Average FPS: " + str(1/avgtime))
if (os.path.isdir("./scenes")==False):
os.makedirs("./scenes")
cv2.imwrite(filename, frame)
break
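There's no line-for-line conversion, but the mapping between the two listings is narrow: everything the Pi script gets from picamera's capture_continuous, the Nano gets from cv2.VideoCapture driven by the gstreamer_pipeline string. A rough, untested sketch of the StereoPi-style side-by-side loop in the Nano's idiom, reusing CSI_Camera and gstreamer_pipeline from the JetsonHacks listing above:

# Reuses CSI_Camera and gstreamer_pipeline from the JetsonHacks listing;
# cv2 and numpy (as np) are imported there as well.
left = CSI_Camera()
left.open(gstreamer_pipeline(sensor_id=0, display_width=640, display_height=480))
left.start()
right = CSI_Camera()
right.open(gstreamer_pipeline(sensor_id=1, display_width=640, display_height=480))
right.start()

while True:
    _, left_image = left.read()
    _, right_image = right.read()
    # Same side-by-side layout the StereoPi script gets from stereo_mode
    pair = np.hstack((left_image, right_image))
    cv2.imshow("pair", pair)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

left.stop(); left.release()
right.stop(); right.release()
cv2.destroyAllWindows()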

How to crop the image with the bounding rectangle after detecting it with a YOLO net

I have created a model to recognize objects in an image, and it works fine for me. I have the code that detects the object according to the already-trained weights, but I need to create a new image containing only what I have detected. For example, if I have an image of a cat in a park, I want to create a new image with only the cat that I detected. How could I do that? Here is my current code, in which I detect the object:
from __future__ import division
from models import *
from utils.utils import *
from utils.datasets import *
import os
import sys
import time
import datetime
import argparse
from PIL import Image
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.ticker import NullLocator
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--image_folder", type=str, default="data/samples", help="path to dataset")
parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file")
parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file")
parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file")
parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold")
parser.add_argument("--nms_thres", type=float, default=0.5, help="iou thresshold for non-maximum suppression")
parser.add_argument("--batch_size", type=int, default=1, help="size of the batches")
parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation")
parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
parser.add_argument("--checkpoint_model", type=str, help="path to checkpoint model")
opt = parser.parse_args()
print(opt)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs("output", exist_ok=True)
# Set up model
model = Darknet(opt.model_def, img_size=opt.img_size).to(device)
if opt.weights_path.endswith(".weights"):
# Load darknet weights
model.load_darknet_weights(opt.weights_path)
else:
# Load checkpoint weights
model.load_state_dict(torch.load(opt.weights_path))
model.eval() # Set in evaluation mode
dataloader = DataLoader(
ImageFolder(opt.image_folder, img_size=opt.img_size),
batch_size=opt.batch_size,
shuffle=False,
num_workers=opt.n_cpu,
)
classes = load_classes(opt.class_path) # Extracts class labels from file
Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
imgs = [] # Stores image paths
img_detections = [] # Stores detections for each image index
print("\nPerforming object detection:")
prev_time = time.time()
for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
# Configure input
input_imgs = Variable(input_imgs.type(Tensor))
# Get detections
with torch.no_grad():
detections = model(input_imgs)
detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
# Log progress
current_time = time.time()
inference_time = datetime.timedelta(seconds=current_time - prev_time)
prev_time = current_time
print("\t+ Batch %d, Inference Time: %s" % (batch_i, inference_time))
# Save image and detections
imgs.extend(img_paths)
img_detections.extend(detections)
# Bounding-box colors
cmap = plt.get_cmap("tab20b")
colors = [cmap(i) for i in np.linspace(0, 1, 20)]
print("\nSaving images:")
# Iterate through images and save plot of detections
for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
print("(%d) Image: '%s'" % (img_i, path))
# Create plot
img = np.array(Image.open(path))
plt.figure()
fig, ax = plt.subplots(1)
ax.imshow(img)
# Draw bounding boxes and labels of detections
if detections is not None:
# Rescale boxes to original image
detections = rescale_boxes(detections, opt.img_size, img.shape[:2])
unique_labels = detections[:, -1].cpu().unique()
n_cls_preds = len(unique_labels)
bbox_colors = random.sample(colors, n_cls_preds)
for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
print("\t+ Label: %s, Conf: %.5f" % (classes[int(cls_pred)], cls_conf.item()))
box_w = x2 - x1
box_h = y2 - y1
color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
# Create a Rectangle patch
bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none")
# Add the bbox to the plot
ax.add_patch(bbox)
# Add label
plt.text(
x1,
y1,
s=classes[int(cls_pred)],
color="white",
verticalalignment="top",
bbox={"color": color, "pad": 0},
)
# Save generated image with detections
plt.axis("off")
plt.gca().xaxis.set_major_locator(NullLocator())
plt.gca().yaxis.set_major_locator(NullLocator())
filename = path.split("/")[-1].split(".")[0]
plt.savefig(f"output/{filename}.png", bbox_inches="tight", pad_inches=0.0)
plt.close()
How could I add to this code a fragment to crop out the object that I have detected? In case it is relevant to the solution, I am using Python 3 and TensorFlow 2. Thank you very much.
You can crop the image using this slicing notation:
crop = img[y1:y1+box_h, x1:x1+box_w, :]
So long as the numpy array is numpy.uint8 it should be straightforward to save:
pimg = Image.fromarray(crop)
pimg.save("test.png")
Solved with:
img = cv2.imread('./202.jpg')
crop_img = img[y:y+h, x:x+w]
#cv2.imshow('img', crop_img)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
cv2.imwrite('./pruebas/resultado.png', crop_img)
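Tying that back to the detection loop in the question: the same slice works per detection, but the YOLO coordinates come back as float tensors, so cast them to int before slicing. A sketch using the img, detections, and filename variables from the loop above:

for det_i, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detections):
    # Clamp to the image and cast the tensor coordinates to plain ints
    x1, y1 = max(0, int(x1)), max(0, int(y1))
    x2, y2 = min(img.shape[1], int(x2)), min(img.shape[0], int(y2))
    crop = img[y1:y2, x1:x2]  # numpy slicing: rows are y, columns are x
    Image.fromarray(crop).save(f"output/{filename}_crop{det_i}.png")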

global name 'SIFT' is not defined in Python (Pi 3 B)

When I ran this sample code on Windows with OpenCV 3.0.0, sift = cv2.SIFT() worked. When I ran the same code, changed to sift = cv2.xfeatures2d.SIFT_create(), on a Raspberry Pi with OpenCV 3.2.0, it shows this error:
"global name 'SIFT' is not defined"
#!/usr/bin/env python
# Copyright 2014 Jarmo Puttonen <jarmo.puttonen#gmail.com>. All rights reserved.
# Use of this source code is governed by a MIT-style
# licence that can be found in the LICENCE file.
"""Detect speed limits from webcam feed"""
import re
import os
import sys
import cv2
import threading
import numpy as np
import argparse
#from gps import gps, WATCH_ENABLE
def onTrackbarChange(_):
"""Pass any trackbar changes"""
class GPSInfo(threading.Thread):
"""Thread that takes care of updating GPS Speed info."""
def __init__(self):
threading.Thread.__init__(self)
self.gpsd = gps(mode=WATCH_ENABLE)
self.running = True
self.speed = 0
def run(self):
while self.running:
self.gpsd.next()
self.speed = self.gpsd.fix.speed*3.6 #from m/s to km/h
def read_paths(path):
"""Returns a list of files in given path"""
images = [[] for _ in range(2)]
for dirname, dirnames, _ in os.walk(path):
for subdirname in dirnames:
filepath = os.path.join(dirname, subdirname)
for filename in os.listdir(filepath):
try:
imgpath = str(os.path.join(filepath, filename))
images[0].append(imgpath)
limit = re.findall('[0-9]+', filename)
images[1].append(limit[0])
except IOError, (errno, strerror):
print "I/O error({0}): {1}".format(errno, strerror)
except:
print "Unexpected error:", sys.exc_info()\[0\]
raise
return images
def load_images(imgpath):
"""Loads images in given path and returns
a list containing image and keypoints"""
images = read_paths(imgpath)
imglist = [[], [], [], []]
cur_img = 0
SIFT = cv2.xfeatures2d.SIFT_create()
for i in images[0]:
img = cv2.imread(i, 0)
imglist[0].append(img)
imglist[1].append(images[1][cur_img])
cur_img += 1
keypoints, des = SIFT.detectAndCompute(img, None)
imglist[2].append(keypoints)
imglist[3].append(des)
return imglist
def run_flann(img):
"""Run FLANN-detector for given image with given image list"""
# Find the keypoint descriptors with SIFT
_, des = SIFT.detectAndCompute(img, None)
if des is None:
return "Unknown", 0
if len(des) < ARGS.MINKP:
return "Unknown", 0
biggest_amnt = 0
biggest_speed = 0
cur_img = 0
try:
for _ in IMAGES[0]:
des2 = IMAGES[3][cur_img]
matches = FLANN.knnMatch(des2, des, k=2)
matchamnt = 0
# Find matches with Lowe's ratio test
for _, (moo, noo) in enumerate(matches):
if moo.distance < ARGS.FLANNTHRESHOLD*noo.distance:
matchamnt += 1
if matchamnt > biggest_amnt:
biggest_amnt = matchamnt
biggest_speed = IMAGES[1][cur_img]
cur_img += 1
if biggest_amnt > ARGS.MINKP:
return biggest_speed, biggest_amnt
else:
return "Unknown", 0
except Exception, exept:
print exept
return "Unknown", 0
IMAGES = load_images("data")
def run_logic():
"""Run TSR and ISA"""
lastlimit = "00"
lastdetect = "00"
downscale = ARGS.DOWNSCALE
matches = 0
possiblematch = "00"
try:
if CAP.isOpened():
rval, frame = CAP.read()
print("Camera opened and frame read")
else:
rval = False
print("Camera not opened")
while rval:
origframe = frame
if ARGS.MORPH:
frame = cv2.morphologyEx(
frame,
cv2.MORPH_OPEN,
cv2.getStructuringElement(cv2.MORPH_RECT,(2,2))
)
frame = cv2.morphologyEx(
frame,
cv2.MORPH_CLOSE,
cv2.getStructuringElement(cv2.MORPH_RECT,(2,2))
)
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
if ARGS.EQ:
cv2.equalizeHist(frame, frame)
if ARGS.TRACKBARS:
ARGS.MINKP = cv2.getTrackbarPos('MINKEYPOINTS','preview')
downscale = cv2.getTrackbarPos('DOWNSCALE','preview')
ARGS.FLANNTHRESHOLD = float(
cv2.getTrackbarPos('FLANNTHRESHOLD','preview')
)/10
ARGS.CHECKS = cv2.getTrackbarPos('FLANNCHECKS','preview')
ARGS.TREES = cv2.getTrackbarPos('FLANNTREES','preview')
scaledsize = (frame.shape[1]/downscale, frame.shape[0]/downscale)
scaledframe = cv2.resize(frame, scaledsize)
# Detect signs in downscaled frame
signs = CLASSIFIER.detectMultiScale(
scaledframe,
1.1,
5,
0,
(10, 10),
(200, 200))
for sign in signs:
xpos, ypos, width, height = [ i*downscale for i in sign ]
crop_img = frame[ypos:ypos+height, xpos:xpos+width]
sized = cv2.resize(crop_img, (128, 128))
comp = "Unknown"
comp, amnt = run_flann(sized)
if comp != "Unknown":
if comp != lastlimit:
# Require two consecutive hits for new limit.
if comp == lastdetect:
possiblematch = comp
matches = matches + 1
if matches >= ARGS.matches:
print "New speed limit: "+possiblematch
lastlimit = possiblematch
matches = 0
else:
possiblematch = "00"
matches = 0
cv2.rectangle(
origframe,
(xpos, ypos),
(xpos+width, ypos+height),
(0, 0, 255))
cv2.putText(
origframe,
"Speed limit: "+comp+" KP: "+str(amnt),
(xpos,ypos-5),
cv2.FONT_HERSHEY_SIMPLEX,
0.4,
(0,0,255),
1,)
else:
cv2.rectangle(
origframe,
(xpos,ypos),
(xpos+width,ypos+height),
(255,0,0))
cv2.putText(
origframe,
"UNKNOWN SPEED LIMIT",
(xpos,ypos-5),
cv2.FONT_HERSHEY_SIMPLEX,
0.4,
(255,0,0),
1,)
comp = lastdetect
lastdetect = comp
cv2.putText(
origframe,
"Current speed limit: "+str(lastlimit)+" km/h.",
(5,50),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0,0,0),
2
)
cv2.imshow("preview", origframe)
if ARGS.PREVIEW:
cv2.imshow("preview", origframe)
_ = cv2.waitKey(20)
rval, frame = CAP.read()
except (KeyboardInterrupt, Exception), exept:
print exept
## if ARGS.GPS:
## print "Killing GPS"
## GPSP.running = False
## GPSP.join()
## print "Shutting down!"
# Preload all classes used in detection
sift = cv2.xfeatures2d.SIFT_create()
INDEX_PARAMS = None
SEARCH_PARAMS = None
FLANN = None
## Webcam logic starts
CAP = None
ARGS = None
if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
description="Traffic sign recognition and intelligent speed assist.",
)
PARSER.add_argument("-d", "--device", dest="SOURCE", default=0,
help="Index of used video device. Default: 0 (/dev/video0).")
PARSER.add_argument("-g", "--gps",
dest="GPS", action="store_true", default=False,
help="Enable over speeding detection.")
PARSER.add_argument("-o", "--overspeed",
dest="COMMAND", default="false",
help="Command used in overspeed warning." \
" Default: echo OVERSPEEDING!.")
PARSER.add_argument("-c", "--cascade",
dest="CASCADE", default="lbpCascade.xml",
help="Cascade used in speed sign detection." \
" Default: lbpCascade.xml.")
PARSER.add_argument("-k", "--keypoints",
dest="MINKP", default=5,
help="Min amount of keypoints required in" \
" limit recognition. Default: 5.")
PARSER.add_argument("-D", "--downscale",
dest="DOWNSCALE", default=1,
help="Multiplier for downscaling frame before" \
" detecting signs. Default: 1.")
PARSER.add_argument("-f", "--flann",
dest="FLANNTHRESHOLD", default=0.8,
help="Threshold multiplier for accepting FLANN matches." \
" Default: 0.8.")
PARSER.add_argument("-F", "--flannchecks",
dest="CHECKS", default=50,
help="How many checks will be done in FLANN matching." \
" Default: 50.")
PARSER.add_argument("-t", "--flanntrees",
dest="TREES", default=5,
help="How many trees will be used in FLANN matching." \
" Default: 5.")
PARSER.add_argument("-m", "--matches",
dest="matches", default=2,
help="How many consecutive keypoint matches are needed" \
" before setting new limit. Default: 2.")
PARSER.add_argument("-e", "--disable-eq",
dest="EQ", action="store_false", default=True,
help="Disable histogram equalization.")
PARSER.add_argument("-M", "--morphopenclose",
dest="MORPH", action="store_true", default=False,
help="Enable morphological open/close used in removing" \
" noise from image.")
PARSER.add_argument("-T", "--trackbars",
dest="TRACKBARS", action="store_true", default=False,
help="Enable debug trackbars.")
PARSER.add_argument("-s", "--showvid",
dest="PREVIEW", action="store_true", default=False,
help="Show output video with detections.")
ARGS = PARSER.parse_args()
CAP = cv2.VideoCapture(0)
CLASSIFIER = cv2.CascadeClassifier(ARGS.CASCADE)
INDEX_PARAMS = dict(algorithm = 0, trees = ARGS.TREES)
SEARCH_PARAMS = dict(checks=ARGS.CHECKS) # or pass empty dictionary
FLANN = cv2.FlannBasedMatcher(INDEX_PARAMS, SEARCH_PARAMS)
if ARGS.PREVIEW:
cv2.namedWindow("preview")
if ARGS.GPS:
GPSP = GPSInfo()
GPSP.start()
if ARGS.TRACKBARS:
cv2.createTrackbar(
'MINKEYPOINTS',
'preview',
ARGS.MINKP,
100,
onTrackbarChange)
cv2.createTrackbar(
'DOWNSCALE',
'preview',
int(ARGS.DOWNSCALE),
20,
onTrackbarChange)
cv2.createTrackbar(
'FLANNTHRESHOLD',
'preview',
8,
10,
onTrackbarChange)
cv2.createTrackbar(
'FLANNCHECKS',
'preview',
ARGS.CHECKS,
1000,
onTrackbarChange)
cv2.createTrackbar(
'FLANNTREES',
'preview',
ARGS.TREES,
50,
onTrackbarChange)
run_logic()
In OpenCV 3.2.0, the xfeatures2d module was moved to opencv_contrib. You can verify whether your RPi cv2 installation has it with help(cv2.xfeatures2d) and help(cv2.xfeatures2d.SIFT_create). If it's missing, you need to find an OpenCV binary that was built with the opencv_contrib modules, if you didn't build it yourself. Note also that run_flann references an uppercase SIFT, while the module-level detector is assigned to lowercase sift; the only uppercase SIFT is local to load_images, which is exactly what the "global name 'SIFT' is not defined" error is complaining about, so the names need to be made consistent either way.
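A hypothetical fix sketch for the naming side of it: create the detector once at module level under the uppercase name run_flann already uses, mirroring the two variants from the question:

import cv2

try:
    # OpenCV 3.x built with the opencv_contrib modules
    SIFT = cv2.xfeatures2d.SIFT_create()
except AttributeError:
    # Older builds exposing the 2.4-style API
    SIFT = cv2.SIFT()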

Red Dot Tracker

I am coding a program that can monitor the red dot of a laser and track its movement. I would like to collect the x and y movement of the laser (deviating from a stable point). Some searching found me this code on GitHub, which I plan to modify and use to track the red dot using OpenCV. I am, however, struggling to pull any data from it.
How would I go about storing the x and y coordinates of the laser in Excel (or another useful format)?
import sys
import argparse
import cv2
import numpy
class LaserTracker(object):
def __init__(self, cam_width=640, cam_height=480, hue_min=20, hue_max=160,
sat_min=100, sat_max=255, val_min=200, val_max=256,
display_thresholds=False):
"""
* ``cam_width`` x ``cam_height`` -- This should be the size of the
image coming from the camera. Default is 640x480.
HSV color space Threshold values for a RED laser pointer are determined
by:
* ``hue_min``, ``hue_max`` -- Min/Max allowed Hue values
* ``sat_min``, ``sat_max`` -- Min/Max allowed Saturation values
* ``val_min``, ``val_max`` -- Min/Max allowed pixel values
If the dot from the laser pointer doesn't fall within these values, it
will be ignored.
* ``display_thresholds`` -- if True, additional windows will display
values for threshold image channels.
"""
self.cam_width = cam_width
self.cam_height = cam_height
self.hue_min = hue_min
self.hue_max = hue_max
self.sat_min = sat_min
self.sat_max = sat_max
self.val_min = val_min
self.val_max = val_max
self.display_thresholds = display_thresholds
self.capture = None # camera capture device
self.channels = {
'hue': None,
'saturation': None,
'value': None,
'laser': None,
}
self.previous_position = None
self.trail = numpy.zeros((self.cam_height, self.cam_width, 3),
numpy.uint8)
def create_and_position_window(self, name, xpos, ypos):
"""Creates a named widow placing it on the screen at (xpos, ypos)."""
# Create a window
cv2.namedWindow(name)
# Resize it to the size of the camera image
cv2.resizeWindow(name, self.cam_width, self.cam_height)
# Move to (xpos,ypos) on the screen
cv2.moveWindow(name, xpos, ypos)
def setup_camera_capture(self, device_num=0):
"""Perform camera setup for the device number (default device = 0).
Returns a reference to the camera Capture object.
"""
try:
device = int(device_num)
sys.stdout.write("Using Camera Device: {0}\n".format(device))
except (IndexError, ValueError):
# assume we want the 1st device
device = 0
sys.stderr.write("Invalid Device. Using default device 0\n")
# Try to start capturing frames
self.capture = cv2.VideoCapture(device)
if not self.capture.isOpened():
sys.stderr.write("Faled to Open Capture device. Quitting.\n")
sys.exit(1)
# set the wanted image size from the camera
self.capture.set(
cv2.cv.CV_CAP_PROP_FRAME_WIDTH if cv2.__version__.startswith('2') else cv2.CAP_PROP_FRAME_WIDTH,
self.cam_width
)
self.capture.set(
cv2.cv.CV_CAP_PROP_FRAME_HEIGHT if cv2.__version__.startswith('2') else cv2.CAP_PROP_FRAME_HEIGHT,
self.cam_height
)
return self.capture
def handle_quit(self, delay=10):
"""Quit the program if the user presses "Esc" or "q"."""
key = cv2.waitKey(delay)
c = chr(key & 255)
if c in ['c', 'C']:
self.trail = numpy.zeros((self.cam_height, self.cam_width, 3),
numpy.uint8)
if c in ['q', 'Q', chr(27)]:
sys.exit(0)
def threshold_image(self, channel):
if channel == "hue":
minimum = self.hue_min
maximum = self.hue_max
elif channel == "saturation":
minimum = self.sat_min
maximum = self.sat_max
elif channel == "value":
minimum = self.val_min
maximum = self.val_max
(t, tmp) = cv2.threshold(
self.channels[channel], # src
maximum, # threshold value
0, # we don't care because of the selected type
cv2.THRESH_TOZERO_INV # t type
)
(t, self.channels[channel]) = cv2.threshold(
tmp, # src
minimum, # threshold value
255, # maxvalue
cv2.THRESH_BINARY # type
)
if channel == 'hue':
# only works for filtering red color because the range for the hue
# is split
self.channels['hue'] = cv2.bitwise_not(self.channels['hue'])
def track(self, frame, mask):
"""
Track the position of the laser pointer.
Code taken from
http://www.pyimagesearch.com/2015/09/14/ball-tracking-with-opencv/
"""
center = None
countours = cv2.findContours(mask, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)[-2]
# only proceed if at least one contour was found
if len(countours) > 0:
# find the largest contour in the mask, then use
# it to compute the minimum enclosing circle and
# centroid
c = max(countours, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
moments = cv2.moments(c)
if moments["m00"] > 0:
center = int(moments["m10"] / moments["m00"]), \
int(moments["m01"] / moments["m00"])
else:
center = int(x), int(y)
# only proceed if the radius meets a minimum size
if radius > 10:
# draw the circle and centroid on the frame,
cv2.circle(frame, (int(x), int(y)), int(radius),
(0, 255, 255), 2)
cv2.circle(frame, center, 5, (0, 0, 255), -1)
# then update the pointer trail
if self.previous_position:
cv2.line(self.trail, self.previous_position, center,
(255, 255, 255), 2)
cv2.add(self.trail, frame, frame)
self.previous_position = center
def detect(self, frame):
hsv_img = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# split the video frame into color channels
h, s, v = cv2.split(hsv_img)
self.channels['hue'] = h
self.channels['saturation'] = s
self.channels['value'] = v
# Threshold ranges of HSV components; storing the results in place
self.threshold_image("hue")
self.threshold_image("saturation")
self.threshold_image("value")
# Perform an AND on HSV components to identify the laser!
self.channels['laser'] = cv2.bitwise_and(
self.channels['hue'],
self.channels['value']
)
self.channels['laser'] = cv2.bitwise_and(
self.channels['saturation'],
self.channels['laser']
)
# Merge the HSV components back together.
hsv_image = cv2.merge([
self.channels['hue'],
self.channels['saturation'],
self.channels['value'],
])
self.track(frame, self.channels['laser'])
return hsv_image
def display(self, img, frame):
"""Display the combined image and (optionally) all other image channels
NOTE: default color space in OpenCV is BGR.
"""
cv2.imshow('RGB_VideoFrame', frame)
cv2.imshow('LaserPointer', self.channels['laser'])
if self.display_thresholds:
cv2.imshow('Thresholded_HSV_Image', img)
cv2.imshow('Hue', self.channels['hue'])
cv2.imshow('Saturation', self.channels['saturation'])
cv2.imshow('Value', self.channels['value'])
def setup_windows(self):
sys.stdout.write("Using OpenCV version: {0}\n".format(cv2.__version__))
# create output windows
self.create_and_position_window('LaserPointer', 0, 0)
self.create_and_position_window('RGB_VideoFrame',
10 + self.cam_width, 0)
if self.display_thresholds:
self.create_and_position_window('Thresholded_HSV_Image', 10, 10)
self.create_and_position_window('Hue', 20, 20)
self.create_and_position_window('Saturation', 30, 30)
self.create_and_position_window('Value', 40, 40)
def run(self):
# Set up window positions
self.setup_windows()
# Set up the camera capture
self.setup_camera_capture()
while True:
# 1. capture the current image
success, frame = self.capture.read()
if not success: # no image captured... end the processing
sys.stderr.write("Could not read camera frame. Quitting\n")
sys.exit(1)
hsv_image = self.detect(frame)
self.display(hsv_image, frame)
self.handle_quit()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run the Laser Tracker')
parser.add_argument('-W', '--width',
default=640,
type=int,
help='Camera Width')
parser.add_argument('-H', '--height',
default=480,
type=int,
help='Camera Height')
parser.add_argument('-u', '--huemin',
default=20,
type=int,
help='Hue Minimum Threshold')
parser.add_argument('-U', '--huemax',
default=160,
type=int,
help='Hue Maximum Threshold')
parser.add_argument('-s', '--satmin',
default=100,
type=int,
help='Saturation Minimum Threshold')
parser.add_argument('-S', '--satmax',
default=255,
type=int,
help='Saturation Maximum Threshold')
parser.add_argument('-v', '--valmin',
default=200,
type=int,
help='Value Minimum Threshold')
parser.add_argument('-V', '--valmax',
default=255,
type=int,
help='Value Maximum Threshold')
parser.add_argument('-d', '--display',
action='store_true',
help='Display Threshold Windows')
params = parser.parse_args()
tracker = LaserTracker(
cam_width=params.width,
cam_height=params.height,
hue_min=params.huemin,
hue_max=params.huemax,
sat_min=params.satmin,
sat_max=params.satmax,
val_min=params.valmin,
val_max=params.valmax,
display_thresholds=params.display
)
tracker.run()
I'm by no means an expert on this, but taking a quick look at the code it seems that self.previous_position holds the (x, y) tuple.
Not sure why you would want Excel specifically, but to save to a file just add f = open(filename, 'w') at the start of the run function and then f.write("{0},{1}\n".format(*self.previous_position)) at the end of each loop (f.write needs a string, not a tuple).
Your file will then contain all the recorded x and y coordinates; if you're set on getting this into Excel, check out Python's out-of-the-box csv library.
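Sketched out (untested, and assuming it replaces the run method of the LaserTracker class above), logging every tracked position to a CSV file that Excel opens directly could look like this:

import csv

def run(self):
    self.setup_windows()
    self.setup_camera_capture()
    # Log one "x,y" row per frame in which the dot was found
    with open('laser_positions.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['x', 'y'])
        while True:
            success, frame = self.capture.read()
            if not success:
                sys.stderr.write("Could not read camera frame. Quitting\n")
                sys.exit(1)
            hsv_image = self.detect(frame)
            if self.previous_position is not None:
                writer.writerow(self.previous_position)  # (x, y) tuple
            self.display(hsv_image, frame)
            self.handle_quit()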
