Multiprocessing with Tensorflow lite and EdgeTPU is not working - python

I'm trying to use the Edge TPU USB Accelerator with multiprocessing.
The fist process handles images to a queue, but do not use the Edge TPU. The second Process get the image from the queue and set it to the input tensor. But it seams that after calling invoke() to the tf.lite.Interpreter the process stops.
def load_model(model_file_path: str):
try:
delegate = tf.lite.experimental.load_delegate('libedgetpu.so.1.0')
except ValueError:
logging.error("EdgeTPU could not be loaded")
pass
interpreter = tf.lite.Interpreter(model_file_path, experimental_delegates=[delegate])
interpreter.allocate_tensors()
return interpreter
def get_detection_results(interpreter):
output_details = interpreter.get_output_details()
if get_output_tensor(interpreter, output_details[3]).size == 1:
boxes = get_output_tensor(interpreter, output_details[0])
classes = get_output_tensor(interpreter, output_details[1])
scores = get_output_tensor(interpreter, output_details[2])
count = int(get_output_tensor(interpreter, output_details[3]))
else:
boxes = get_output_tensor(interpreter, output_details[1])
classes = get_output_tensor(interpreter, output_details[3])
scores = get_output_tensor(interpreter, output_details[0])
count = int(get_output_tensor(interpreter, output_details[2]))
# some conversions
classes = DataUtil.convert_class_ids_to_names(classes)
return boxes, classes, scores, count
def set_input_tensor(interpreter, image):
tensor_index = interpreter.get_input_details()[0]['index']
input_tensor = interpreter.tensor(tensor_index)()[0]
img_height, img_width, _ = image.shape
input_height, input_width, _ = input_tensor.shape
if img_height != input_height or img_width != input_width:
logging.error("Image is not the correct size")
else:
input_tensor[:, :, :] = image
This is the problematic class:
class ImgProcessor:
def __init__(self):
self.detect_work_queue = multiprocessing.Queue()
self.detect_result_queue = multiprocessing.Queue()
self.detect_interpreter = TfUtil.load_model(models_path)
This method will be called in a loop for each image of a video:
def process_img(self, frame: np.ndarray):
self.detect_work_queue.put((frame, frame.shape))
This method is running as the second process:
def detect(self, detect_queue, detect_result_queue, lock):
while True:
frame, shape = detect_queue.get()
# paste the image to nn
TfUtil.set_input_tensor(self.detect_interpreter, frame)
# run detection
self.detect_interpreter.invoke() # Error
boxes, classes, scores, _ = TfUtil.get_detection_results(self.detect_interpreter)
detect_result_queue.put((boxes, classes, scores))
Dose anyone know if tf_lite with the edgeTPU is maybe incompatible to multiprocessing or what I'm doing wrong?

Related

Getting black image after inference from .engine file using python

I converted the pytorch Real-ESRGAN model to model.engine file from using c++ code <here>. After conversion inference is working well on c++. But when I try to infer the image from this model.engine in python. It gives me the black image as given below here is the image.
GPU : RTX 3090
OS : Ubuntu20.04
Cuda version : 11.4
TensorRT version : 22
import os
import time
import cv2
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image
class HostDeviceMem(object):
def __init__(self, host_mem, device_mem):
self.host = host_mem
self.device = device_mem
def __str__(self):
return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
def __repr__(self):
return self.__str__()
def load_engine(trt_runtime, engine_path):
with open(engine_path, "rb") as f:
engine_data = f.read()
engine = trt_runtime.deserialize_cuda_engine(engine_data)
return engine
def allocate_buffers(engine, batch_size=1):
inputs = []
outputs = []
bindings = []
stream = cuda.Stream()
for binding in engine:
size = trt.volume(engine.get_binding_shape(binding)) * batch_size
dtype = trt.nptype(engine.get_binding_dtype(binding))
# Allocate host and device buffers
host_mem = cuda.pagelocked_empty(size, dtype)
device_mem = cuda.mem_alloc(host_mem.nbytes)
# Append the device buffer to device bindings.
bindings.append(int(device_mem))
# Append to the appropriate list.
if engine.binding_is_input(binding):
inputs.append(HostDeviceMem(host_mem, device_mem))
else:
outputs.append(HostDeviceMem(host_mem, device_mem))
return inputs, outputs, bindings, stream
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
# Transfer input data to the GPU.
[cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
# Run inference.
context.execute_async(
batch_size=1, bindings=bindings, stream_handle=stream.handle
)
# Transfer predictions back from the GPU.
[cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
# Synchronize the stream
stream.synchronize()
# Return only the host outputs.
return [out.host for out in outputs]
def preprocess_image(input_image_path):
image_raw = cv2.imread(input_image_path)
return image_raw
def process_image(arr, w, h):
image = Image.fromarray(np.uint8(arr))
image_resized = image.resize(size=(w, h), resample=Image.BILINEAR)
img_np = np.array(image_resized)
# HWC -> CHW
img_np = img_np.transpose((2, 0, 1))
# Normalize to [0.0, 1.0] interval (expected by model)
img_np = (1.0 / 255.0) * img_np
print(img_np.shape)
img_np = img_np.ravel()
return img_np
def predict(image):
img = preprocess_image(image)
print(img.shape)
np.copyto(inputs[0].host, img.ravel())
inference_start_time = time.time()
# Fetch output from the model
output = do_inference(
context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream
)
# Output inference time
print(output)
# And return results
return output
# -------------- MODEL PARAMETERS FOR DETECTNET_V2 --------------------------------
model_h = 1536
model_w = 1536
import ctypes
PLUGIN_LIBRARY = "build/libmyplugins.so"
ctypes.CDLL(PLUGIN_LIBRARY)
# TensorRT logger singleton
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_engine_path = 'build/real-esrgan_f32.engine'
trt_runtime = trt.Runtime(TRT_LOGGER)
trt_engine = load_engine(trt_runtime, trt_engine_path)
# This allocates memory for network inputs/outputs on both CPU and GPU
inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
# Execution context is needed for inference
context = trt_engine.create_execution_context()
output = predict('image.jpg')[0]
output = output.reshape(3072,3072,3)
print(output.shape)
print(output.dtype)
cv2.imwrite('output.jpg', output)

How to load a TF Lite Model into Python from a file

I've followed the End-to-End image classification tutorial for tensorflow lite and have created and saved my model as '/path/to/model.tflite'.
What I haven't been able to figure out is how to load it.
I'm looking for some kind of syntax that is similar to this:
from tflite_model_maker import image_classifier
from tflite_model_maker.image_classifier import DataLoader
model = image_classifier.Load('/path/to/model.tflite')
I'm sure I'm missing something obvious here. This is definitely not the first place I've looked at. This seems to be the best place for me to find what I need, but the syntax used confuses me.
What do I want to be able to do with the model?
test = DataLoader.from_folder('/path/to/testImages')
loss, accuracy = model.evaluate(test)
# A helper function that returns 'red'/'black' depending on if its two input
# parameter matches or not.
def get_label_color(val1, val2):
if val1 == val2:
return 'black'
else:
return 'red'
# Then plot 100 test images and their predicted labels.
# If a prediction result is different from the label provided label in "test"
# dataset, we will highlight it in red color.
test_data = data
plt.figure(figsize=(20, 20))
predicts = model.predict_top_k(test_data)
for i, (image, label) in enumerate(test_data.gen_dataset().unbatch().take(100)):
ax = plt.subplot(10, 10, i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(image.numpy(), cmap=plt.cm.gray)
predict_label = predicts[i][0][0]
color = get_label_color(predict_label,
test_data.index_to_label[label.numpy()])
ax.xaxis.label.set_color(color)
plt.xlabel('Predicted: %s' % predict_label)
plt.show()
From the syntax above it seems the model isn't just a file but is a type/class/method depending on what name is most suitable for python.
Feels like this should only take one line of code but I haven't been able to find it anywhere.
Managed to do a simple version of it. The images coming up as a stream doesn't work for me using cv2 with Windows as it does for the pi. So instead I created a webpage in the same directory as this script. This generates an image with the bounding box, using a specified tflite model. This is in no way ideal.
It uses a webcam to get the image and saves the image to the directory the script is run in. It then renames the file so it can be viewed by the webpage I setup to view it.
The majority of this code comes from the TFLite Object Detection Raspberry Pi sample.
import time, os
from PIL import Image
from tflite_support import metadata
import platform
from typing import List, NamedTuple
import json
import cv2 as cv2
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
Interpreter = tf.lite.Interpreter
load_delegate = tf.lite.experimental.load_delegate
class ObjectDetectorOptions(NamedTuple):
"""A config to initialize an object detector."""
enable_edgetpu: bool = False
"""Enable the model to run on EdgeTPU."""
label_allow_list: List[str] = None
"""The optional allow list of labels."""
label_deny_list: List[str] = None
"""The optional deny list of labels."""
max_results: int = -1
"""The maximum number of top-scored detection results to return."""
num_threads: int = 1
"""The number of CPU threads to be used."""
score_threshold: float = 0.0
"""The score threshold of detection results to return."""
class Rect(NamedTuple):
"""A rectangle in 2D space."""
left: float
top: float
right: float
bottom: float
class Category(NamedTuple):
"""A result of a classification task."""
label: str
score: float
index: int
class Detection(NamedTuple):
"""A detected object as the result of an ObjectDetector."""
bounding_box: Rect
categories: List[Category]
def edgetpu_lib_name():
"""Returns the library name of EdgeTPU in the current platform."""
return {
'Darwin': 'libedgetpu.1.dylib',
'Linux': 'libedgetpu.so.1',
'Windows': 'edgetpu.dll',
}.get(platform.system(), None)
class ObjectDetector:
"""A wrapper class for a TFLite object detection model."""
_OUTPUT_LOCATION_NAME = 'location'
_OUTPUT_CATEGORY_NAME = 'category'
_OUTPUT_SCORE_NAME = 'score'
_OUTPUT_NUMBER_NAME = 'number of detections'
def __init__(
self,
model_path: str,
options: ObjectDetectorOptions = ObjectDetectorOptions()
) -> None:
"""Initialize a TFLite object detection model.
Args:
model_path: Path to the TFLite model.
options: The config to initialize an object detector. (Optional)
Raises:
ValueError: If the TFLite model is invalid.
OSError: If the current OS isn't supported by EdgeTPU.
"""
# Load metadata from model.
displayer = metadata.MetadataDisplayer.with_model_file(model_path)
# Save model metadata for preprocessing later.
model_metadata = json.loads(displayer.get_metadata_json())
process_units = model_metadata['subgraph_metadata'][0]['input_tensor_metadata'][0]['process_units']
mean = 0.0
std = 1.0
for option in process_units:
if option['options_type'] == 'NormalizationOptions':
mean = option['options']['mean'][0]
std = option['options']['std'][0]
self._mean = mean
self._std = std
# Load label list from metadata.
file_name = displayer.get_packed_associated_file_list()[0]
label_map_file = displayer.get_associated_file_buffer(file_name).decode()
label_list = list(filter(lambda x: len(x) > 0, label_map_file.splitlines()))
self._label_list = label_list
# Initialize TFLite model.
if options.enable_edgetpu:
if edgetpu_lib_name() is None:
raise OSError("The current OS isn't supported by Coral EdgeTPU.")
interpreter = Interpreter(
model_path=model_path,
experimental_delegates=[load_delegate(edgetpu_lib_name())],
num_threads=options.num_threads)
else:
interpreter = Interpreter(
model_path=model_path, num_threads=options.num_threads)
interpreter.allocate_tensors()
input_detail = interpreter.get_input_details()[0]
# From TensorFlow 2.6, the order of the outputs become undefined.
# Therefore we need to sort the tensor indices of TFLite outputs and to know
# exactly the meaning of each output tensor. For example, if
# output indices are [601, 599, 598, 600], tensor names and indices aligned
# are:
# - location: 598
# - category: 599
# - score: 600
# - detection_count: 601
# because of the op's ports of TFLITE_DETECTION_POST_PROCESS
# (https://github.com/tensorflow/tensorflow/blob/a4fe268ea084e7d323133ed7b986e0ae259a2bc7/tensorflow/lite/kernels/detection_postprocess.cc#L47-L50).
sorted_output_indices = sorted(
[output['index'] for output in interpreter.get_output_details()])
self._output_indices = {
self._OUTPUT_LOCATION_NAME: sorted_output_indices[0],
self._OUTPUT_CATEGORY_NAME: sorted_output_indices[1],
self._OUTPUT_SCORE_NAME: sorted_output_indices[2],
self._OUTPUT_NUMBER_NAME: sorted_output_indices[3],
}
self._input_size = input_detail['shape'][2], input_detail['shape'][1]
self._is_quantized_input = input_detail['dtype'] == np.uint8
self._interpreter = interpreter
self._options = options
def detect(self, input_image: np.ndarray) -> List[Detection]:
"""Run detection on an input image.
Args:
input_image: A [height, width, 3] RGB image. Note that height and width
can be anything since the image will be immediately resized according
to the needs of the model within this function.
Returns:
A Person instance.
"""
image_height, image_width, _ = input_image.shape
input_tensor = self._preprocess(input_image)
self._set_input_tensor(input_tensor)
self._interpreter.invoke()
# Get all output details
boxes = self._get_output_tensor(self._OUTPUT_LOCATION_NAME)
classes = self._get_output_tensor(self._OUTPUT_CATEGORY_NAME)
scores = self._get_output_tensor(self._OUTPUT_SCORE_NAME)
count = int(self._get_output_tensor(self._OUTPUT_NUMBER_NAME))
return self._postprocess(boxes, classes, scores, count, image_width,
image_height)
def _preprocess(self, input_image: np.ndarray) -> np.ndarray:
"""Preprocess the input image as required by the TFLite model."""
# Resize the input
input_tensor = cv2.resize(input_image, self._input_size)
# Normalize the input if it's a float model (aka. not quantized)
if not self._is_quantized_input:
input_tensor = (np.float32(input_tensor) - self._mean) / self._std
# Add batch dimension
input_tensor = np.expand_dims(input_tensor, axis=0)
return input_tensor
def _set_input_tensor(self, image):
"""Sets the input tensor."""
tensor_index = self._interpreter.get_input_details()[0]['index']
input_tensor = self._interpreter.tensor(tensor_index)()[0]
input_tensor[:, :] = image
def _get_output_tensor(self, name):
"""Returns the output tensor at the given index."""
output_index = self._output_indices[name]
tensor = np.squeeze(self._interpreter.get_tensor(output_index))
return tensor
def _postprocess(self, boxes: np.ndarray, classes: np.ndarray,
scores: np.ndarray, count: int, image_width: int,
image_height: int) -> List[Detection]:
"""Post-process the output of TFLite model into a list of Detection objects.
Args:
boxes: Bounding boxes of detected objects from the TFLite model.
classes: Class index of the detected objects from the TFLite model.
scores: Confidence scores of the detected objects from the TFLite model.
count: Number of detected objects from the TFLite model.
image_width: Width of the input image.
image_height: Height of the input image.
Returns:
A list of Detection objects detected by the TFLite model.
"""
results = []
# Parse the model output into a list of Detection entities.
for i in range(count):
if scores[i] >= self._options.score_threshold:
y_min, x_min, y_max, x_max = boxes[i]
bounding_box = Rect(
top=int(y_min * image_height),
left=int(x_min * image_width),
bottom=int(y_max * image_height),
right=int(x_max * image_width))
class_id = int(classes[i])
category = Category(
score=scores[i],
label=self._label_list[class_id], # 0 is reserved for background
index=class_id)
result = Detection(bounding_box=bounding_box, categories=[category])
results.append(result)
# Sort detection results by score ascending
sorted_results = sorted(
results,
key=lambda detection: detection.categories[0].score,
reverse=True)
# Filter out detections in deny list
filtered_results = sorted_results
if self._options.label_deny_list is not None:
filtered_results = list(
filter(
lambda detection: detection.categories[0].label not in self.
_options.label_deny_list, filtered_results))
# Keep only detections in allow list
if self._options.label_allow_list is not None:
filtered_results = list(
filter(
lambda detection: detection.categories[0].label in self._options.
label_allow_list, filtered_results))
# Only return maximum of max_results detection.
if self._options.max_results > 0:
result_count = min(len(filtered_results), self._options.max_results)
filtered_results = filtered_results[:result_count]
return filtered_results
_MARGIN = 10 # pixels
_ROW_SIZE = 10 # pixels
_FONT_SIZE = 1
_FONT_THICKNESS = 1
_TEXT_COLOR = (0, 0, 255) # red
def visualize(
image: np.ndarray,
detections: List[Detection],
) -> np.ndarray:
"""Draws bounding boxes on the input image and return it.
Args:
image: The input RGB image.
detections: The list of all "Detection" entities to be visualize.
Returns:
Image with bounding boxes.
"""
for detection in detections:
# Draw bounding_box
start_point = detection.bounding_box.left, detection.bounding_box.top
end_point = detection.bounding_box.right, detection.bounding_box.bottom
cv2.rectangle(image, start_point, end_point, _TEXT_COLOR, 3)
# Draw label and score
category = detection.categories[0]
class_name = category.label
probability = round(category.score, 2)
result_text = class_name + ' (' + str(probability) + ')'
text_location = (_MARGIN + detection.bounding_box.left,
_MARGIN + _ROW_SIZE + detection.bounding_box.top)
cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
_FONT_SIZE, _TEXT_COLOR, _FONT_THICKNESS)
return image
# ---------------------------------- #
# This is where the custom code starts
# ---------------------------------- #
# Load the TFLite model
TFLITE_MODEL_PATH='object.tflite'
DETECTION_THRESHOLD = 0.5 # 50% threshold required before identifying
options = ObjectDetectorOptions(
num_threads=4,
score_threshold=DETECTION_THRESHOLD,
)
# Close camera if already open
try:
cap.release()
except:
print("",end="") # do nothing
detector = ObjectDetector(model_path=TFLITE_MODEL_PATH, options=options)
cap = cv2.VideoCapture(0) #webcam
counter = 0 # Store many times model has run
while cap.isOpened():
success, image = cap.read()
if not success:
sys.exit(
'ERROR: Unable to read from webcam. Please verify your webcam settings.'
)
image = cv2.flip(image, 1)
# Convert the image from BGR to RGB as required by the TFLite model.
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#image.thumbnail((512, 512), Image.ANTIALIAS)
image_np = np.asarray(image)
# Run object detection estimation using the model.
detections = detector.detect(image_np)
# Draw keypoints and edges on input image
image_np = visualize(image_np, detections)
if counter == 10: # <- Change this to decide how many iterations
cap.release()
break
image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
plt.imsave('tmp.jpg',image_np) # Saves the image
os.replace("tmp.jpg", "web.jpg",) # Renames it for the webpage
counter += 1
print(counter)
cap.release()
Here's the HTML for the document placed in the same directory as the python file, I saved it as index.html and opened in the browser while running the python script above.
<!DOCTYPE html>
<html>
<head>
<title>Object Detection</title>
</head>
<body>
<h1>Object Detection</h1>
<p>This displays images saved during detection process</p>
<canvas id="x" width="700px" height="500px"></canvas>
<script>
var newImage = new Image();
newImage.src = "web.jpg";
var canvas = document.getElementById("x");
var context = canvas.getContext("2d");
newImage.onload = function() {
context.drawImage(newImage, 0, 0);
console.log("trigger")
setTimeout(timedRefresh, 1000);
};
function timedRefresh() {
// just change src attribute, will always trigger the onload callback
try {
newImage.src = ("web.jpg#" + new Date().getTime());
}catch(e){
console.log(e);
}
}
setTimeout(timedRefresh, 100);
</script>
</body>
</html>
It's incredibly slow, not ideal in many ways and probably breaks many good coding conventions. It was only used locally, would definitely not use this for a production environment nor recommend its use. Just needed a quick proof of concept and this worked for that.

Training stuck at Epoch 3 PyTorch

I am training a custom Encoder-Decoder network but the training gets stuck at Epoch 3. Nothing happens for about 2 hours. I will share the Dataset class and the DataLoader object. The version if CUDA and GPU can be seen in the pic below.
Training stuck here:
nvidia-smi output looks like this:
The __getitem__ method of the dataset class looks like this:
def __init__(self,
images_dir,
annots_dir,
train=True,
img_size=(512, 1536),
stride=4,
model='custom',
transforms=None):
"""
:param root: dataset directory
:param filenames: filenames inside the root directory
:param labels: Object Detection Labels
super(CustomDataset).__init__()
self.images_dir = images_dir
self.annots_dir = annots_dir
self.train = train
self.image_size = img_size
self.stride = stride
self.transforms = transforms
self.model = model
# Load the image and annotation files from the dataset
# self.image_files, self.annot_files = self._load_image_and_annot_files()
self.image_files = [os.path.join(self.images_dir, idx) for idx in os.listdir(self.images_dir)]
self.annot_files = [os.path.join(self.annots_dir, idx) for idx in os.listdir(self.annots_dir)]
def __getitem__(self, index):
"""
:param index: index...0 to N
:return: tensor_image and tensor_label
"""
# Image filename from _load_image_files()
# Load Image with _read_matrix() and label
curr_image_filename = self.image_files[index]
curr_annot_filename = self.annot_files[index]
# curr_image_filename = self.image_files[index]
# curr_annot_filename = self.annot_files[index]
np_image = self._read_matrix(raw_img=curr_image_filename)
np_image_normalized = np.squeeze(self._normalize_raw_img(np_image))
# label = self.labels[index]
boxes, classes, depths, tgts = self._load_annotations(curr_annot_filename)
# Normalize bounding boxes: range [0, 1]
targets_normalized = self._normalize_bbox(np_image_normalized, tgts)
# image and the corresponding label should be a tensor
torch_image = torch.from_numpy(np_image).reshape(1, 512, 1536).float() # dtype: torch.float64
torch_boxes = torch.from_numpy(boxes).type(torch.FloatTensor)
torch_depths = torch.from_numpy(depths)
if self.model == 'fasterrcnn':
# For FasterRCNN: As COCO format
area = (torch_boxes[:, 3] - torch_boxes[:, 1]) * (torch_boxes[:, 2] - torch_boxes[:, 0])
iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
image_id = torch.Tensor([index])
torch_classes = torch.from_numpy(classes)
target = {'boxes': torch_boxes, 'labels': torch_classes.long(),
'area': area, 'iscrowd': iscrowd, 'image_id': image_id}
return torch_image, target
elif self.model == 'custom':
if self.train:
if self.transforms:
try:
tr = self.transforms()
transform_image, transform_boxes, labels = tr.__call__(np_image, tgts, tgts[:, :4], tgts[:, 4:])
transform_targets = np.hstack((np.array(transform_boxes), labels))
gt_tensor = gt_creator(img_size=self.image_size,
stride=self.stride,
num_classes=8,
label_lists=transform_targets)
return torch.from_numpy(transform_image).float(), gt_tensor
except IndexError:
pass
else:
gt_tensor = gt_creator(img_size=self.image_size,
stride=self.stride,
num_classes=8,
label_lists=targets_normalized)
return torch_image, gt_tensor
else:
return torch_image, targets_normalized
And in the train.py script the DataLoader object is:
train_loader = torch.utils.data.DataLoader(dataset=dataset,
shuffle=True,
batch_size=1,
num_workers=0,
collate_fn=detection_collate,
pin_memory=True)
Why does the training get stuck? Is there an issue with the __getitem__ method? Or the DataLoader?
Thank You.
This happens because torch doesnt restart your dataset, if your data runs out it stops and waits for more input so cycling has to be done manually.
I used something along the lines of
from itertools import cycle
class Dataloader():
#init and whatever
self.__iter__():
return cycle(get_sample()) # get_sample is your current getitem

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.cuda.HalfTensor) should be the same

I just start to learn about the YOLO v5 PyTorch version and I was able to build a model, so then I tried to implement a flask application for real-time prediction using this trained model.
class for load model and predict
class Model(object):
def __init__(self, model):
self.device = torch_utils.select_device()
print(self.device)
model = torch.load(model, map_location=self.device)['model']
self.half = False and self.device.type != 'cpu'
print('half = ' + str(self.half))
if self.half:
model.half()
# model = model.to(self.device).eval()
model.cuda()
self.loaded_model = model
def predict(self, img):
global session
# img1 = torch.from_numpy(img).to(self.device)
# img = img1.reshape(1, 3, 640, 640)
img = img.half() if self.half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
print(img.ndimension())
if img.ndimension() == 3:
img = img.unsqueeze(0)
print(self.loaded_model)
img = img.to(self.device)
# img = img.half()
self.preds = self.loaded_model(img, augment=False)[0]
print(self.predict())
return self.preds
Camera class for reading frames from camera or video
model = Model("weights/best.pt")
class Camera(object):
def __init__(self):
# self.video = cv2.VideoCapture('facial_exp.mkv')
self.video = cv2.VideoCapture(0)
def __del__(self):
self.video.release()
def get_frame(self):
_, fr = self.video.read()
loader = transforms.Compose([transforms.ToTensor()])
image = cv2.resize(fr, (640, 640), interpolation=cv2.INTER_AREA)
input_im = image.reshape(1, 640, 640, 3)
pil_im = Image.fromarray(fr)
image = loader(pil_im).float()
# image = Variable(image, requires_grad=True)
image = image.unsqueeze(0)
pred = model.predict(input_im)
pred = model.predict(image)
print(pred)
_, jpeg = cv2.imencode('.jpg', fr)
return jpeg.tobytes()
Some of the commented lines are the ways which I tried but in all times bellow line
self.preds = self.loaded_model(img, augment=False)[0] throws below error
RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.cuda.HalfTensor) should be the same
any idea or guidance for solving this error thank you.
this error means: the input type is float32, the weight type(of your model) is float16.
for exsample, this code below runned:
model.half() # so the weight type is float16
but this code below not runned:
img = img.half() # so the input type is float32
please check your code.
for more information about 'half', you can refer to torch.Tensor.to() and torch.nn.Module.to()
I was facing the same error, basically I was not sending my model to the GPU, so moving the model to the GPU device solved the error:
model = model.to(device)

Tensorflow inference too slow when loading multiple models

I finetuned two Mobilenet models on diferent datasets based on the tensorflow object_detection API example from here. When I use eager mode (tf.executing_eagerly() is True) using only one model then the inference runs at 0.036 seconds per image. When I load two models Keras required to convert to graph mode (tf.executing_eagerly() is False) and the inference runs at 1.8 seconds per image. What I'm doing wrong?
def inference(pipeline_config, checkpoint_path):
print('Building model and restoring weights', flush=True)
num_classes = 3
# Load pipeline config and build a detection model.
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
detection_model = model_builder.build(
model_config=model_config, is_training=False)
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(checkpoint_path).expect_partial()
# Run model through a dummy image so that variables are created
image, shapes = detection_model.preprocess(tf.zeros([1, 320, 320, 3]))
prediction_dict = detection_model.predict(image, shapes)
_ = detection_model.postprocess(prediction_dict, shapes)
print('Weights restored!')
return detection_model
def get_model_detection_function(detection_model):
"""Get a tf.function for detection."""
# Again, uncomment this decorator if you want to run inference eagerly
#tf.function
def detect(input_tensor):
"""Run detection on an input image.
Args:
input_tensor: A [1, height, width, 3] Tensor of type tf.float32.
Note that height and width can be anything since the image will be
immediately resized according to the needs of the model within this
function.
Returns:
A dict containing 3 Tensors (`detection_boxes`, `detection_classes`,
and `detection_scores`).
"""
preprocessed_image, shapes = detection_model.preprocess(input_tensor)
prediction_dict = detection_model.predict(preprocessed_image, shapes)
return detection_model.postprocess(prediction_dict, shapes)
return detect
def mainProcess():
print('Loading model 1...')
g1 = tf.Graph()
s1 = tf.compat.v1.Session(graph=g1)
with g1.as_default(), s1.as_default():
detection_model_1 = inference('config_1/pipeline.config', 'Checkpoint_1/ckpt-1')
detect_fn_1 = get_model_detection_function(detection_model_1)
s1.run(tf.compat.v1.global_variables_initializer())
print('Loading model 2...')
g2 = tf.Graph()
s2 = tf.compat.v1.Session(graph=g2)
with g2.as_default():
detection_model_2 = inference('config_2/pipeline.config', 'Checkpoint_2/ckpt-1')
detect_fn_2 = get_model_detection_function(detection_model_2)
s2.run(tf.compat.v1.global_variables_initializer())
for i, f in enumerate(listdir('images_dir/')):
...
... read the image
...
with g1.as_default():
with s1.as_default():
sec = time.time()
input_tensor = tf.convert_to_tensor(test_img, dtype=tf.float32)
detections = detect_fn_1(input_tensor)
detections = s1.run(detections)
curr = time.time()
print("Finished iterating in: " + str(curr - sec) + " seconds")
# the same for detection_model_2
For eager mode with only one model the mainProcess is:
def mainProcess():
print('Loading model...')
detection_model_1 = inference('config_1/pipeline.config', 'Checkpoint_1/ckpt-1')
detect_fn_1 = get_model_detection_function(detection_model_1)
for i, f in enumerate(listdir('images_dir/')):
...
... read the image
...
sec = time.time()
input_tensor = tf.convert_to_tensor(test_img, dtype=tf.float32)
detections = detect_fn_1(input_tensor)
print(detections['detection_boxes'][0].numpy())
print(detections['detection_scores'][0].numpy())
curr = time.time()
print("Finished iterating in: " + str(curr - sec) + " seconds")

Categories

Resources