Error Training Custom COCO Dataset with Mask R-CNN - python

I'm trying to train a custom COCO-format dataset with Matterport's Mask R-CNN on TensorFlow/Keras. My datasets are JSON files in the aforementioned COCO format, with each item in the "annotations" section following the standard COCO layout, along these lines (illustrative values):
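{
    "id": 1,
    "image_id": 5,
    "category_id": 2,
    "segmentation": [[510.66, 423.01, 511.72, 420.03, 510.45, 416.0, 510.34, 413.02]],
    "area": 702.1,
    "bbox": [473.07, 395.93, 38.65, 28.67],
    "iscrowd": 0
}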
There are 20 classes, with polygon masks for the entire object, and then polygon masks for the parts within the object. The images are of shape 256x448x3.
The code for the dataset preparation is as follows:
import json
import os

import numpy as np
from PIL import Image, ImageDraw
from mrcnn import utils

class CocoLikeDataset(utils.Dataset):
    """ Generates a COCO-like dataset, i.e. an image dataset annotated in the style of the COCO dataset.
        See http://cocodataset.org/#home for more information.
    """
    def load_data(self, annotation_json, images_dir):
        """ Load the coco-like dataset from json
        Args:
            annotation_json: The path to the coco annotations json file
            images_dir: The directory holding the images referred to by the json file
        """
        # Load json from file
        json_file = open(annotation_json)
        coco_json = json.load(json_file)
        json_file.close()

        # Add the class names using the base method from utils.Dataset
        source_name = "coco_like"
        for category in coco_json['categories']:
            class_id = category['id'] + 1
            class_name = category['name']
            if class_id < 1:
                print('Error: Class id for "{}" cannot be less than one. (0 is reserved for the background)'.format(class_name))
                return
            self.add_class(source_name, class_id, class_name)

        # Get all annotations
        annotations = {}
        for annotation in coco_json['annotations']:
            image_id = annotation['image_id']
            if image_id not in annotations:
                annotations[image_id] = []
            annotations[image_id].append(annotation)

        # Get all images and add them to the dataset
        seen_images = {}
        for image in coco_json['images']:
            image_id = image['id']
            if image_id in seen_images:
                print("Warning: Skipping duplicate image id: {}".format(image))
            else:
                seen_images[image_id] = image
                try:
                    image_file_name = image['file_name']
                    image_width = image['width']
                    image_height = image['height']
                except KeyError as key:
                    print("Warning: Skipping image (id: {}) with missing key: {}".format(image_id, key))
                    continue  # actually skip the image instead of reusing stale values

                image_path = os.path.abspath(os.path.join(images_dir, image_file_name))
                image_annotations = annotations[image_id]

                # Add the image using the base method from utils.Dataset
                self.add_image(
                    source=source_name,
                    image_id=image_id,
                    path=image_path,
                    width=image_width,
                    height=image_height,
                    annotations=image_annotations
                )

    def load_mask(self, image_id):
        """ Load instance masks for the given image.
        MaskRCNN expects masks in the form of a bitmap [height, width, instances].
        Args:
            image_id: The id of the image to load masks for
        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
        """
        image_info = self.image_info[image_id]
        annotations = image_info['annotations']
        instance_masks = []
        class_ids = []

        for annotation in annotations:
            # Note: load_data shifts category ids by +1; if the json's category
            # ids start at 0, the same shift may be needed here.
            class_id = annotation['category_id']
            mask = Image.new('1', (image_info['width'], image_info['height']))
            mask_draw = ImageDraw.ImageDraw(mask, '1')
            for segmentation in annotation['segmentation']:
                mask_draw.polygon(segmentation, fill=1)
            bool_array = np.array(mask) > 0
            instance_masks.append(bool_array)
            class_ids.append(class_id)

        mask = np.dstack(instance_masks)
        class_ids = np.array(class_ids, dtype=np.int32)
        return mask, class_ids
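For reference, a minimal usage sketch of the class above, with placeholder paths:
# Placeholder paths; substitute your own annotation file and image directory.
dataset_train = CocoLikeDataset()
dataset_train.load_data('train/annotations.json', 'train/images')
dataset_train.prepare()  # required by mrcnn before the dataset can be used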
This seems to work well, at least for loading the images and masks in a presentable format: visualizing the images, masks, and class ids for some of the images yields good results, and I can see each image, its binary masks, and the class id of each mask.
However, when I actually try to train the model on the training dataset created, I get the following error:
model = modellib.MaskRCNN(mode="training", config=config,
model_dir=MODEL_DIR)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-19-7928c4edfc77> in <module>()
1 # Create model in training mode
2 model = modellib.MaskRCNN(mode="training", config=config,
----> 3 model_dir=MODEL_DIR)
3 frames
/content/Mask_RCNN/mrcnn/model.py in __init__(self, mode, config, model_dir)
1835 self.model_dir = model_dir
1836 self.set_log_dir()
-> 1837 self.keras_model = self.build(mode=mode, config=config)
1838
1839 def build(self, mode, config):
/content/Mask_RCNN/mrcnn/model.py in build(self, mode, config)
1927 # Anchors
1928 if mode == "training":
-> 1929 anchors = self.get_anchors(config.IMAGE_SHAPE)
1930 # Duplicate across the batch dimension because Keras requires it
1931 # TODO: can this be optimized to avoid duplicating the anchors?
/content/Mask_RCNN/mrcnn/model.py in get_anchors(self, image_shape)
2609 backbone_shapes,
2610 self.config.BACKBONE_STRIDES,
-> 2611 self.config.RPN_ANCHOR_STRIDE)
2612 # Keep a copy of the latest anchors in pixel coordinates because
2613 # it's used in inspect_model notebooks.
/content/Mask_RCNN/mrcnn/utils.py in generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride)
635 anchors = []
636 for i in range(len(scales)):
--> 637 anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
638 feature_strides[i], anchor_stride))
639 return np.concatenate(anchors, axis=0)
IndexError: index 5 is out of bounds for axis 0 with size 5
I have no clue what this error is actually indicating, nor what the potential solution might be. I have a feeling it might have to do with how the data is being formatted and handled by the CocoLikeDataset class, but I'm not sure.
Any help with identifying the issue and solving it is appreciated!
Thanks!

The traceback shows generate_pyramid_anchors looping over the entries of RPN_ANCHOR_SCALES and indexing feature_shapes[i] for each one. With the default backbone there are only five feature map levels (one per entry in BACKBONE_STRIDES), so a sixth anchor scale produces "index 5 is out of bounds for axis 0 with size 5". Set the RPN_ANCHOR_SCALES variable in your config to a five-element tuple, one scale per pyramid level:
RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
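As a minimal sketch, the fix in context (the class name and NUM_CLASSES are assumptions based on the 20 classes in the question; only RPN_ANCHOR_SCALES is the actual fix):
from mrcnn.config import Config

class TrainConfig(Config):  # hypothetical name; use your existing config class
    NAME = "coco_like"
    NUM_CLASSES = 1 + 20  # background + the 20 classes described above
    # One anchor scale per pyramid level: the default FPN backbone has five
    # levels (len(BACKBONE_STRIDES) == 5), so five scales are expected.
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

config = TrainConfig()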

Related

convert pytorch MAT model in github repository to onnx

Unsupported: ONNX export of convolution for kernel of unknown shape. [Caused by the value 'x.47 defined in (%x.47 : Float(*, *, *, *, strides=[12168000, 67600, 260, 1], requires_grad=0, device=cpu) = onnx::Slice(%874, %875, %876, %877, %878), scope: torch_utils.persistence.persistent_class..Decorator::/torch_utils.persistence.persistent_class..Decorator::synthesis/torch_utils.persistence.persistent_class..Decorator::first_stage/torch_utils.persistence.persistent_class..Decorator::enc_conv.1/torch_utils.persistence.persistent_class..Decorator::conv # /Users/QSoft019/Documents/ai-image-research/MAT/torch_utils/ops/upfirdn2d.py:190:0
)' (type 'Tensor') in the TorchScript graph. The containing node has kind 'onnx::Slice'.]
github: https://github.com/fenglinglwb/mat
there is no error when running generate_image.py with the pretrained file, but when converting to ONNX there are many warnings
finally, it stopped at the line
assert isinstance(groups, int) and (groups >= 1)
in the file MAT/torch_utils/ops/conv2d_resample.py
I commented that line out, but it then stopped in the file venv/lib/python3.8/site-packages/torch/onnx/symbolic_opset9.py because the weight_size (kernel_shape) variable was full of None values
I found that many integer variables became tensors when converting to ONNX; this caused the warnings, and the groups variable became a tensor too
Am I doing something wrong somewhere?
My function:
import numpy as np
import torch

def convert_torch_to_onnx_(onnx_path, image_path, model=None, torch_path=None):
    """
    Converts Pytorch model file to ONNX
    :param torch_path: Torch model path to load
    :param onnx_path: ONNX model path to save
    :param image_path: Path to test image to use in export progress
    """
    from datasets.mask_generator_512 import RandomMask

    if torch_path is not None:
        pytorch_model = get_torch_model(torch_path)
    else:
        pytorch_model = model

    device = torch.device('cpu')
    # image, _, torch_image = get_example_input(image_path)
    image = read_image(image_path)
    torch_image = (torch.from_numpy(image).float().to(device) / 127.5 - 1).unsqueeze(0)

    label = torch.zeros([1, pytorch_model.c_dim], device=device)
    resolution = 512
    mask = RandomMask(resolution)  # adjust the masking ratio by using 'hole_range'
    mask = torch.from_numpy(mask).float().to(device).unsqueeze(0)
    z = torch.from_numpy(np.random.randn(1, pytorch_model.z_dim)).to(device)
    truncation_psi = 1
    noise_mode = 'const'

    torch.onnx.export(
        pytorch_model,
        (torch_image, mask, z, label, truncation_psi, noise_mode),
        onnx_path,
        verbose=True,
        export_params=True,
        # do_constant_folding=False,
        # input_names=['input'],
        opset_version=11,
        # output_names=['output']
    )
and the generate_images function provided by the author (default values of the input variables were edited):
def generate_images(
    # network_pkl: str = 'pretrained/CelebA-HQ_512.pkl',
    network_pkl: str = '/Downloads/MAT/models/Places_512_FullData.pkl',
    dpath: str = 'test_sets/CelebA-HQ/images',
    # mpath=None,
    mpath: str = 'test_sets/CelebA-HQ/masks',
    resolution: int = 512,
    truncation_psi: float = 1,
    noise_mode: str = 'const',
    outdir: str = 'samples',
    model: bool = False,
):
    """
    Generate images using pretrained network pickle.
    """
    seed = 240  # pick up a random number
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    print(f'Loading data from: {dpath}')
    img_list = sorted(glob.glob(dpath + '/*.png') + glob.glob(dpath + '/*.jpg'))

    if mpath is not None:
        print(f'Loading mask from: {mpath}')
        mask_list = sorted(glob.glob(mpath + '/*.png') + glob.glob(mpath + '/*.jpg'))
        assert len(img_list) == len(mask_list), 'illegal mapping'

    print(f'Loading networks from: {network_pkl}')
    device = torch.device('cpu')
    # device = torch.device('cuda')
    with dnnlib.util.open_url(network_pkl) as f:
        G_saved = legacy.load_network_pkl(f)['G_ema'].to(device).eval().requires_grad_(False)  # type: ignore
    net_res = 512 if resolution > 512 else resolution
    G = Generator(z_dim=512, c_dim=0, w_dim=512, img_resolution=net_res, img_channels=3).to(device).eval().requires_grad_(False)
    copy_params_and_buffers(G_saved, G, require_all=True)

    if model:
        return G

    os.makedirs(outdir, exist_ok=True)

    # no Labels.
    label = torch.zeros([1, G.c_dim], device=device)

    if resolution != 512:
        noise_mode = 'random'
    with torch.no_grad():
        for i, ipath in enumerate(img_list):
            iname = os.path.basename(ipath).replace('.jpg', '.png')
            print(f'Processing: {iname}')
            image = read_image(ipath)
            image = (torch.from_numpy(image).float().to(device) / 127.5 - 1).unsqueeze(0)

            if mpath is not None:
                mask = cv2.imread(mask_list[i], cv2.IMREAD_GRAYSCALE).astype(np.float32) / 255.0
                mask = torch.from_numpy(mask).float().to(device).unsqueeze(0).unsqueeze(0)
            else:
                mask = RandomMask(resolution)  # adjust the masking ratio by using 'hole_range'
                mask = torch.from_numpy(mask).float().to(device).unsqueeze(0)

            z = torch.from_numpy(np.random.randn(1, G.z_dim)).to(device)
            output = G(image, mask, z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)
            output = (output.permute(0, 2, 3, 1) * 127.5 + 127.5).round().clamp(0, 255).to(torch.uint8)
            output = output[0].cpu().numpy()

How to load a TF Lite Model into Python from a file

I've followed the End-to-End image classification tutorial for tensorflow lite and have created and saved my model as '/path/to/model.tflite'.
What I haven't been able to figure out is how to load it.
I'm looking for some kind of syntax that is similar to this:
from tflite_model_maker import image_classifier
from tflite_model_maker.image_classifier import DataLoader
model = image_classifier.Load('/path/to/model.tflite')
I'm sure I'm missing something obvious here, and this is definitely not the first place I've looked. It seems like the best place to find what I need, but the syntax used there confuses me.
What do I want to be able to do with the model?
test = DataLoader.from_folder('/path/to/testImages')
loss, accuracy = model.evaluate(test)
# A helper function that returns 'red'/'black' depending on whether its two
# input parameters match or not.
def get_label_color(val1, val2):
    if val1 == val2:
        return 'black'
    else:
        return 'red'

# Then plot 100 test images and their predicted labels.
# If a prediction result is different from the label provided in the "test"
# dataset, we will highlight it in red.
test_data = data
plt.figure(figsize=(20, 20))
predicts = model.predict_top_k(test_data)
for i, (image, label) in enumerate(test_data.gen_dataset().unbatch().take(100)):
    ax = plt.subplot(10, 10, i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image.numpy(), cmap=plt.cm.gray)
    predict_label = predicts[i][0][0]
    color = get_label_color(predict_label,
                            test_data.index_to_label[label.numpy()])
    ax.xaxis.label.set_color(color)
    plt.xlabel('Predicted: %s' % predict_label)
plt.show()
From the syntax above it seems the model isn't just a file but is a type/class/method depending on what name is most suitable for python.
Feels like this should only take one line of code but I haven't been able to find it anywhere.
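For the loading step on its own, a saved .tflite file can be opened with TensorFlow's built-in tf.lite.Interpreter; a minimal sketch (the path is the one from the question, the dummy input is a placeholder):
import numpy as np
import tensorflow as tf

# Load the .tflite file and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path='/path/to/model.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Run inference on a dummy input shaped the way the model expects.
dummy = np.zeros(input_details[0]['shape'], dtype=input_details[0]['dtype'])
interpreter.set_tensor(input_details[0]['index'], dummy)
interpreter.invoke()
prediction = interpreter.get_tensor(output_details[0]['index'])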
Managed to do a simple version of it. Displaying the images as a cv2 stream doesn't work for me on Windows the way it does on the Pi, so instead I created a webpage in the same directory as this script. The script generates an image with the bounding boxes, using a specified tflite model. This is in no way ideal.
It uses a webcam to get the image and saves it to the directory the script is run from, then renames the file so it can be viewed by the webpage I set up to view it.
The majority of this code comes from the TFLite Object Detection Raspberry Pi sample.
import time, os, sys  # sys is needed for sys.exit below
from PIL import Image
from tflite_support import metadata
import platform
from typing import List, NamedTuple
import json
import cv2
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt

Interpreter = tf.lite.Interpreter
load_delegate = tf.lite.experimental.load_delegate
class ObjectDetectorOptions(NamedTuple):
    """A config to initialize an object detector."""

    enable_edgetpu: bool = False
    """Enable the model to run on EdgeTPU."""

    label_allow_list: List[str] = None
    """The optional allow list of labels."""

    label_deny_list: List[str] = None
    """The optional deny list of labels."""

    max_results: int = -1
    """The maximum number of top-scored detection results to return."""

    num_threads: int = 1
    """The number of CPU threads to be used."""

    score_threshold: float = 0.0
    """The score threshold of detection results to return."""
class Rect(NamedTuple):
    """A rectangle in 2D space."""
    left: float
    top: float
    right: float
    bottom: float

class Category(NamedTuple):
    """A result of a classification task."""
    label: str
    score: float
    index: int

class Detection(NamedTuple):
    """A detected object as the result of an ObjectDetector."""
    bounding_box: Rect
    categories: List[Category]
def edgetpu_lib_name():
    """Returns the library name of EdgeTPU in the current platform."""
    return {
        'Darwin': 'libedgetpu.1.dylib',
        'Linux': 'libedgetpu.so.1',
        'Windows': 'edgetpu.dll',
    }.get(platform.system(), None)
class ObjectDetector:
    """A wrapper class for a TFLite object detection model."""

    _OUTPUT_LOCATION_NAME = 'location'
    _OUTPUT_CATEGORY_NAME = 'category'
    _OUTPUT_SCORE_NAME = 'score'
    _OUTPUT_NUMBER_NAME = 'number of detections'

    def __init__(
        self,
        model_path: str,
        options: ObjectDetectorOptions = ObjectDetectorOptions()
    ) -> None:
        """Initialize a TFLite object detection model.
        Args:
            model_path: Path to the TFLite model.
            options: The config to initialize an object detector. (Optional)
        Raises:
            ValueError: If the TFLite model is invalid.
            OSError: If the current OS isn't supported by EdgeTPU.
        """
        # Load metadata from model.
        displayer = metadata.MetadataDisplayer.with_model_file(model_path)

        # Save model metadata for preprocessing later.
        model_metadata = json.loads(displayer.get_metadata_json())
        process_units = model_metadata['subgraph_metadata'][0]['input_tensor_metadata'][0]['process_units']
        mean = 0.0
        std = 1.0
        for option in process_units:
            if option['options_type'] == 'NormalizationOptions':
                mean = option['options']['mean'][0]
                std = option['options']['std'][0]
        self._mean = mean
        self._std = std

        # Load label list from metadata.
        file_name = displayer.get_packed_associated_file_list()[0]
        label_map_file = displayer.get_associated_file_buffer(file_name).decode()
        label_list = list(filter(lambda x: len(x) > 0, label_map_file.splitlines()))
        self._label_list = label_list

        # Initialize TFLite model.
        if options.enable_edgetpu:
            if edgetpu_lib_name() is None:
                raise OSError("The current OS isn't supported by Coral EdgeTPU.")
            interpreter = Interpreter(
                model_path=model_path,
                experimental_delegates=[load_delegate(edgetpu_lib_name())],
                num_threads=options.num_threads)
        else:
            interpreter = Interpreter(
                model_path=model_path, num_threads=options.num_threads)

        interpreter.allocate_tensors()
        input_detail = interpreter.get_input_details()[0]

        # From TensorFlow 2.6, the order of the outputs become undefined.
        # Therefore we need to sort the tensor indices of TFLite outputs and to know
        # exactly the meaning of each output tensor. For example, if
        # output indices are [601, 599, 598, 600], tensor names and indices aligned
        # are:
        # - location: 598
        # - category: 599
        # - score: 600
        # - detection_count: 601
        # because of the op's ports of TFLITE_DETECTION_POST_PROCESS
        # (https://github.com/tensorflow/tensorflow/blob/a4fe268ea084e7d323133ed7b986e0ae259a2bc7/tensorflow/lite/kernels/detection_postprocess.cc#L47-L50).
        sorted_output_indices = sorted(
            [output['index'] for output in interpreter.get_output_details()])
        self._output_indices = {
            self._OUTPUT_LOCATION_NAME: sorted_output_indices[0],
            self._OUTPUT_CATEGORY_NAME: sorted_output_indices[1],
            self._OUTPUT_SCORE_NAME: sorted_output_indices[2],
            self._OUTPUT_NUMBER_NAME: sorted_output_indices[3],
        }

        self._input_size = input_detail['shape'][2], input_detail['shape'][1]
        self._is_quantized_input = input_detail['dtype'] == np.uint8
        self._interpreter = interpreter
        self._options = options
    def detect(self, input_image: np.ndarray) -> List[Detection]:
        """Run detection on an input image.
        Args:
            input_image: A [height, width, 3] RGB image. Note that height and width
                can be anything since the image will be immediately resized according
                to the needs of the model within this function.
        Returns:
            A list of Detection objects.
        """
        image_height, image_width, _ = input_image.shape
        input_tensor = self._preprocess(input_image)
        self._set_input_tensor(input_tensor)
        self._interpreter.invoke()

        # Get all output details
        boxes = self._get_output_tensor(self._OUTPUT_LOCATION_NAME)
        classes = self._get_output_tensor(self._OUTPUT_CATEGORY_NAME)
        scores = self._get_output_tensor(self._OUTPUT_SCORE_NAME)
        count = int(self._get_output_tensor(self._OUTPUT_NUMBER_NAME))

        return self._postprocess(boxes, classes, scores, count, image_width,
                                 image_height)

    def _preprocess(self, input_image: np.ndarray) -> np.ndarray:
        """Preprocess the input image as required by the TFLite model."""
        # Resize the input
        input_tensor = cv2.resize(input_image, self._input_size)
        # Normalize the input if it's a float model (aka. not quantized)
        if not self._is_quantized_input:
            input_tensor = (np.float32(input_tensor) - self._mean) / self._std
        # Add batch dimension
        input_tensor = np.expand_dims(input_tensor, axis=0)
        return input_tensor

    def _set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self._interpreter.get_input_details()[0]['index']
        input_tensor = self._interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def _get_output_tensor(self, name):
        """Returns the output tensor at the given index."""
        output_index = self._output_indices[name]
        tensor = np.squeeze(self._interpreter.get_tensor(output_index))
        return tensor
    def _postprocess(self, boxes: np.ndarray, classes: np.ndarray,
                     scores: np.ndarray, count: int, image_width: int,
                     image_height: int) -> List[Detection]:
        """Post-process the output of TFLite model into a list of Detection objects.
        Args:
            boxes: Bounding boxes of detected objects from the TFLite model.
            classes: Class index of the detected objects from the TFLite model.
            scores: Confidence scores of the detected objects from the TFLite model.
            count: Number of detected objects from the TFLite model.
            image_width: Width of the input image.
            image_height: Height of the input image.
        Returns:
            A list of Detection objects detected by the TFLite model.
        """
        results = []

        # Parse the model output into a list of Detection entities.
        for i in range(count):
            if scores[i] >= self._options.score_threshold:
                y_min, x_min, y_max, x_max = boxes[i]
                bounding_box = Rect(
                    top=int(y_min * image_height),
                    left=int(x_min * image_width),
                    bottom=int(y_max * image_height),
                    right=int(x_max * image_width))
                class_id = int(classes[i])
                category = Category(
                    score=scores[i],
                    label=self._label_list[class_id],  # 0 is reserved for background
                    index=class_id)
                result = Detection(bounding_box=bounding_box, categories=[category])
                results.append(result)

        # Sort detection results by score descending
        sorted_results = sorted(
            results,
            key=lambda detection: detection.categories[0].score,
            reverse=True)

        # Filter out detections in deny list
        filtered_results = sorted_results
        if self._options.label_deny_list is not None:
            filtered_results = list(
                filter(
                    lambda detection: detection.categories[0].label not in self._options.label_deny_list,
                    filtered_results))

        # Keep only detections in allow list
        if self._options.label_allow_list is not None:
            filtered_results = list(
                filter(
                    lambda detection: detection.categories[0].label in self._options.label_allow_list,
                    filtered_results))

        # Only return maximum of max_results detection.
        if self._options.max_results > 0:
            result_count = min(len(filtered_results), self._options.max_results)
            filtered_results = filtered_results[:result_count]

        return filtered_results
_MARGIN = 10  # pixels
_ROW_SIZE = 10  # pixels
_FONT_SIZE = 1
_FONT_THICKNESS = 1
_TEXT_COLOR = (0, 0, 255)  # red

def visualize(
    image: np.ndarray,
    detections: List[Detection],
) -> np.ndarray:
    """Draws bounding boxes on the input image and return it.
    Args:
        image: The input RGB image.
        detections: The list of all "Detection" entities to be visualize.
    Returns:
        Image with bounding boxes.
    """
    for detection in detections:
        # Draw bounding_box
        start_point = detection.bounding_box.left, detection.bounding_box.top
        end_point = detection.bounding_box.right, detection.bounding_box.bottom
        cv2.rectangle(image, start_point, end_point, _TEXT_COLOR, 3)

        # Draw label and score
        category = detection.categories[0]
        class_name = category.label
        probability = round(category.score, 2)
        result_text = class_name + ' (' + str(probability) + ')'
        text_location = (_MARGIN + detection.bounding_box.left,
                         _MARGIN + _ROW_SIZE + detection.bounding_box.top)
        cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                    _FONT_SIZE, _TEXT_COLOR, _FONT_THICKNESS)

    return image
# ---------------------------------- #
# This is where the custom code starts
# ---------------------------------- #

# Load the TFLite model
TFLITE_MODEL_PATH = 'object.tflite'
DETECTION_THRESHOLD = 0.5  # 50% threshold required before identifying

options = ObjectDetectorOptions(
    num_threads=4,
    score_threshold=DETECTION_THRESHOLD,
)

# Close camera if already open
try:
    cap.release()
except Exception:
    pass  # do nothing

detector = ObjectDetector(model_path=TFLITE_MODEL_PATH, options=options)
cap = cv2.VideoCapture(0)  # webcam

counter = 0  # Store how many times the model has run
while cap.isOpened():
    success, image = cap.read()
    if not success:
        sys.exit(
            'ERROR: Unable to read from webcam. Please verify your webcam settings.'
        )

    image = cv2.flip(image, 1)

    # Convert the image from BGR to RGB as required by the TFLite model.
    # (note: rgb_image is computed here but image_np below is built from the BGR frame)
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # image.thumbnail((512, 512), Image.ANTIALIAS)
    image_np = np.asarray(image)

    # Run object detection estimation using the model.
    detections = detector.detect(image_np)

    # Draw keypoints and edges on input image
    image_np = visualize(image_np, detections)

    if counter == 10:  # <- Change this to decide how many iterations
        cap.release()
        break

    image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
    plt.imsave('tmp.jpg', image_np)  # Saves the image
    os.replace("tmp.jpg", "web.jpg")  # Renames it for the webpage
    counter += 1
    print(counter)

cap.release()
Here's the HTML for the document placed in the same directory as the Python file. I saved it as index.html and opened it in the browser while running the Python script above.
<!DOCTYPE html>
<html>
  <head>
    <title>Object Detection</title>
  </head>
  <body>
    <h1>Object Detection</h1>
    <p>This displays images saved during detection process</p>
    <canvas id="x" width="700px" height="500px"></canvas>
    <script>
      var newImage = new Image();
      newImage.src = "web.jpg";
      var canvas = document.getElementById("x");
      var context = canvas.getContext("2d");

      newImage.onload = function() {
        context.drawImage(newImage, 0, 0);
        console.log("trigger");
        setTimeout(timedRefresh, 1000);
      };

      function timedRefresh() {
        // just change src attribute, will always trigger the onload callback
        try {
          newImage.src = ("web.jpg#" + new Date().getTime());
        } catch (e) {
          console.log(e);
        }
      }

      setTimeout(timedRefresh, 100);
    </script>
  </body>
</html>
It's incredibly slow, not ideal in many ways, and probably breaks many good coding conventions. I only used it locally and would definitely not use it in a production environment nor recommend doing so; I just needed a quick proof of concept, and this worked for that.

Getting error ([WinError 123] The filename, directory name, or volume label syntax is incorrect: '//') while training custom dataset through Mask R-CNN

I'm new to Python and I'm trying to use mask-rcnn-tf2 to detect objects in a Conda Jupyter notebook, but I get this error every time and I'm stuck here.
Here is my code:
import os
from numpy import zeros, asarray

from mrcnn.utils import Dataset
from mrcnn.visualize import display_instances
from mrcnn.utils import extract_bboxes
from mrcnn.config import Config
from mrcnn.model import MaskRCNN

# Note: id_s, img, b_mat and data are assumed to be defined earlier in the
# notebook (image ids, filenames, the bbox matrix and the annotation dataframe).

# class that defines and loads the car dataset
class Contest(Dataset):
    # load the dataset definitions
    def load_dataset(self, dataset_csv, image_dir, is_train=True):
        # define one class
        self.add_class("dataset", 1, "Car")
        # define data locations
        images_dir = image_dir + 'train_images/'
        # annotations_dir = dataset_dir + '/annots/'
        # find all images
        # for filename in os.listdir(images_dir):
        for id, filename in zip(id_s, img):
            # extract image id
            image_id = id
            # skip bad images
            # if image_id in ["100"]:
            #     continue
            # skip all images with id >= 5000 if we are building the train set
            if is_train and int(image_id) >= 5000:
                continue
            # skip all images with id >= 4000 if we are building the test/val set
            if not is_train and int(image_id) >= 4000:
                continue
            img_path = images_dir + filename
            if os.path.isfile(img_path) == False:
                continue
            # ann_path = annotations_dir + image_id + '.xml'
            # add to dataset
            self.add_image('dataset', image_id=image_id, path=img_path)

    # get all the bbox category ids and the width and height of the image
    # on the basis of the image id
    def extract_boxes(self, dataset_csv, image_id):
        info = self.image_info[image_id]
        box = list()
        for i in range(len(b_mat)):
            if b_mat[i][2] == 1 and b_mat[i][1] == info['id']:
                bbox = b_mat[i][0]
                box.append(bbox)
        wid = data[data['image_id'] == image_id]['width'].unique()[0]
        hei = data[data['image_id'] == image_id]['height'].unique()[0]
        return box, wid, hei

    # load the masks for an image
    def load_mask(self, image_id):
        # get details of image
        info = self.image_info[image_id]
        boxes, w, h = self.extract_boxes(data, image_id)
        # create one array for all masks, each on a different channel
        masks = zeros([h, w, len(boxes)], dtype='uint8')
        # create masks
        class_ids = list()
        for i in range(len(boxes)):
            box = boxes[i]
            row_s, col_s = box[0], box[1]
            row_e, col_e = box[0] + box[2], box[1] + box[3]
            masks[col_s:col_e, row_s:row_e, i] = 1
            class_ids.append(self.class_names.index('Car'))
        return masks, asarray(class_ids, dtype='int32')

    # load an image reference
    def image_reference(self, image_id):
        info = self.image_info[image_id]
        return info['path']

# define a configuration for the model
class CarConfig(Config):
    # define the name of the configuration
    NAME = "Contest_cfg"
    # number of classes (background + car)
    NUM_CLASSES = 1 + 1
    # number of training steps per epoch
    STEPS_PER_EPOCH = 200

# train set
image_dir = 'G:/My Drive/train_images/'
train_set = Contest()
train_set.load_dataset(data, image_dir, is_train=True)
train_set.prepare()
print('Train: %d' % len(train_set.image_ids))

test_set = Contest()
test_set.load_dataset(data, image_dir, is_train=False)
test_set.prepare()
print('Test: %d' % len(test_set.image_ids))

# image_id = 1
# # load the image
# image = train_set.load_image(image_id)
# # load the masks and the class ids
# mask, class_ids = train_set.load_mask(image_id)
# # extract bounding boxes from the masks
# bbox = extract_bboxes(mask)
# # display image with masks and bounding boxes
# display_instances(image, bbox, mask, class_ids, train_set.class_names)

# prepare config
config = CarConfig()
config.display()
# define the model
model = MaskRCNN(mode='training', model_dir=r"D:/", config=config)
# load weights (mscoco) and exclude the output layers
model.load_weights(r"D:/Najam/mask_rcnn_coco.h5", by_name=True,
                   exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])
# train weights (output layers or 'heads')
model.train(train_set, test_set, learning_rate=config.LEARNING_RATE, epochs=3, layers='head')
Above is the code I have written for loading the dataset and training the model.
Error:
OSError Traceback (most recent call last)
<ipython-input-25-c0b9708402b6> in <module>
118 model.load_weights(r"D:/Najam/mask_rcnn_coco.h5", by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])
119 # train weights (output layers or 'heads')
--> 120 model.train(train_set,test_set, learning_rate=config.LEARNING_RATE, epochs=3, layers='head')
D:\Najam\matterport\mrcnn\model.py in train(self, train_dataset, val_dataset, learning_rate, epochs, layers, augmentation, custom_callbacks, no_augmentation_sources)
2345 batch_size=self.config.BATCH_SIZE)
2346
-> 2347 # Create log_dir if it does not exist
2348 if not os.path.exists(self.log_dir):
2349 print(self.log_dir)
~\anaconda3\envs\myenv\lib\os.py in makedirs(name, mode, exist_ok)
208 if head and tail and not path.exists(head):
209 try:
--> 210 makedirs(head, mode, exist_ok)
211 except FileExistsError:
212 # Defeats race condition when another thread created the path
~\anaconda3\envs\myenv\lib\os.py in makedirs(name, mode, exist_ok)
208 if head and tail and not path.exists(head):
209 try:
--> 210 makedirs(head, mode, exist_ok)
211 except FileExistsError:
212 # Defeats race condition when another thread created the path
~\anaconda3\envs\myenv\lib\os.py in makedirs(name, mode, exist_ok)
218 return
219 try:
--> 220 mkdir(name, mode)
221 except OSError:
222 # Cannot rely on checking for EEXIST, since the operating system
OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: '//'
You're getting this error because the script is trying to create the folders for your logs at a path that is not accepted.
I'm not sure why you're getting it; usually it happens when the folder path you're trying to create contains forbidden characters such as forward slashes, asterisks, question marks, backslashes, double quotes, etc.
However, with your path //logdir//train this doesn't seem to be the case. What I can suggest is to set self.log_dir (it is assigned in set_log_dir in mrcnn/model.py) to a path of your choice. Maybe something like this (you're on Windows, so it will look a little different):
self.log_dir = "/Users/claudia/Desktop/logs/"
Just make sure that you don't have forbidden characters in your path and you should be OK. You can also create the folders yourself beforehand and you won't get the error.
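Alternatively, point model_dir at an existing, writable folder when building the model; a minimal sketch (the D:/Najam path is borrowed from the question and assumed to exist):
import os

LOG_DIR = r"D:/Najam/logs"  # any writable folder with no forbidden characters
os.makedirs(LOG_DIR, exist_ok=True)
model = MaskRCNN(mode='training', model_dir=LOG_DIR, config=config)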
Hope this helps.

Writing custom datasets and dataloaders with PyTorch

Good afternoon!
I have questions about the following tutorial:
https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
I have a similar dataset (images + landmarks). I've built the custom dataloader following the tutorial and checked the types of the dataloader components (torch.float64 for both images and landmarks).
Then I applied the dataloader to the classification model with this training class:
class Trainer():
    def __init__(self, criterion=None, optimizer=None, schedular=None):
        self.criterion = criterion
        self.optimizer = optimizer
        self.schedular = schedular

    def train_batch_loop(self, model, train_dataloader):
        train_loss = 0.0
        train_acc = 0.0
        for images, landmarks, labels in train_dataloader:
            images = images.to(device)
            landmarks = landmarks.to(device)
            labels = labels.to(device)
            self.optimizer.zero_grad()
I won’t be elaborating further because the training crushes at images = images.to(device) with the following error: AttributeError: ‘str’ object has no attribute 'to’
I don’t understand where this string is coming from if all the dataloader components are torch.float64.
I went back to check the initial data: in the tutorial, the landmarks are summarized in a pandas dataframe with landmark values as int64 and image name as “object”.
In my summary dataframe image name is an “object” as well and landmarks are numpy.float64. Again, no strings anywhere…
Appreciate any advice - what else should I check in addition to dtypes?
There are 30 cats and 48 landmarks for each image
The dataset is defined as follows:
class FaceLandmarksDataset(Dataset):
    def __init__(self, data_frame, root_dir, transform=None):
        self.data_frame = data_frame
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_name = os.path.join(self.root_dir,
                                self.data_frame.iloc[idx, 2])
        image = io.imread(img_name)
        landmarks = self.data_frame.iloc[idx, 3:]
        landmarks = np.array([landmarks])
        landmarks = landmarks.astype('float').reshape(-1, 2)
        labels = self.data_frame.iloc[idx, 1].reshape(1)
        sample = {'image': image, 'landmarks': landmarks, 'labels': labels}
        if self.transform:
            sample = self.transform(sample)
        return sample
Hi, I made some changes based on your dataset; please adjust where you see fit. I don't have your data so I can't test this, but based on my understanding it should work. The string in your error comes from the fact that __getitem__ returns a dict: writing for images, landmarks, labels in train_dataloader unpacks each batch dict into its keys, so images ends up as the string 'image', which has no .to attribute.
import torch
from torch.utils.data import Dataset
import os
from skimage import io
import numpy as np
from typing import Dict

class FaceLandmarksDataset(Dataset):
    def __init__(self, data_frame, root_dir, transform=None):
        self.data_frame = data_frame
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx) -> Dict[str, torch.Tensor]:
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx]['image_name'])
        image = io.imread(img_name)
        landmarks = self.data_frame.iloc[idx, 3:].tolist()
        landmarks = np.array(landmarks)
        landmarks = landmarks.astype('float32').reshape(-1, 2)
        labels = self.data_frame.iloc[idx]['label']

        # You're transforming only the image, therefore pass in the array
        if self.transform:
            image = self.transform(image)

        # Create the dictionary after finishing all transforms -> it is
        # converted to torch tensors automatically by the default collate
        sample = {
            'image': image,
            'landmarks': landmarks,
            'labels': labels
        }
        return sample
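With a dataset that returns a dict, the training loop then has to index each batch by key instead of tuple-unpacking it; a minimal sketch:
for batch in train_dataloader:
    images = batch['image'].to(device)
    landmarks = batch['landmarks'].to(device)
    labels = batch['labels'].to(device)
    # ... forward pass, loss, optimizer.step() ...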

Training stuck at Epoch 3 PyTorch

I am training a custom Encoder-Decoder network, but the training gets stuck at Epoch 3: nothing happens for about 2 hours. I will share the Dataset class and the DataLoader object below. The CUDA and GPU versions, the stuck training output, and the nvidia-smi output were shown in screenshots (images not reproduced here).
The __init__ and __getitem__ methods of the dataset class look like this:
def __init__(self,
             images_dir,
             annots_dir,
             train=True,
             img_size=(512, 1536),
             stride=4,
             model='custom',
             transforms=None):
    """
    :param root: dataset directory
    :param filenames: filenames inside the root directory
    :param labels: Object Detection Labels
    """
    super(CustomDataset).__init__()
    self.images_dir = images_dir
    self.annots_dir = annots_dir
    self.train = train
    self.image_size = img_size
    self.stride = stride
    self.transforms = transforms
    self.model = model

    # Load the image and annotation files from the dataset
    # self.image_files, self.annot_files = self._load_image_and_annot_files()
    self.image_files = [os.path.join(self.images_dir, idx) for idx in os.listdir(self.images_dir)]
    self.annot_files = [os.path.join(self.annots_dir, idx) for idx in os.listdir(self.annots_dir)]
def __getitem__(self, index):
    """
    :param index: index...0 to N
    :return: tensor_image and tensor_label
    """
    # Image and annotation filenames for this index
    curr_image_filename = self.image_files[index]
    curr_annot_filename = self.annot_files[index]

    np_image = self._read_matrix(raw_img=curr_image_filename)
    np_image_normalized = np.squeeze(self._normalize_raw_img(np_image))

    boxes, classes, depths, tgts = self._load_annotations(curr_annot_filename)

    # Normalize bounding boxes: range [0, 1]
    targets_normalized = self._normalize_bbox(np_image_normalized, tgts)

    # image and the corresponding label should be a tensor
    torch_image = torch.from_numpy(np_image).reshape(1, 512, 1536).float()  # dtype: torch.float32
    torch_boxes = torch.from_numpy(boxes).type(torch.FloatTensor)
    torch_depths = torch.from_numpy(depths)

    if self.model == 'fasterrcnn':
        # For FasterRCNN: As COCO format
        area = (torch_boxes[:, 3] - torch_boxes[:, 1]) * (torch_boxes[:, 2] - torch_boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        image_id = torch.Tensor([index])
        torch_classes = torch.from_numpy(classes)
        target = {'boxes': torch_boxes, 'labels': torch_classes.long(),
                  'area': area, 'iscrowd': iscrowd, 'image_id': image_id}
        return torch_image, target
    elif self.model == 'custom':
        if self.train:
            if self.transforms:
                try:
                    tr = self.transforms()
                    transform_image, transform_boxes, labels = tr(np_image, tgts, tgts[:, :4], tgts[:, 4:])
                    transform_targets = np.hstack((np.array(transform_boxes), labels))
                    gt_tensor = gt_creator(img_size=self.image_size,
                                           stride=self.stride,
                                           num_classes=8,
                                           label_lists=transform_targets)
                    return torch.from_numpy(transform_image).float(), gt_tensor
                except IndexError:
                    # note: swallowing the IndexError means this sample silently
                    # returns None, which the collate_fn must be able to handle
                    pass
            else:
                gt_tensor = gt_creator(img_size=self.image_size,
                                       stride=self.stride,
                                       num_classes=8,
                                       label_lists=targets_normalized)
                return torch_image, gt_tensor
        else:
            return torch_image, targets_normalized
And in the train.py script the DataLoader object is:
train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                           shuffle=True,
                                           batch_size=1,
                                           num_workers=0,
                                           collate_fn=detection_collate,
                                           pin_memory=True)
Why does the training get stuck? Is there an issue with the __getitem__ method? Or the DataLoader?
Thank You.
This happens because torch doesn't restart your dataset; if your data runs out, it stops and waits for more input, so cycling has to be done manually.
I used something along the lines of:
from itertools import cycle

class Dataloader():
    # __init__ and whatever else you need

    def __iter__(self):
        # get_sample is your current __getitem__ logic, yielding samples
        return cycle(self.get_sample())
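The same idea also works without a custom class by cycling the existing DataLoader; a minimal sketch (num_steps is a placeholder for however many iterations you want):
from itertools import cycle

loader_iter = cycle(train_loader)  # restarts the loader when it runs out
for step in range(num_steps):
    torch_image, gt_tensor = next(loader_iter)
    # ... training step ...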
