Good afternoon!
I have questions about the following tutorial:
https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
I have a similar dataset (images + landmarks). I’ve built the custom dataloader following the tutorial and checked the types of dataloader components (torch.float64 for both images and landmarks).
Then I applied the dataloader to the classification model with this training class:
class Trainer():
    def __init__(self, criterion=None, optimizer=None, schedular=None):
        self.criterion = criterion
        self.optimizer = optimizer
        self.schedular = schedular

    def train_batch_loop(self, model, train_dataloader):
        train_loss = 0.0
        train_acc = 0.0
        for images, landmarks, labels in train_dataloader:
            images = images.to(device)
            landmarks = landmarks.to(device)
            labels = labels.to(device)
            self.optimizer.zero_grad()
I won't be elaborating further because the training crashes at images = images.to(device) with the following error: AttributeError: 'str' object has no attribute 'to'
I don’t understand where this string is coming from if all the dataloader components are torch.float64.
I went back to check the initial data: in the tutorial, the landmarks are summarized in a pandas dataframe with landmark values as int64 and image name as “object”.
In my summary dataframe image name is an “object” as well and landmarks are numpy.float64. Again, no strings anywhere…
Appreciate any advice - what else should I check in addition to dtypes?
There are 30 cats and 48 landmarks for each image
The dataset is defined as follows:
class FaceLandmarksDataset(Dataset):
    def __init__(self, data_frame, root_dir, transform=None):
        self.data_frame = data_frame
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                self.data_frame.iloc[idx, 2])
        image = io.imread(img_name)
        landmarks = self.data_frame.iloc[idx, 3:]
        landmarks = np.array([landmarks])
        landmarks = landmarks.astype('float').reshape(-1, 2)
        labels = self.data_frame.iloc[idx, 1].reshape(1)
        sample = {'image': image, 'landmarks': landmarks, 'labels': labels}

        if self.transform:
            sample = self.transform(sample)

        return sample
Hi, I made some changes based on your dataset. Please adjust where you see fit; since I don't have your data I can't test this, but based on my understanding it should work.
import torch
from torch.utils.data import Dataset
import os
from skimage import io
import numpy as np
from typing import Dict


class FaceLandmarksDataset(Dataset):
    def __init__(self, data_frame, root_dir, transform=None):
        self.data_frame = data_frame
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx) -> Dict[str, torch.Tensor]:
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx]['image_name'])
        image = io.imread(img_name)
        landmarks = self.data_frame.iloc[idx, 3:].tolist()
        landmarks = np.array(landmarks)
        landmarks = landmarks.astype('float32').reshape(-1, 2)
        labels = self.data_frame.iloc[idx]['label']

        # You're transforming the image only, so pass in the array
        if self.transform:
            image = self.transform(image)

        # Create the dictionary after finishing all transforms -> entries are
        # converted to torch tensors by the DataLoader's default collate
        sample = {
            'image': image,
            'landmarks': landmarks,
            'labels': labels
        }
        return sample
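A minimal usage sketch of the class above (untested; the csv path and the label-encoding step are placeholders, and it assumes the dataframe really has the 'image_name' and 'label' columns used in the snippet). One thing worth noting: if the dataset returns a dict, iterating the loader with "for images, landmarks, labels in train_dataloader" unpacks each batch dict into its keys, which are strings, and that alone would produce exactly this kind of AttributeError; indexing the batch dict avoids it.

import pandas as pd
from torch.utils.data import DataLoader

df = pd.read_csv('landmarks.csv')  # placeholder path
# If the label column holds strings, encode it as integers first,
# otherwise the collated labels cannot be moved with .to(device):
# df['label'] = df['label'].astype('category').cat.codes

dataset = FaceLandmarksDataset(data_frame=df, root_dir='images/')
loader = DataLoader(dataset, batch_size=4, shuffle=True)

batch = next(iter(loader))                  # batch is a dict of collated tensors
images = batch['image'].to(device)          # index the dict rather than unpacking it
landmarks = batch['landmarks'].to(device)   # device as defined in the training script
labels = batch['labels'].to(device)
print(images.dtype, landmarks.dtype, labels.dtype)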
I'm trying to train a custom COCO-format dataset with Matterport's Mask R-CNN on Tensorflow/Keras. My datasets are json files with the aforementioned COCO-format, with each item in the "annotations" section looking like this:
There are 20 classes, with polygon masks for the entire object, and then polygon masks for the parts within the object. The images are of shape 256x448x3.
The code for the dataset preparation is as follows:
class CocoLikeDataset(utils.Dataset):
    """ Generates a COCO-like dataset, i.e. an image dataset annotated in the style of the COCO dataset.
        See http://cocodataset.org/#home for more information.
    """

    def load_data(self, annotation_json, images_dir):
        """ Load the coco-like dataset from json
        Args:
            annotation_json: The path to the coco annotations json file
            images_dir: The directory holding the images referred to by the json file
        """
        # Load json from file
        json_file = open(annotation_json)
        coco_json = json.load(json_file)
        json_file.close()

        # Add the class names using the base method from utils.Dataset
        source_name = "coco_like"
        for category in coco_json['categories']:
            class_id = category['id'] + 1
            class_name = category['name']
            if class_id < 1:
                print('Error: Class id for "{}" cannot be less than one. (0 is reserved for the background)'.format(class_name))
                return
            self.add_class(source_name, class_id, class_name)

        # Get all annotations
        annotations = {}
        for annotation in coco_json['annotations']:
            image_id = annotation['image_id']
            if image_id not in annotations:
                annotations[image_id] = []
            annotations[image_id].append(annotation)

        # Get all images and add them to the dataset
        seen_images = {}
        for image in coco_json['images']:
            image_id = image['id']
            if image_id in seen_images:
                print("Warning: Skipping duplicate image id: {}".format(image))
            else:
                seen_images[image_id] = image
                try:
                    image_file_name = image['file_name']
                    image_width = image['width']
                    image_height = image['height']
                except KeyError as key:
                    print("Warning: Skipping image (id: {}) with missing key: {}".format(image_id, key))

                image_path = os.path.abspath(os.path.join(images_dir, image_file_name))
                image_annotations = annotations[image_id]

                # Add the image using the base method from utils.Dataset
                self.add_image(
                    source=source_name,
                    image_id=image_id,
                    path=image_path,
                    width=image_width,
                    height=image_height,
                    annotations=image_annotations
                )
    def load_mask(self, image_id):
        """ Load instance masks for the given image.
        MaskRCNN expects masks in the form of a bitmap [height, width, instances].
        Args:
            image_id: The id of the image to load masks for
        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
        """
        image_info = self.image_info[image_id]
        annotations = image_info['annotations']
        instance_masks = []
        class_ids = []

        for annotation in annotations:
            class_id = annotation['category_id']
            mask = Image.new('1', (image_info['width'], image_info['height']))
            mask_draw = ImageDraw.ImageDraw(mask, '1')
            for segmentation in annotation['segmentation']:
                mask_draw.polygon(segmentation, fill=1)
                bool_array = np.array(mask) > 0
                instance_masks.append(bool_array)
                class_ids.append(class_id)

        mask = np.dstack(instance_masks)
        class_ids = np.array(class_ids, dtype=np.int32)

        return mask, class_ids
This seems to work well at least in loading the images & masks in a presentable format, as testing it out to visualize the images, masks, and class ids for some of the images yields good results where I can see each image, binary mask, and class ids of each mask.
However, when I actually try to train the model on the training dataset created, I get the following error:
model = modellib.MaskRCNN(mode="training", config=config,
model_dir=MODEL_DIR)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-19-7928c4edfc77> in <module>()
1 # Create model in training mode
2 model = modellib.MaskRCNN(mode="training", config=config,
----> 3 model_dir=MODEL_DIR)
3 frames
/content/Mask_RCNN/mrcnn/model.py in __init__(self, mode, config, model_dir)
1835 self.model_dir = model_dir
1836 self.set_log_dir()
-> 1837 self.keras_model = self.build(mode=mode, config=config)
1838
1839 def build(self, mode, config):
/content/Mask_RCNN/mrcnn/model.py in build(self, mode, config)
1927 # Anchors
1928 if mode == "training":
-> 1929 anchors = self.get_anchors(config.IMAGE_SHAPE)
1930 # Duplicate across the batch dimension because Keras requires it
1931 # TODO: can this be optimized to avoid duplicating the anchors?
/content/Mask_RCNN/mrcnn/model.py in get_anchors(self, image_shape)
2609 backbone_shapes,
2610 self.config.BACKBONE_STRIDES,
-> 2611 self.config.RPN_ANCHOR_STRIDE)
2612 # Keep a copy of the latest anchors in pixel coordinates because
2613 # it's used in inspect_model notebooks.
/content/Mask_RCNN/mrcnn/utils.py in generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride)
635 anchors = []
636 for i in range(len(scales)):
--> 637 anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
638 feature_strides[i], anchor_stride))
639 return np.concatenate(anchors, axis=0)
IndexError: index 5 is out of bounds for axis 0 with size 5
I have no clue what this error is actually indicating, nor what the potential solution might possibly be. I have a feeling it might have to do with how the data is being formatted & handled by the CocoLikeDataset class, but I'm not sure.
Any help identifying the issue and solving it is appreciated!
Thanks!
Set the RPN_ANCHOR_SCALES variable in your config file to:
RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
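This matches the traceback: generate_pyramid_anchors indexes feature_shapes[i] once per anchor scale, and the default backbone produces five pyramid levels, so a sixth scale gives "index 5 is out of bounds for axis 0 with size 5". A hedged sketch of where the setting lives (the class name and NUM_CLASSES below are placeholders for the 20-class setup in the question):

from mrcnn.config import Config

class CocoLikeConfig(Config):
    NAME = "coco_like"    # placeholder name
    NUM_CLASSES = 1 + 20  # background + the 20 classes mentioned above
    # One scale per FPN level: len(RPN_ANCHOR_SCALES) must match len(BACKBONE_STRIDES), which is 5 by default
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

config = CocoLikeConfig()
model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)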
I am training a custom Encoder-Decoder network but the training gets stuck at Epoch 3. Nothing happens for about 2 hours. I will share the Dataset class and the DataLoader object. The version of CUDA and the GPU can be seen in the pic below.
Training stuck here:
nvidia-smi output looks like this:
The __getitem__ method of the dataset class looks like this:
def __init__(self,
             images_dir,
             annots_dir,
             train=True,
             img_size=(512, 1536),
             stride=4,
             model='custom',
             transforms=None):
    """
    :param root: dataset directory
    :param filenames: filenames inside the root directory
    :param labels: Object Detection Labels
    """
    super(CustomDataset).__init__()
    self.images_dir = images_dir
    self.annots_dir = annots_dir
    self.train = train
    self.image_size = img_size
    self.stride = stride
    self.transforms = transforms
    self.model = model

    # Load the image and annotation files from the dataset
    # self.image_files, self.annot_files = self._load_image_and_annot_files()
    self.image_files = [os.path.join(self.images_dir, idx) for idx in os.listdir(self.images_dir)]
    self.annot_files = [os.path.join(self.annots_dir, idx) for idx in os.listdir(self.annots_dir)]

def __getitem__(self, index):
    """
    :param index: index...0 to N
    :return: tensor_image and tensor_label
    """
    # Image filename from _load_image_files()
    # Load Image with _read_matrix() and label
    curr_image_filename = self.image_files[index]
    curr_annot_filename = self.annot_files[index]

    np_image = self._read_matrix(raw_img=curr_image_filename)
    np_image_normalized = np.squeeze(self._normalize_raw_img(np_image))

    # label = self.labels[index]
    boxes, classes, depths, tgts = self._load_annotations(curr_annot_filename)

    # Normalize bounding boxes: range [0, 1]
    targets_normalized = self._normalize_bbox(np_image_normalized, tgts)

    # image and the corresponding label should be a tensor
    torch_image = torch.from_numpy(np_image).reshape(1, 512, 1536).float()  # dtype: torch.float32
    torch_boxes = torch.from_numpy(boxes).type(torch.FloatTensor)
    torch_depths = torch.from_numpy(depths)

    if self.model == 'fasterrcnn':
        # For FasterRCNN: As COCO format
        area = (torch_boxes[:, 3] - torch_boxes[:, 1]) * (torch_boxes[:, 2] - torch_boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        image_id = torch.Tensor([index])
        torch_classes = torch.from_numpy(classes)
        target = {'boxes': torch_boxes, 'labels': torch_classes.long(),
                  'area': area, 'iscrowd': iscrowd, 'image_id': image_id}
        return torch_image, target

    elif self.model == 'custom':
        if self.train:
            if self.transforms:
                try:
                    tr = self.transforms()
                    transform_image, transform_boxes, labels = tr.__call__(np_image, tgts, tgts[:, :4], tgts[:, 4:])
                    transform_targets = np.hstack((np.array(transform_boxes), labels))
                    gt_tensor = gt_creator(img_size=self.image_size,
                                           stride=self.stride,
                                           num_classes=8,
                                           label_lists=transform_targets)
                    return torch.from_numpy(transform_image).float(), gt_tensor
                except IndexError:
                    pass
            else:
                gt_tensor = gt_creator(img_size=self.image_size,
                                       stride=self.stride,
                                       num_classes=8,
                                       label_lists=targets_normalized)
                return torch_image, gt_tensor
        else:
            return torch_image, targets_normalized
And in the train.py script the DataLoader object is:
train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                           shuffle=True,
                                           batch_size=1,
                                           num_workers=0,
                                           collate_fn=detection_collate,
                                           pin_memory=True)
Why does the training get stuck? Is there an issue with the __getitem__ method? Or the DataLoader?
Thank You.
This happens because torch doesn't restart your dataset; if your data runs out, it stops and waits for more input, so cycling has to be done manually.
I used something along the lines of
from itertools import cycle

class Dataloader():
    # __init__ and whatever else you need

    def __iter__(self):
        return cycle(self.get_sample())  # get_sample is your current __getitem__
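An alternative along the same lines, kept as a sketch: wrap the existing DataLoader in itertools.cycle and drive training by step count rather than by epoch. Note that cycle caches the items from its first pass, so the first epoch's batch order is replayed on later cycles.

from itertools import cycle

batches = cycle(train_loader)   # train_loader as defined above
for step in range(num_steps):   # num_steps is a placeholder
    torch_image, gt_tensor = next(batches)
    # ... forward / backward pass ...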
The problem: I am unable to get my CNN model to train on 8-channel .TIF images.
Expected output: map the training data (train_ds) via gdal and train the model.
data (images):
n = 600
shape = (256, 256, 8)
data structure:
project_photos/
....classes/
......barren/
......agriculture/
......wooded/
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds
import pathlib
>print (tf.__version__)
2.1.0
data_dir = ".\projects\keras\projectA\project_photos\classes"
data_dir = pathlib.Path(data_dir)
image_count = len(list(data_dir.glob('*/*.tif')))
>print(image_count)
600
list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'), shuffle=False)
list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)
batch_size = 32
img_height = 256
img_width = 256
>for f in list_ds.take(5):
> print(f.numpy())
b'/home/projects/keras/projectA/project_photos/classes/barren/12345_b0001.tif'
b'/home/projects/keras/projectA/project_photos/classes/wooded//12345_w0001.tif'
b'/home/projects/keras/projectA/project_photos/classes/barren/12345_b0002.tif'
b'/home/projects/keras/projectA/project_photos/classes/agriculture//12345_a0001.tif'
b'/home/projects/keras/projectA/project_photos/classes/wooded/12345_w0002.tif'
# tree structure
>class_names = np.array(sorted([item.name for item in data_dir.glob('*')]))
print(class_names)
['barren' 'agriculture' 'wooded']
# train/validation split
val_size = int(image_count * 0.2)
train_ds = list_ds.skip(val_size)
val_ds = list_ds.take(val_size)
def get_label(file_path):
    # convert the path to a list of path components
    parts = tf.strings.split(file_path, os.path.sep)
    # The second to last is the class-directory
    one_hot = parts[-2] == class_names
    # Integer encode the label
    return tf.argmax(one_hot)

def decode_img(img):
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    # resize the image to the desired size
    return tf.image.resize(img, [img_height, img_width])

def process_path(file_path):
    label = get_label(file_path)
    # load the raw data from the file as a string
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img, label
# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(process_path, num_parallel_calls=AUTOTUNE)
I understand that tensorflow has limited (experimental) support for decode_tiff, and even if that did work, I am unable to use the latest version of TF that has that update.
This leaves me attempting workarounds, such as the following, which have not succeeded:
"""
Updating decode_img(img) in attempt to process 8-channel .TIF raster
"""
# attempt: adding a gdal.Open call to decode_img
## fails because the image path in train_ds is stored as bytes
x = gdal.Open(file_path)
Error: Not a string.
# attempt: modifying process_path to extract the path as a str
def process_path(file_path):
    label = get_label(file_path)
    # load the raw data from the file as a string
    img = ''
    for fpath in file_path:
        img = fpath.numpy()
    img = decode_img(img)
    return img, label
>train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
ValueError: len requires a non-scalar tensor, got one of shape Tensor("Shape:0", shape=(0,), dtype=int32)
# attempt: processing outside of `.map` works just fine
imgList = []
for elem in train_ds:
    img = elem.numpy()
    img = img.decode()
    imgList.append(img)
file_path = imgList[0]
raster = gdal.Open(file_path)
bands = [raster.GetRasterBand(k + 1).ReadAsArray() for k in range (raster.RasterCount)]
n_bands = len(bands)
img_array = np.stack(bands,2)
img = tf.convert_to_tensor(img_array, dtype = tf.float32)
img = tf.image.resize(img, [img_height, img_width])
print(type(img))
print(img.numpy().shape)
<class: 'tensorflow.python.framework.ops.EagerTensor'>
(256, 256, 8)
So, any ideas on how I can get this to work within the TF framework - getting TF to process the raster via .map?
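One commonly suggested workaround (only a sketch, not tested against this data) is to keep the gdal code that already works eagerly and wrap it in tf.py_function, which lets plain Python run inside .map; the set_shape call afterwards is needed because py_function drops static shape information. It assumes gdal is importable in the same environment, as in the snippet above.

def read_tif(path_tensor):
    # Runs eagerly inside tf.py_function, so .numpy() is available
    path = path_tensor.numpy().decode()
    raster = gdal.Open(path)
    bands = [raster.GetRasterBand(k + 1).ReadAsArray() for k in range(raster.RasterCount)]
    return np.stack(bands, 2).astype(np.float32)

def process_path(file_path):
    label = get_label(file_path)
    img = tf.py_function(read_tif, [file_path], tf.float32)
    img.set_shape([None, None, 8])   # restore the shape hint lost by py_function
    img = tf.image.resize(img, [img_height, img_width])
    return img, label

train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)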
I am a little bit confused about the data augmentation performed in PyTorch.
Because we are dealing with a segmentation task, the image and the mask need the same data augmentation, but some transforms are random, such as random rotation.
Keras provides a random seed guarantee that data and mask do the same operation, as shown in the following code:
data_gen_args = dict(featurewise_center=True,
                     featurewise_std_normalization=True,
                     rotation_range=25,
                     horizontal_flip=True,
                     vertical_flip=True)
image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

seed = 1
image_generator = image_datagen.flow(train_data, seed=seed, batch_size=1)
mask_generator = mask_datagen.flow(train_label, seed=seed, batch_size=1)
train_generator = zip(image_generator, mask_generator)
I didn't find a similar description in the official Pytorch documentation, so I don't know how to ensure that data and mask can be processed synchronously.
Pytorch does provide such a function, but I want to apply it to a custom Dataloader.
For example:
def __getitem__(self, index):
    img = np.zeros((self.im_ht, self.im_wd, channel_size))
    mask = np.zeros((self.im_ht, self.im_wd, channel_size))

    temp_img = np.load(Image_path + '{:0>4}'.format(self.patient_index[index]) + '.npy')
    temp_label = np.load(Label_path + '{:0>4}'.format(self.patient_index[index]) + '.npy')

    for i in range(channel_size):
        img[:,:,i] = temp_img[self.count[index] + i]
        mask[:,:,i] = temp_label[self.count[index] + i]

    if self.transforms:
        img = np.uint8(img)
        mask = np.uint8(mask)
        img = self.transforms(img)
        mask = self.transforms(mask)

    return img, mask
In this case, img and mask will be transformed separately, because operations such as random rotation draw their parameters independently, so the correspondence between mask and image may be broken. In other words, the image may be rotated while the mask is not.
EDIT 1
I used the method in augmentations.py, but I got an error:
Traceback (most recent call last):
File "test_transform.py", line 87, in <module>
for batch_idx, image, mask in enumerate(train_loader):
File "/home/dirk/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 314, in __next__
batch = self.collate_fn([self.dataset[i] for i in indices])
File "/home/dirk/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 314, in <listcomp>
batch = self.collate_fn([self.dataset[i] for i in indices])
File "/home/dirk/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/dataset.py", line 103, in __getitem__
return self.dataset[self.indices[idx]]
File "/home/dirk/home/data/dirk/segmentation_unet_pytorch/data.py", line 164, in __getitem__
img, mask = self.transforms(img, mask)
File "/home/dirk/home/data/dirk/segmentation_unet_pytorch/augmentations.py", line 17, in __call__
img, mask = a(img, mask)
TypeError: __call__() takes 2 positional arguments but 3 were given
This is my code for __getitem__():
data_transforms = {
    'train': Compose([
        RandomHorizontallyFlip(),
        RandomRotate(degree=25),
        transforms.ToTensor()
    ]),
}

train_set = DatasetUnetForTestTransform(fold=args.fold, random_index=args.random_index, transforms=data_transforms['train'])

# __getitem__ in class DatasetUnetForTestTransform
def __getitem__(self, index):
    img = np.zeros((self.im_ht, self.im_wd, channel_size))
    mask = np.zeros((self.im_ht, self.im_wd, channel_size))
    temp_img = np.load(Label_path + '{:0>4}'.format(self.patient_index[index]) + '.npy')
    temp_label = np.load(Label_path + '{:0>4}'.format(self.patient_index[index]) + '.npy')
    temp_img, temp_label = crop_data_label_from_0(temp_img, temp_label)
    for i in range(channel_size):
        img[:,:,i] = temp_img[self.count[index] + i]
        mask[:,:,i] = temp_label[self.count[index] + i]
    if self.transforms:
        img = T.ToPILImage()(np.uint8(img))
        mask = T.ToPILImage()(np.uint8(mask))
        img, mask = self.transforms(img, mask)
        img = T.ToTensor()(img).copy()
        mask = T.ToTensor()(mask).copy()
    return img, mask
EDIT 2
I found that after ToTensor, the Dice between the same labels becomes 255 instead of 1. How can I fix it?
# Dice computation
def DSC_computation(label, pred):
    pred_sum = pred.sum()
    label_sum = label.sum()
    inter_sum = np.logical_and(pred, label).sum()
    return 2 * float(inter_sum) / (pred_sum + label_sum)
Feel free to ask if more code is needed to explain the problem.
Transforms that require input parameters, like RandomCrop, have a get_params method that returns the parameters for one particular random draw. These can then be applied to both the image and the mask using the functional interface of transforms:
from torchvision import transforms
import torchvision.transforms.functional as F
i, j, h, w = transforms.RandomCrop.get_params(input, (100, 100))
input = F.crop(input, i, j, h, w)
target = F.crop(target, i, j, h, w)
Sample available here:
https://github.com/pytorch/vision/releases/tag/v0.2.0
Complete example available here for VOC & COCO:
https://github.com/pytorch/vision/blob/master/references/segmentation/transforms.py
https://github.com/pytorch/vision/blob/master/references/segmentation/train.py
Regarding the error,
ToTensor() was not overridden to handle the additional mask argument, so it cannot be in data_transforms. Moreover, __getitem__ already applies ToTensor to both img and mask before returning them.
data_transforms = {
    'train': Compose([
        RandomHorizontallyFlip(),
        RandomRotate(degree=25),
        # transforms.ToTensor()  => remove this line
    ]),
}
torchvision also provides similar functions [document].
Here is a simple example,
import torchvision
from torchvision import transforms

trans = transforms.Compose([transforms.CenterCrop((178, 178)),
                            transforms.Resize(128),
                            transforms.RandomRotation(20),
                            transforms.ToTensor(),
                            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dset = torchvision.datasets.MNIST(data_root, transform=trans)
EDIT
A brief example of customizing your own CelebA dataset. Note that, to apply transformations, you need to call the transform list in __getitem__.
class CelebADataset(Dataset):
    def __init__(self, root, transforms=None, num=None):
        super(CelebADataset, self).__init__()
        self.img_root = os.path.join(root, 'img_align_celeba')
        self.attr_root = os.path.join(root, 'Anno/list_attr_celeba.txt')
        self.transforms = transforms

        df = pd.read_csv(self.attr_root, sep='\s+', header=1, index_col=0)
        # print(df.columns.tolist())
        if num is None:
            self.labels = df.values
            self.img_name = df.index.values
        else:
            self.labels = df.values[:num]
            self.img_name = df.index.values[:num]

    def __getitem__(self, index):
        img = Image.open(os.path.join(self.img_root, self.img_name[index]))
        # only use blond_hair, eyeglass, male, smile
        indices = [9, 15, 20, 31]
        label = np.take(self.labels[index], indices)
        label[label == -1] = 0
        if self.transforms is not None:
            img = self.transforms(img)
        return np.asarray(img), label

    def __len__(self):
        return len(self.labels)
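A hedged usage sketch for the class above (the root path is a placeholder; it assumes the standard img_align_celeba layout and reuses the trans pipeline defined earlier):

from torch.utils.data import DataLoader

celeba = CelebADataset(root='./data/celeba', transforms=trans, num=1000)
loader = DataLoader(celeba, batch_size=32, shuffle=True)

imgs, labels = next(iter(loader))
print(imgs.shape, labels.shape)   # e.g. (32, 3, 128, 128) and (32, 4)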
EDIT 2
I probably missed something at first glance. The main point of your problem is how to apply "the same" data preprocessing to img and labels. To my understanding, there is no built-in PyTorch function for this, so what I did before was implement the augmentation myself.
import random
from PIL import Image

class RandomRotate(object):
    def __init__(self, degree):
        self.degree = degree

    def __call__(self, img, mask):
        rotate_degree = random.random() * 2 * self.degree - self.degree
        return img.rotate(rotate_degree, Image.BILINEAR), \
               mask.rotate(rotate_degree, Image.NEAREST)
Note that the input should be PIL format. See this for more information.
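To chain several paired transforms like this, a minimal Compose-style wrapper can forward both arguments; the name PairedCompose below is made up, but it follows the two-argument __call__ convention used by RandomRotate above (and by the augmentations.py approach in the question):

class PairedCompose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, mask):
        # Apply each paired transform to the image and mask together
        for t in self.transforms:
            img, mask = t(img, mask)
        return img, mask

# e.g. paired = PairedCompose([RandomHorizontallyFlip(), RandomRotate(degree=25)])
#      img, mask = paired(img, mask)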
Another idea is to stack your image and mask along the channel dimension and then transform them together. Obviously this only works for geometric-type transforms, and you need to use the same dtype for both. I use something like this:
# Apply these to image and mask
affine_transforms = transforms.Compose([
    transforms.RandomAffine(degrees=180),
    ...
])

# Apply these to image only
image_transforms = transforms.Compose([
    transforms.GaussianBlur(),
    ...
])

# Loader...
def __getitem__(self, index: int):
    # Get the image and mask, here shape=(HxW) for both
    image = self.images[index]
    mask = self.masks[index]

    # Stack the image and mask together so they get the same geometric transformations
    stacked = torch.cat([image, mask], dim=0)  # shape=(2xHxW)
    stacked = self.affine_transforms(stacked)

    # Split them back up again
    image, mask = torch.chunk(stacked, chunks=2, dim=0)

    # Image transforms are only applied to the image
    image = self.image_transforms(image)

    return image, mask
In the function read_train_sets() an empty class is created called DataSets. It has no methods or variables. An object called data_sets is then created.
My question is: is data_sets.train an object of the class DataSet()?
Or are you creating a method called train() and setting it equal to an object of the DataSet() class?
Note that there are two classes called DataSet and DataSets in the code.
import cv2
import os
import glob
from sklearn.utils import shuffle
import numpy as np
def load_train(train_path, image_size, classes):
    images = []
    labels = []
    img_names = []
    cls = []
    print('Going to read training images')
    for fields in classes:
        index = classes.index(fields)
        print('Now going to read {} files (Index: {})'.format(fields, index))
        path = os.path.join(train_path, fields, '*g')
        files = glob.glob(path)
        for fl in files:
            image = cv2.imread(fl)
            image = cv2.resize(image, (image_size, image_size), 0, 0, cv2.INTER_LINEAR)
            image = image.astype(np.float32)
            image = np.multiply(image, 1.0 / 255.0)
            images.append(image)
            label = np.zeros(len(classes))
            label[index] = 1.0
            labels.append(label)
            flbase = os.path.basename(fl)
            img_names.append(flbase)
            cls.append(fields)
    images = np.array(images)
    labels = np.array(labels)
    img_names = np.array(img_names)
    cls = np.array(cls)

    return images, labels, img_names, cls
class DataSet(object):

    def __init__(self, images, labels, img_names, cls):
        self._num_examples = images.shape[0]

        self._images = images
        self._labels = labels
        self._img_names = img_names
        self._cls = cls
        self._epochs_done = 0
        self._index_in_epoch = 0

    @property
    def images(self):
        return self._images

    @property
    def labels(self):
        return self._labels

    @property
    def img_names(self):
        return self._img_names

    @property
    def cls(self):
        return self._cls

    @property
    def num_examples(self):
        return self._num_examples

    @property
    def epochs_done(self):
        return self._epochs_done

    def next_batch(self, batch_size):
        """Return the next `batch_size` examples from this data set."""
        start = self._index_in_epoch
        self._index_in_epoch += batch_size

        if self._index_in_epoch > self._num_examples:
            # After each epoch we update this
            self._epochs_done += 1
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_examples
        end = self._index_in_epoch

        return self._images[start:end], self._labels[start:end], self._img_names[start:end], self._cls[start:end]
def read_train_sets(train_path, image_size, classes, validation_size):
    class DataSets(object):
        pass
    data_sets = DataSets()

    images, labels, img_names, cls = load_train(train_path, image_size, classes)
    images, labels, img_names, cls = shuffle(images, labels, img_names, cls)

    if isinstance(validation_size, float):
        validation_size = int(validation_size * images.shape[0])

    validation_images = images[:validation_size]
    validation_labels = labels[:validation_size]
    validation_img_names = img_names[:validation_size]
    validation_cls = cls[:validation_size]

    train_images = images[validation_size:]
    train_labels = labels[validation_size:]
    train_img_names = img_names[validation_size:]
    train_cls = cls[validation_size:]

    data_sets.train = DataSet(train_images, train_labels, train_img_names, train_cls)
    data_sets.valid = DataSet(validation_images, validation_labels, validation_img_names, validation_cls)

    return data_sets
You can dynamically assign attributes to objects in Python. Try calling hasattr(data_sets, 'train') after the assignment, which asks whether data_sets has an attribute named train, and see what you get. You can also call type(data_sets.train) and convince yourself that it is indeed of type DataSet.
data_sets.train = DataSet(train_images, train_labels, train_img_names, train_cls)
This is quite clear: we are assigning an instance of the DataSet class to data_sets.train.
With respect to the data_sets object, train and valid become two of its attributes. Hope this helps.
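A tiny self-contained illustration of the same idea, separate from the training code (the attribute value here is made up):

class DataSets(object):
    pass

data_sets = DataSets()        # the class body is empty, but instances accept new attributes
data_sets.train = [1, 2, 3]   # dynamically creates the 'train' attribute

print(hasattr(data_sets, 'train'))   # True
print(type(data_sets.train))         # <class 'list'>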