In PyTorch data loader, how could I concatenate an image (let's say x.jpg) in-band wise to each and every input images. ie, in effect I will have 4 band input ( 3 band input jpg with 1 band x.jpg. How to implement it.
Please find below example of my current dataloader just to load the images. To this, I want to add x.jpg to "image"(ie input image, not to mask)
from PIL import Image
class lakeDataSet(Dataset):
def __init__(self, root, transform):
super().__init__()
self.root = root
self.img_dir = os.path.join(root,'image-c3/c3-crop') #9UAV
self.mask_dir = os.path.join(root,'label-c3/c3-crop')
# self.mask_dir = os.path.join(root,'test')
self.files = [fname for fname in os.listdir(self.img_dir) if fname.endswith('.jpg')]
self.transform = transform
def __len__(self):
return len(self.files)
def __getitem__(self,I):
fname = self.files[i]
img_path = os.path.join(self.img_dir, fname)
mask_path = os.path.join(self.mask_dir, fname)
img = self.transform(Image.open(img_path))
mask = self.transform(Image.open(mask_path))
return img, mask
I suppose the self.transform already has ToTensor. Otherwise you should specify it as well.
Then you can just concat the first dimension. Like
x_jpg = self.transform(Image.open('x.jpg'))
img = torch.cat((img, x_jpg), 0)
The x.jpg has to have only 1 channel, if it's an RGB then obviously it'll become 6 channels instead of 4.
Related
I am doing training using Lits (Liver) Dataset using Pytorch. Images are .jpeg and masks are .tiff images.
After doing the preprocessing steps like normalization, shape manipulation, etc. .tiff images are not visible, it is a black image.
visualize(image = image, mask = mask.squeeze()) is giving the image output, but not the mask output.
class Dataset(BaseDataset):
'''
Args:
images_dir (str): path to images folder
masks_dir (str): path to segmentation masks folder
class_values (list): values of classes to extract from segmentation mask
augmentation (albumentations.Compose): data transfromation pipeline
(e.g. flip, scale, etc.)
preprocessing (albumentations.Compose): data preprocessing
(e.g. noralization, shape manipulation, etc.)
'''
CLASSES = ['background', 'liver', 'tumor']
def __init__(self, image_dir, mask_dir, classes = None, augmentation= None, preprocessing=None):
self.images = os.listdir(image_dir)[0:3000]
#self.masks = list(map(lambda x: x.replace(".jpg", "_mask.png"), self.images)) #only for 512x512
#self.masks = list(map(lambda x: x.replace(".jpg", ".png"), self.images))
self.masks = list(map(lambda x: x.replace(".jpg", ".tiff"), self.images))
self.class_values = [self.CLASSES.index(cls.lower()) for cls in classes]
self.augmentation = augmentation
self.preprocessing = preprocessing
self.image_dir = image_dir
self.mask_dir = mask_dir
def __getitem__(self, i):
# read data
image = cv2.imread(self.image_dir + '/' + self.images[i])
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
mask = cv2.imread(self.mask_dir + '/' + self.masks[i], 0)
mask = np.expand_dims(mask, axis = 2)
# masks = [(mask == v) for v in self.class_values]
# mask = np.stack(masks, axis=-1).astype('float')
# print(mask.shape)
# # extract certain classes from mask (e.g. cars)
# masks = [(mask == v) for v in self.class_values]
# mask = np.stack(masks, axis=-1).astype('float')
if self.augmentation:
sample = self.augmentation(image = image, mask= mask)
image, mask = sample['image'], sample['mask']
if self.preprocessing:
sample = self.preprocessing(image = image, mask= mask)
image, mask = sample['image'], sample['mask']
return image, mask
def __len__(self):
return len(self.images)
dataset = Dataset(image_dir = train_frame_path,
mask_dir = train_mask_path,
classes = ['background', 'liver', 'tumor'])
image, mask = dataset[1210]
visualize(image = image, mask = mask.squeeze())
I'm training my neural network using PyTorch framework. The data is full HD images (1920x1080). But in each iteration, I just need to crop out a random 256x256 patch from these images. My network is relatively small (5 conv layers), and hence the bottleneck is being caused by loading the data. I've provided my current code below. Is there any way to optimize loading the data and speed up the training?
Code:
from pathlib import Path
import numpy
import skimage.io
import torch.utils.data as data
import Imath
import OpenEXR
class Ours(data.Dataset):
"""
Loads patches of resolution 256x256. Patches are selected such that they contain atleast 1 unknown pixel
"""
def __init__(self, data_dirpath, split_name, patch_size):
super(Ours, self).__init__()
self.dataroot = Path(data_dirpath) / split_name
self.video_names = []
for video_path in sorted(self.dataroot.iterdir()):
for i in range(4):
for j in range(11):
view_num = i * 12 + j
self.video_names.append((video_path.stem, view_num))
self.patch_size = patch_size
return
def __getitem__(self, index):
video_name, view_num = self.video_names[index]
patch_start_pt = (numpy.random.randint(1080), numpy.random.randint(1920))
frame1_path = self.dataroot / video_name / f'render/rgb/{view_num + 1:04}.png'
frame2_path = self.dataroot / video_name / f'render/rgb/{view_num + 2:04}.png'
depth_path = self.dataroot / video_name / f'render/depth/{view_num + 1:04}.exr'
mask_path = self.dataroot / video_name / f'render/masks/{view_num + 1:04}.png'
frame1 = self.get_image(frame1_path, patch_start_pt)
frame2 = self.get_image(frame2_path, patch_start_pt)
mask = self.get_mask(mask_path, patch_start_pt)
depth = self.get_depth(depth_path, patch_start_pt, mask)
data_dict = {
'frame1': frame1,
'frame2': frame2,
'mask': mask,
'depth': depth,
}
return data_dict
def __len__(self):
return len(self.video_names)
#staticmethod
def get_mask(path: Path, patch_start_point: tuple):
h, w = patch_start_point
mask = skimage.io.imread(path.as_posix())[h:h + self.patch_size, w:w + self.patch_size][None]
return mask
def get_image(self, path: Path, patch_start_point: tuple):
h, w = patch_start_point
image = skimage.io.imread(path.as_posix())
image = image[h:h + self.patch_size, w:w + self.patch_size, :3]
image = image.astype(numpy.float32) / 255 * 2 - 1
image_cf = numpy.moveaxis(image, [0, 1, 2], [1, 2, 0])
return image_cf
def get_depth(self, path: Path, patch_start_point: tuple, mask: numpy.ndarray):
h, w = patch_start_point
exrfile = OpenEXR.InputFile(path.as_posix())
raw_bytes = exrfile.channel('B', Imath.PixelType(Imath.PixelType.FLOAT))
depth_vector = numpy.frombuffer(raw_bytes, dtype=numpy.float32)
height = exrfile.header()['displayWindow'].max.y + 1 - exrfile.header()['displayWindow'].min.y
width = exrfile.header()['displayWindow'].max.x + 1 - exrfile.header()['displayWindow'].min.x
depth = numpy.reshape(depth_vector, (height, width))
depth = depth[h:h + self.patch_size, w:w + self.patch_size]
depth = depth[None]
depth = depth.astype(numpy.float32)
depth = depth * mask
return depth
Finally, I'm creating a DataLoader as follows:
train_data_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=4)
What I've tried so far:
I've searched if it is possible to read a part of the image. Unfortunately, I didn't get any leads. Looks like python libraries read the full image.
I'm planning to read more patches from a single image so that I will need to read fewer images. But in PyTorch framework, the get_item() function has to return a single sample, not a batch. So, in each get_item() I can read only a patch.
I'm planning to circumvent this as follows: Read 4 patches in get_item() and return patches of shape (4,3,256,256) instead of (3,256,256). Later when I read a batch using dataloader, I'll get a batch of shape (BS,4,3,256,256) instead of (BS,3,256,256). I can then concatenate the data along dim=1 to convert (BS,4,3,256,256) to (BS*4,3,256,256). Thus I can reduce batch_size (BS) by 4 and hopefully this will speed up data loading by 4 times.
Are there any other options? I'm open to all kind of suggestions. Thanks!
I have a file containing paths to images I would like to load into Pytorch, while utilizing the built-in dataloader features (multiprocess loading pipeline, data augmentations, and so on).
def create_links():
data_dir = "/myfolder"
full_path_list = []
assert os.path.isdir(data_dir)
for _, _, filenames in os.walk(data_dir):
for filename in filenames:
full_path_list.append(os.path.join(data_dir, filename))
with open(config.data.links_file, 'w+') as links_file:
for full_path in full_path_list:
links_file.write(f"{full_path}\n")
def read_links_file_to_list():
config = ConfigProvider.config()
links_file_path = config.data.links_file
if not os.path.isfile(links_file_path):
raise RuntimeError("did you forget to create a file with links to images? Try using 'create_links()'")
with open(links_file_path, 'r') as links_file:
return links_file.readlines()
So I have a list of files (or a generator, or whatever works), file_list = read_links_file_to_list().
How can I build a Pytorch dataloader around it, and how would I use it?
What you want is a Custom Dataset. The __getitem__ method is where you would apply transforms such as data-augmentation etc. To give you an idea of what it looks like in practice you can take a look at this Custom Dataset I wrote the other day:
class GTSR43Dataset(Dataset):
"""German Traffic Sign Recognition dataset."""
def __init__(self, root_dir, train_file, transform=None):
self.root_dir = root_dir
self.train_file_path = train_file
self.label_df = pd.read_csv(os.path.join(self.root_dir, self.train_file_path))
self.transform = transform
self.classes = list(self.label_df['ClassId'].unique())
def __getitem__(self, idx):
"""Return (image, target) after resize and preprocessing."""
img = os.path.join(self.root_dir, self.label_df.iloc[idx, 7])
X = Image.open(img)
y = self.class_to_index(self.label_df.iloc[idx, 6])
if self.transform:
X = self.transform(X)
return X, y
def class_to_index(self, class_name):
"""Returns the index of a given class."""
return self.classes.index(class_name)
def index_to_class(self, class_index):
"""Returns the class of a given index."""
return self.classes[class_index]
def get_class_count(self):
"""Return a list of label occurences"""
cls_count = dict(self.label_df.ClassId.value_counts())
# cls_percent = list(map(lambda x: (1 - x / sum(cls_count)), cls_count))
return cls_count
def __len__(self):
"""Returns the length of the dataset."""
return len(self.label_df)
I have a Tensorflow input pipeline that reads in two png files (example, label) from disk. I want to tell tensorflow to skip an example/label pair based on a value in the label. Anyone know how to do this?
Here is a simplified example of the input pipeline and with a comment where I want to do the filtering:
import tensorflow as tf
import glob2 as glob
def preprocess_images(impath, labpath):
image = tf.io.read_file(impath)
label = tf.io.read_file(labpath)
image = tf.image.decode_png(image, channels=3)
label = tf.image.decode_png(label, channels=1)
if tf.reduce_sum(label) == 0:
#skip the image and move on to the next, don't include this in the batch
else:
return (image, label)
im_files = glob.glob(impath + '*.png')
lab_files = glob.glob(labpath + '*.png')
files = (im_files, lab_files)
path = tf.data.Dataset.from_tensor_slices(files)
pair = path.map(preprocess_images)
ds = tf.data.Dataset.zip(pair)
ds = ds.batch(64)
The easiest way seems to be to use filter method on your tf.data.Dataset object.
Here I am going to load the label only and filter out the entries with a sum of 0:
def load_label_only(impath, labpath):
label = tf.io.read_file(labpath)
label = tf.image.decode_png(label, channels=1)
return impath, label
# Create the dataset as in your example:
im_files = glob.glob(impath + '*.png')
lab_files = glob.glob(labpath + '*.png')
files = (im_files, lab_files)
ds = tf.data.Dataset.from_tensor_slices(files)
ds = ds.map(load_label_only)
# Here, I am going to keep only non-zero labels:
filtered_ds = ds.filter(lambda image_path, label_map: tf.reduce_sum(label_map) != 0)
# Load the rest of the images...
I have two folders of images and I am trying to build one large image from all the slices in each folder. I need to alternate between folders to build the image. For example the first slice comes from folder 1, the second slice comes from folder 2, the third from folder 1 ect. I have the files ordered by filename in the individual folders so I am trying to iterate through the folders and add a new strip to an image. The code runs BUT it doesn't seem to be saving the composite image. I am new to PIL, so I am sure that it is something simple, but your help is appreciated. Thanks!
def merge_images(file1, file2):
"""Merge two images into one, displayed above and below
:param file1: path to first image file
:param file2: path to second image file
:return: the merged Image object
"""
if file1.startswith('.'):
return None
image1 = Image.open(file1)
image2 = Image.open(file2)
(width1, height1) = image1.size
(width2, height2) = image2.size
result_width = width1 + width2
result_height = max(height1, height2)
result = Image.new('RGB', (result_width, result_height))
result.paste(im=image1, box=(0, 0))
result.paste(im=image2, box=(0, height1))
return result
imageCounter = 0
firstPass = True
compImage = Image.new('RGBA', img.size, (0,0,0,0))
for i in range(len(boundingBoxes)+1):
if firstPass:
compImage = merge_images('./image_processing/'+ os.listdir('./image_processing/')[imageCounter],
'./img_patches/outputs/'+os.listdir('./img_patches/outputs/')[imageCounter])
if compImage is not None:
firstPass = False
compImage.save('./image_processing/compImage.jpg')
else:
compImage = merge_images('./image_processing/compImage.jpg','./image_processing/'+ os.listdir('./image_processing/')[imageCounter])
compImage.save('./image_processing/compImage.jpg')
compImage = merge_images('./image_processing/compImage.jpg','./img_patches/outputs/'+ os.listdir('./img_patches/outputs/')[imageCounter])
compImage.save('./image_processing/compImage.jpg')
imageCounter = imageCounter + 1
You have to tell PIL to save the images:
for i in range(len(boundingBoxes)+1):
if firstPass:
compImage = merge_images('./image_processing/'+ os.listdir('./image_processing/')[imageCounter],
'./img_patches/outputs/'+os.listdir('./img_patches/outputs/')[imageCounter])
compImage.save(open('output/{}.png'.format(imageCounter), 'w'))