I have a set of bags of frames obtained from YouTube videos and I would like to return an entire bag when I iterate my dataset. My custom dataset class is the following one:
dataset_path = Path('/content/VideoClassificationDataset')
class VideoDataset(Dataset):
    def __init__(self, dictionary, transform=None):
        self.l_dict = list(dictionary.items())
        self.transform = transform

    def __len__(self):
        return len(self.l_dict)

    def __get_item__(self, index):
        item = self.l_dict[index]
        images_path = item[0]
        images = [Image.open(f'{dataset_path}/{images_path}/{image}') for image in os.listdir(f'{dataset_path}/{images_path}')]
        y_labels = torch.tensor(item[1])
        if self.transform:
            for image in images: self.transform(image)
        return images, y_labels
Moreover, I've also done:
def spit_train(train_data, perc_val_size):
    train_size = len(train_data)
    val_size = int((train_size * perc_val_size) // 100)
    train_size -= val_size
    return random_split(train_data, [int(train_size), int(val_size)])
train_data, val_data = spit_train(VideoDataset(train_dict, transform=train_transform()), 20)
test_data = VideoDataset(dictionary=test_dict, transform=test_transform())
BATCH_SIZE = 16
NUM_WORKERS = os.cpu_count()
def generate_dataloaders(train_data, test_data, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS):
    train_dl = DataLoader(dataset=train_data,
                          batch_size=BATCH_SIZE,
                          num_workers=NUM_WORKERS,
                          shuffle=True)
    val_dl = DataLoader(dataset=val_data,
                        batch_size=BATCH_SIZE,
                        num_workers=NUM_WORKERS,
                        shuffle=True)
    test_dl = DataLoader(dataset=test_data,
                         batch_size=BATCH_SIZE,
                         num_workers=NUM_WORKERS,
                         shuffle=False)  # no need to shuffle the test data for a time series dataset
    return train_dl, val_dl, test_dl

train_dl, val_dl, test_dl = generate_dataloaders(train_data, test_data)
The train_dict and test_dict are dictionaries that contain the path of each bag of shots as key and the list of labels as value, like so:
{'train/iqGq-8vHEJs/bag_of_shots0': [2],
'train/iqGq-8vHEJs/bag_of_shots1': [2],
'train/gnw83R8R6jU/bag_of_shots0': [119],
'train/gnw83R8R6jU/bag_of_shots1': [119],
...
}
The point is that when I try to see what the dataloader contains:
train_features_batch, train_labels_batch = next(iter(train_dl))
print(train_features_batch.shape, train_labels_batch.shape)
val_features_batch, val_labels_batch = next(iter(val_dl))
print(val_features_batch.shape, val_labels_batch.shape)
I get:
NotImplementedError: Caught NotImplementedError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataset.py", line 295, in __getitem__
return self.dataset[self.indices[idx]]
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataset.py", line 53, in __getitem__
raise NotImplementedError
NotImplementedError
I'm not particularly sure if I can return a set of images in my get_item() function at this point.
There is a typo in the function name: instead of __get_item__, the name should be __getitem__.
Since this is not defined in your custom dataset class, the method from the base class (torch.utils.data.Dataset) is used, which doesn't implement it, since it needs to be implemented by each dataset that inherits from the class. So you get a NotImplementedError.
More documentation on this can be found here.
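For illustration, a minimal corrected version might look like this (a sketch assuming the rest of the class is unchanged; note the original loop also discarded the transformed frames, so the results are collected here):

def __getitem__(self, index):  # renamed from __get_item__
    images_path, label = self.l_dict[index]
    bag_dir = f'{dataset_path}/{images_path}'
    images = [Image.open(f'{bag_dir}/{image}') for image in os.listdir(bag_dir)]
    y_labels = torch.tensor(label)
    if self.transform:
        images = [self.transform(image) for image in images]  # keep the transformed frames
    return images, y_labels

If the transform ends with ToTensor() and every bag has the same number of frames, you can also torch.stack(images) so each item is a single tensor; bags with varying frame counts would need a custom collate_fn.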
I am building a multiclass segmentation model using DeepLabV3+ and ResNet50 to detect facial parts. I started off with this tutorial but altered much of the code for my use case.
In this block, I am processing my data:
# CIHP has 20 labels and Headsegmentation has 14 labels
image_size = 512
batch = 4
labels = 14
data_directory = "/content/headsegmentation_final/"
sample_train_images = len(os.listdir(data_directory + 'Training/Images/')) - 1
sample_validation_images = len(os.listdir(data_directory + 'Validation/Images/')) - 1
print('Train size: ' + str(sample_train_images))
print('Validation size: ' + str(sample_validation_images))
t_images = sorted(glob(os.path.join(data_directory, "Training/Images/*")))[:sample_train_images]
t_masks = sorted(glob(os.path.join(data_directory, "Training/Category_ids/*")))[:sample_train_images]
v_images = sorted(glob(os.path.join(data_directory, "Validation/Images/*")))[:sample_validation_images]
v_masks = sorted(glob(os.path.join(data_directory, "Validation/Category_ids/*")))[:sample_validation_images]
def image_augmentation(img, random_range):
    img = tf.image.random_flip_left_right(img)
    img = tfa.image.rotate(img, random_range)
    return img

def image_process(path, mask=False):
    img = tf.io.read_file(path)
    upper = 90 * (math.pi / 180.0)  # degrees -> radians
    lower = 0 * (math.pi / 180.0)
    ran_range = random.uniform(lower, upper)
    if mask == True:
        img = tf.image.decode_png(img, channels=3)
        img.set_shape([None, None, 3])
        img = tf.image.resize(images=img, size=[image_size, image_size])
        #img = image_augmentation(img, ran_range)
    else:
        img = tf.image.decode_jpeg(img, channels=3)
        img.set_shape([None, None, 3])
        img = tf.image.resize(images=img, size=[image_size, image_size])
        img = img / 127.5 - 1
        #img = image_augmentation(img, ran_range)
    return img

def data_loader(image_list, mask_list):
    img = image_process(image_list)
    mask = image_process(mask_list, mask=True)
    return img, mask

def data_generator(image_list, mask_list):
    cihp_dataset = tf.data.Dataset.from_tensor_slices((image_list, mask_list))
    cihp_dataset = cihp_dataset.map(data_loader, num_parallel_calls=tf.data.AUTOTUNE)
    cihp_dataset = cihp_dataset.batch(batch, drop_remainder=True)
    return cihp_dataset
train_dataset = data_generator(t_images, t_masks)
val_dataset = data_generator(v_images, v_masks)
print("Train Dataset:", train_dataset)
print("Val Dataset:", val_dataset)
I am using the head-segmentation dataset by mut1ny, which has 14 labels. The images are jpg while the masks are png.
Here is where I am trying to run my epochs:
loss = keras.losses.SparseCategoricalCrossentropy(from_logits = True)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss=loss, metrics=["accuracy"])
history = model.fit(train_dataset, validation_data = val_dataset, epochs = 50)
But this is the error I am met with:
Epoch 1/50
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-9-c69b991cd650> in <module>()
3 model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss=loss, metrics=["accuracy"])
4
----> 5 history = model.fit(train_dataset, validation_data = val_dataset, epochs = 50)
6
7 plt.plot(history.history["loss"])
1 frames
/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in autograph_handler(*args, **kwargs)
1145 except Exception as e: # pylint:disable=broad-except
1146 if hasattr(e, "ag_error_metadata"):
-> 1147 raise e.ag_error_metadata.to_exception(e)
1148 else:
1149 raise
ValueError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step **
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 860, in train_step
loss = self.compute_loss(x, y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 919, in compute_loss
y, y_pred, sample_weight, regularization_losses=self.losses)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 141, in __call__
losses = call_fn(y_true, y_pred)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 245, in call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 1863, in sparse_categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/keras/backend.py", line 5203, in sparse_categorical_crossentropy
labels=target, logits=output)
ValueError: `labels.shape` must equal `logits.shape` except for the last dimension. Received: labels.shape=(3145728,) and logits.shape=(1048576, 14)
Update
Here is an example mask: [mask image omitted]
This is how I am loading the model:
def DeepLabV3_ResNet50(size, classes):
    input = keras.Input(shape=(size, size, 3))
    resnet50 = keras.applications.ResNet50(weights="imagenet", include_top=False, input_tensor=input)
    x = resnet50.get_layer("conv4_block6_2_relu").output
    x = DSP_pooling(x)
    a = layers.UpSampling2D(size=(size // 4 // x.shape[1], size // 4 // x.shape[2]), interpolation="bilinear")(x)
    b = resnet50.get_layer("conv2_block3_2_relu").output
    b = block(b, filters=48, kernel=1)
    x = layers.Concatenate(axis=-1)([a, b])
    x = block(x)
    x = block(x)
    x = layers.UpSampling2D(size=(size // x.shape[1], size // x.shape[2]), interpolation="bilinear")(x)
    output = layers.Conv2D(3, kernel_size=(1, 1), padding="same")(x)
    return keras.Model(inputs=input, outputs=output)
model = DeepLabV3_ResNet50(size = image_size, classes = labels)
model.summary()
I've adjusted the shape of the labels. Both the images and masks are RGB. The necessary adjustments were made during dataset processing. Where am I going wrong?
So, the problem is: you have a 3-channel image, but 14 classes.
You need to transform the mask images into 14 classes for them to be compatible with the model's output. And, of course, the last layer of the model must have 14 filters to output 14 classes.
So, in a previous phase, before training, you must create a dictionary of colors and assign an index to each.
One suggestion is to combine the RGB channels into a single key in a way that can't produce collisions.
Such as:
images = load_a_number_of_images_that_have_all_colors()
images = numpy.concatenate(images, axis=0)  # maybe not necessary
# the goal above is to load a shape like (anything, anything, channels)
images = images.astype(numpy.int64)  # cast first so uint8 pixels don't overflow when building the keys
color_keys = images[:, :, 0] * 1000000 + images[:, :, 1] * 1000 + images[:, :, 2]
color_keys = numpy.unique(color_keys)
color_dictionary = {key: i for i, key in enumerate(color_keys)}
Now that you have a color dictionary, I'm not sure what the best strategy is (preprocess the images and save their index arrays, do it on the fly in the loader, etc.). Choose something that will run fast. I believe saving each array would be an interesting idea, but then you'd have to change the loader a lot.
The goal is: in the loader, transform the images into arrays of indices.
def image_process(path, mask=False):
    # blablablabla
    if mask == True:
        img = tf.image.decode_png(img, channels=3)
        img.set_shape([None, None, 3])
        img = tf.image.resize(images=img, size=[image_size, image_size],
                              method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        # nearest-neighbor resizing is extremely important:
        # you don't want slight changes due to interpolation
        #img = image_augmentation(img, ran_range)  # make sure you use "nearest" here too!!
        img = tf.reshape(img, [-1, 3])  # flattened to make the lookup easier
        img = tf.cast(img, tf.int64)
        color_keys = img[:, 0] * 1000000 + img[:, 1] * 1000 + img[:, 2]
        img = color_table.lookup(color_keys)  # color_table: a lookup table built from color_dictionary, see below
        img = tf.reshape(img, [image_size, image_size])  # having an image shape again
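Since a plain Python dictionary can't be indexed with a tensor inside a tf.data pipeline, the lookup above uses a table. As a sketch (the names keys, values, and color_table are illustrative, not from the original answer), it could be built once from color_dictionary before creating the dataset:

keys = tf.constant(list(color_dictionary.keys()), dtype=tf.int64)
values = tf.constant(list(color_dictionary.values()), dtype=tf.int64)
color_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys, values),
    default_value=-1)  # -1 flags any color that is not in the dictionary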
I'm trying to run this code, but I get an error and I'm not sure whether it is caused by the file path or by something else.
The dataset folder contains images of the cervix.
The images are organized into their labelled categories: Type_1, Type_2, and Type_3.
This is the code:
def im_multi(path):
    try:
        im_stats_im_ = Image.open(path)
        return [path, {'size': im_stats_im_.size}]
    except:
        print(path)
        return [path, {'size': [0, 0]}]

def im_stats(im_stats_df):
    im_stats_d = {}
    p = Pool(cpu_count())
    ret = p.map(im_multi, im_stats_df['path'])
    for i in range(len(ret)):
        im_stats_d[ret[i][0]] = ret[i][1]
    im_stats_df['size'] = im_stats_df['path'].map(lambda x: ' '.join(str(s) for s in im_stats_d[x]['size']))
    return im_stats_df

def get_im_cv2(path):
    img = cv2.imread(path)
    resized = cv2.resize(img, (32, 32), cv2.INTER_LINEAR)  # use cv2.resize(img, (64, 64), cv2.INTER_LINEAR)
    return [path, resized]

def normalize_image_features(paths):
    imf_d = {}
    p = Pool(cpu_count())
    ret = p.map(get_im_cv2, paths)
    for i in range(len(ret)):
        imf_d[ret[i][0]] = ret[i][1]
    ret = []
    fdata = [imf_d[f] for f in paths]
    fdata = np.array(fdata, dtype=np.uint8)
    fdata = fdata.transpose((0, 3, 1, 2))
    fdata = fdata.astype('float32')
    fdata = fdata / 255
    return fdata
#train = glob.glob('../input/train/**/*.jpg') + glob.glob('../input/additional/**/*.jpg')
train = glob.glob('D:\\Test cods\\KerasCNNClean\\data\\train\\Type_1\\*.jpg') \
    + glob.glob('D:\\Test cods\\KerasCNNClean\\data\\train\\Type_2\\*.jpg') \
    + glob.glob('D:\\Test cods\\KerasCNNClean\\data\\train\\Type_3\\*.jpg')
train = pd.DataFrame([[p.split('/')[3], p.split('/')[4], p] for p in train],
                     columns=['type', 'image', 'path'])[::5]  # limit for Kaggle Demo
train = im_stats(train)
train = train[train['size'] != '0 0'].reset_index(drop=True)  # corrupt images removed
print("Bad images removed")
print("loading test data")
train_data = normalize_image_features(train['path'])
print("test data loaded")
#np.save('train.npy', train_data, allow_pickle=True, fix_imports=True)
np.save(r'D:/Test cods/KerasCNNClean/npyTrain.train.npy', train_data, allow_pickle=True, fix_imports=True)
le = LabelEncoder()
train_target = le.fit_transform(train['type'].values)
print(le.classes_)
#np.save('train_target.npy', train_target, allow_pickle=True, fix_imports=True)
np.save(r'D:/Test cods/KerasCNNClean/npyTrain.train_target.npy', train_target, allow_pickle=True, fix_imports=True)
I couldn't figure out what the problem is:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-46-4f89beac4d83> in <module>
1 test = glob.glob('D:\\Test cods\\KerasCNNClean\\data\\test\\*.jpg')
----> 2 test = pd.DataFrame([[p.split('/')[3],p] for p in test], columns = ['image','path']) #[::20] #limit for Kaggle Demo
3 print("loading train data")
4 test_data = normalize_image_features(test['path'])
5 np.save(r'D:/Test cods/KerasCNNClean/npyTest.test.npy', test_data, allow_pickle=True, fix_imports=True)
<ipython-input-46-4f89beac4d83> in <listcomp>(.0)
1 test = glob.glob('D:\\Test cods\\KerasCNNClean\\data\\test\\*.jpg')
----> 2 test = pd.DataFrame([[p.split('/')[3],p] for p in test], columns = ['image','path']) #[::20] #limit for Kaggle Demo
3 print("loading train data")
4 test_data = normalize_image_features(test['path'])
5 np.save(r'D:/Test cods/KerasCNNClean/npyTest.test.npy', test_data, allow_pickle=True, fix_imports=True)
IndexError: list index out of range
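The traceback points at p.split('/')[3]: on Windows, glob returns paths with backslashes (D:\Test cods\...), so splitting on '/' yields a single-element list and index 3 is out of range. A minimal platform-independent sketch, assuming the 'image' column should hold the file name:

import os

test = glob.glob('D:\\Test cods\\KerasCNNClean\\data\\test\\*.jpg')
# split on the OS path separator instead of a hard-coded '/'
test = pd.DataFrame([[os.path.normpath(p).split(os.sep)[-1], p] for p in test],
                    columns=['image', 'path'])

The same applies to the train DataFrame above, where p.split('/')[3] and p.split('/')[4] would become parts[-2] (the Type_x folder) and parts[-1] (the file name) after parts = os.path.normpath(p).split(os.sep).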
I have been trying to build a deep learning model, using Keras as the API with TensorFlow 2.2 and CUDA 10.2.89. My dataset is "relatively big" (27500 400x400 images), so I've tried using both keras.utils.Sequence (here) and tf.keras.preprocessing.image.ImageDataGenerator (here) to fit these images into memory in batches. Yet neither of these has worked so far; I must be using them incorrectly, but I can't see what's wrong with my code.
Using keras.utils.Sequence:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from tensorflow import keras
class BatchGenerator(keras.utils.Sequence):
    def __init__(self, inputs, targets, batch_size=32):
        self.inputs = inputs
        self.targets = targets
        self.batch_size = batch_size

    def __len__(self):
        return len(self.targets) // self.batch_size

    def __getitem__(self, idx):
        i = idx * self.batch_size
        x = self.inputs[i : i + self.batch_size]
        y = self.targets[i : i + self.batch_size]
        return x, y
# Load images and labels with normalization and one hot encoding
# images: np.array, (27512, 480, 480, 3); labels: np.array, (27512, 480, 480, 9); nb_class = 9
images, labels, nb_class = data_loader('../Data/Images', '../Data/labels')
# Creation of the 0.2 validation split
train = list(range(images.shape[0]//5, images.shape[0]))
test = list(range(0, images.shape[0]//5))
# Batch generation
train_gen = BatchGenerator(images[train], labels[train], batch_size, data_aug)
valid_gen = BatchGenerator(images[test], labels[test], batch_size, data_aug)
# Model fitting
history = model.fit(train_gen, epochs=nb_epoch, verbose=1,
validation_data=valid_gen, shuffle=False, callbacks=callbacks)
Using ImageDataGenerator and flow:
from keras.preprocessing.image import ImageDataGenerator
# Load images and labels with normalization and one hot encoding
# images: np.array, (27512, 480, 480, 3); labels: np.array, (27512, 480, 480, 9); nb_class = 9
images, labels, nb_class = data_loader('../Data/Images', '../Data/labels')
# Batch generation with data augmentation instance
datagen = ImageDataGenerator(
vertical_flip=True,
horizontal_flip=True,
preprocessing_function=augmentations_color,
samplewise_center=True,
samplewise_std_normalization=True,
validation_split=0.2
)
# Batch generation application
train_gen = datagen.flow(images, labels, batch_size=16,
shuffle=True, subset='training')
valid_gen = datagen.flow(images, labels, batch_size=16,
shuffle=True, subset='validation')
# Model fitting
history = model.fit(train_gen, epochs=nb_epoch, verbose=1,
validation_data=valid_gen, shuffle=False, callbacks=callbacks)
In both cases, the code crashes during batch generation and never reaches the model fitting part, complaining:
Traceback (most recent call last):
File "main.py", line 190, in <module>
conf_file=train_set.cfg_path)
File "main.py", line 92, in main
shuffle=True, subset='training')
File "/hpc_htom/kjam268/Virtual_ENV/HistoTAG/lib/python3.6/site-packages/keras_preprocessing/image/image_data_generator.py", line 434, in flow
dtype=self.dtype
File "/hpc_htom/kjam268/Virtual_ENV/HistoTAG/lib/python3.6/site-packages/keras_preprocessing/image/numpy_array_iterator.py", line 103, in __init__
np.unique(y[split_idx:]))):
File "<__array_function__ internals>", line 6, in unique
File "/hpc_htom/kjam268/Virtual_ENV/HistoTAG/lib/python3.6/site-packages/numpy/lib/arraysetops.py", line 261, in unique
ret = _unique1d(ar, return_index, return_inverse, return_counts)
File "/hpc_htom/kjam268/Virtual_ENV/HistoTAG/lib/python3.6/site-packages/numpy/lib/arraysetops.py", line 314, in _unique1d
ar = np.asanyarray(ar).flatten()
numpy.core._exceptions.MemoryError: Unable to allocate 189. GiB for an array with shape (50711040000,) and data type float32
I have trimmed the code down to the essentials, but I can post all of it if needed. I tried both scenarios with batch sizes as small as 1, without improvement.
Any ideas on what I could try in order to train my model?
Thank you
You can follow the official tutorial Image segmentation with a U-Net-like architecture; here is the most relevant coding part for your case. The key difference from your code is that this Sequence stores only file paths and loads each batch's images from disk on the fly, so the whole dataset never has to sit in memory at once.
Generator
class BatchGenerator(keras.utils.Sequence):
    """Helper to iterate over the data (as Numpy arrays)."""

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        return len(self.target_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Returns tuple (input, target) corresponding to batch #idx."""
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_input_img_paths):
            img = load_img(path, target_size=self.img_size)
            x[j] = img
        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
        for j, path in enumerate(batch_target_img_paths):
            img = load_img(path, target_size=self.img_size, color_mode="grayscale")
            y[j] = np.expand_dims(img, 2)
            # Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
            y[j] -= 1
        return x, y
DataSet
input_dir = "images/"
target_dir = "annotations/trimaps/"
batch_size = 32

input_img_paths = sorted(
    [
        os.path.join(input_dir, fname)
        for fname in os.listdir(input_dir)
        if fname.endswith(".jpg")
    ]
)
target_img_paths = sorted(
    [
        os.path.join(target_dir, fname)
        for fname in os.listdir(target_dir)
        if fname.endswith(".png") and not fname.startswith(".")
    ]
)

print("Number of samples:", len(input_img_paths))
for input_path, target_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(input_path, "|", target_path)
Data Generator
# Split our img paths into a training and a validation set
val_samples = 1000
random.Random(1337).shuffle(input_img_paths)
random.Random(1337).shuffle(target_img_paths)
train_input_img_paths = input_img_paths[:-val_samples]
train_target_img_paths = target_img_paths[:-val_samples]
val_input_img_paths = input_img_paths[-val_samples:]
val_target_img_paths = target_img_paths[-val_samples:]
# Instantiate data Sequences for each split
train_gen = BatchGenerator(
batch_size, img_size, train_input_img_paths, train_target_img_paths
)
val_gen = BatchGenerator(
    batch_size, img_size, val_input_img_paths, val_target_img_paths
)
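With these Sequences in place, fitting works as in your snippet, just without the in-memory arrays (a sketch; model, nb_epoch, and callbacks are assumed from your code):

history = model.fit(train_gen, epochs=nb_epoch, verbose=1,
                    validation_data=val_gen, callbacks=callbacks)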
I'm trying to make my first CNN using PyTorch, following online help and code other people have written, and I am trying to reproduce their results. I'm using the Kaggle Dog Breed dataset for this, and below is the error I get. The train loader does not return my images and labels, and any attempt to get them results in an error:
Traceback (most recent call last):
File "E:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1664, in <module>
main()
File "E:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1658, in main
globals = debugger.run(setup['file'], None, None, is_module)
File "E:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1068, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "E:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/sbzfk/PycharmProjects/my_FCN_attempt/Kaggle_Dogs_Competition.py", line 85, in <module>
img, label = next(iter(train_loader))
File "C:\Users\sbzfk\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\utils\data\dataloader.py", line 314, in __next__
batch = self.collate_fn([self.dataset[i] for i in indices])
File "C:\Users\sbzfk\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\utils\data\dataloader.py", line 314, in <listcomp>
batch = self.collate_fn([self.dataset[i] for i in indices])
File "C:/Users/sbzfk/PycharmProjects/my_FCN_attempt/Kaggle_Dogs_Competition.py", line 42, in __getitem__
img = self.transform(img)
File "C:\Users\sbzfk\AppData\Local\Programs\Python\Python37\lib\site-packages\torchvision\transforms.py", line 34, in __call__
img = t(img)
File "C:\Users\sbzfk\AppData\Local\Programs\Python\Python37\lib\site-packages\torchvision\transforms.py", line 187, in __call__
w, h = img.size
TypeError: cannot unpack non-iterable int object
Below is my code:
class DogsDataset(Dataset):
    def __init__(self, filenames, labels, root_dir, transform=None):
        assert len(filenames) == len(labels)  # if the two are not of equal length throw an error
        self.filenames = filenames
        self.labels = labels
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        this_img = join(self.root_dir, 'train', self.filenames[idx] + '.jpg')
        print(this_img)
        img = io.imread(this_img)
        label = self.labels[idx]
        print(label)
        if self.transform:
            img = self.transform(img)
        return [img, label]
batch_size = 64
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset_root = expanduser(join('~', 'Documents', 'kaggle_dogs_dataset'))
# join will intelligently join directories irrespective of OS, and expanduser will
# replace with /home/ in linux or the username in Windows
csv_file = pd.read_csv(join(dataset_root, 'labels.csv')) # csv file has two columns, id which are filenames and breed which are labels
filenames = csv_file.id.values # convert that column to an array, id is the column name and values converty to numpy array
# le = LabelEncoder()
# labels = le.fit_transform(csv_file.breed) # this will just encode the names between 0 to models-1 , basically changing strings to integers
labels = csv_file.breed.values
filenames_train, filenames_eval, labels_train, labels_eval = train_test_split(
    filenames, labels, test_size=0.1, stratify=labels)
# train_test_split is imported from sklearn; as the name implies, it randomly splits the data,
# 10% for evaluation and the rest for training
data_transform = transforms.Compose([transforms.Scale(224),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
dog_train = DogsDataset(filenames_train, labels_train, dataset_root, transform=data_transform)
train_loader = DataLoader(dog_train, batch_size, shuffle=True)
dog_eval = DogsDataset(filenames_eval, labels_eval, dataset_root, transform=data_transform)
eval_loader = DataLoader(dog_eval, batch_size, shuffle=True)
def im_show(axis, inp):
    """Denormalize and show"""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    axis.imshow(inp)
img, label = next(iter(train_loader))
print(img.size(), label.size())
fig = plt.figure(1, figsize=(16, 4))
grid = ImageGrid(fig, 111, nrows_ncols=(1, 4), axes_pad=0.05)
for i in range(img.size()[0]):
    ax = grid[i]
    im_show(ax, img[i])
I've tried debugging it line by line: with transform=None I seem to read all the images, but with transform=data_transform I get this error.
It seems like you are using torchvision's image transforms. Some of these transforms expect a PIL.Image object as input, rather than a tensor or numpy array.
You are using io.imread to read the image file, and I suspect this io is not PIL.Image (skimage.io, for instance), resulting in a numpy array.
Make sure you pass PIL.Image objects to the transforms and that your DogsDataset returns a 3D tensor for the image (C-H-W shaped).
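A minimal sketch of the fix inside __getitem__, assuming the rest of the class stays the same (opening the file directly with PIL instead of io.imread):

from PIL import Image

def __getitem__(self, idx):
    this_img = join(self.root_dir, 'train', self.filenames[idx] + '.jpg')
    img = Image.open(this_img).convert('RGB')  # a PIL.Image, which the transforms expect
    label = self.labels[idx]
    if self.transform:
        img = self.transform(img)  # ToTensor() then yields a C-H-W float tensor
    return [img, label]

Note also that transforms.Scale was deprecated in later torchvision releases in favor of transforms.Resize.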