The following is my code, where I open every image with PIL and then convert it into a PyTorch tensor:
transform = transforms.Compose([transforms.PILToTensor()])
# choose the training and test datasets
train_data = os.listdir('data/training/')
testing_data = os.listdir('data/testing/')
train_tensors = []
test_tensors = []
for train_image in train_data:
    img = Image.open('data/training/' + train_image)
    train_tensors.append(transform(img))
for test_image in testing_data:
    img = Image.open('data/testing/' + test_image)
    test_tensors.append(transform(img))
# Print out some stats about the training and test data
print('Train data, number of images: ', len(train_data))
print('Test data, number of images: ', len(testing_data))
batch_size = 20
train_loader = DataLoader(train_tensors, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_tensors, batch_size=batch_size, shuffle=True)
# specify the image classes
classes = ['checked', 'unchecked', 'other']
# obtain one batch of training images
dataiter = iter(train_loader)
images, labels = dataiter.next()
images = images.numpy()
However, I am getting this error:
RuntimeError: stack expects each tensor to be equal size, but got [4, 66, 268] at entry 0 and [4, 88, 160] at entry 1
This is because my images are not resized before the PIL -> tensor conversion. What is the correct way to resize the images?
Try using ImageFolder from torchvision. Since your images have different sizes, you can use CenterCrop or RandomResizedCrop, depending on your task; check the full list of transforms.
Here is an example:
train_dir = "data/training/"
train_dataset = datasets.ImageFolder(
    train_dir,
    transforms.Compose([
        transforms.RandomResizedCrop(img_size),  # image size: int or tuple
        # Add more transforms here
        transforms.ToTensor(),  # convert to tensor at the end
    ]))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
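If you would rather keep your original loop-based approach, a minimal sketch (the 224x224 target size is an arbitrary choice for illustration) is to put a fixed-size Resize in front of PILToTensor, so every tensor has the same shape and the DataLoader can stack them:
from PIL import Image
from torchvision import transforms

# Resize every image to one fixed size so the DataLoader can stack the batch;
# 224x224 is an arbitrary example, pick whatever your model expects.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # (height, width)
    transforms.PILToTensor(),
])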
Related
I am training a neural network with PyTorch, and I would like to understand more about the MNIST dataset.
The dataloader looks like this:
batch_size = 128
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])
train_dataset = datasets.MNIST('./data', transform=transform, download=True)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST('./data', transform=transform, download=True, train=False)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
However, when I train on my own dataset there are problems loading the data. What I know is that the MNIST dataset in PyTorch has samples of shape (1, 28, 28), i.e. grayscale images. I want to know how they are saved. Are they png, jpg, jpeg, or npy files?
The MNIST dataset class is based on this code. (The raw MNIST files themselves are distributed in the IDX binary format, not as png/jpg images; torchvision parses them into tensors.) If you would like to use your own dataset, you should write a custom dataset class that reads your data based on its properties: image size, number of channels, labels, etc.
For instance, something like this example:
import os

import numpy as np
import pandas as pd
import scipy.io as scipyIO
import torch
from torch.utils.data import Dataset

class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = scipyIO.loadmat(img_path).get('rawData')
        image = image.astype(np.float64)
        h, w = image.shape
        image = torch.from_numpy(image).reshape(1, h, w)
        image = image.float()
        ua = self.img_labels.iloc[idx, 1]  # 1: ua value
        us = self.img_labels.iloc[idx, 2]  # 2: us value
        g = self.img_labels.iloc[idx, 3]   # 3: g value
        gt = torch.tensor([ua, us, g])
        gt = gt.float()
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            gt = self.target_transform(gt)
        return image, gt
(above example is based on this repository)
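For completeness, a hypothetical usage sketch (annotations.csv and data/ are placeholder paths; batching like this assumes all images share the same height and width, otherwise add a resizing transform):
from torch.utils.data import DataLoader

dataset = CustomImageDataset("annotations.csv", "data/")  # placeholder paths
loader = DataLoader(dataset, batch_size=16, shuffle=True)
images, gts = next(iter(loader))  # images: (16, 1, H, W), gts: (16, 3)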
I am doing multi-class bounding box regression on DICOM images, but the dataset is very large (140 GB), so I can't load all of the training images into memory. I want to build a TensorFlow dataset generator from the DICOM files and this dataframe:
|        | FilePath                      | CaseNumber    | SliceId     | Class                    | BBox            | ImageHeight | ImageWidth |
|--------|-------------------------------|---------------|-------------|--------------------------|-----------------|-------------|------------|
| 147353 | 1188164/Seri5/30364335695.dcm | 1188164/Seri5 | 30364335695 | Healthy                  | nan             | 512         | 512        |
| 147354 | 16567/Seri1/36459582300.dcm   | 16567/Seri1   | 36459582300 | Healthy                  | nan             | 512         | 512        |
| 147355 | 19242/Seri1/37897787504.dcm   | 19242/Seri1   | 37897787504 | Healthy                  | nan             | 512         | 512        |
| 147356 | 16416/Seri2/29526748793.dcm   | 16416/Seri2   | 29526748793 | Healthy                  | nan             | 512         | 512        |
| 147357 | 26321/Seri2/29284668905.dcm   | 26321/Seri2   | 29284668905 | Abdominal aort anevrizma | 248,177-334,291 | 512         | 512        |
How can I build a data input pipeline (a build_dataset function) using TensorFlow so that I can train my model with just these lines of code:
train_df, test_df = train_test_split(df, test_size=0.2)
train_images, train_labels, train_bboxes = build_dataset(train_df)
test_images, test_labels, test_bboxes = build_dataset(test_df)
train_targets = {
    "class_label": train_labels,
    "bounding_box": train_bboxes
}
test_targets = {
    "class_label": test_labels,
    "bounding_box": test_bboxes
}
history = model.fit(
    train_images, train_targets,
    validation_data=(test_images, test_targets),
    batch_size=32,
    epochs=20,
)
I have tried this:
def build_dataset(dataframe):
    images = tf.data.Dataset.from_tensor_slices(dataframe["FilePath"])
    images = images.map(lambda x: tf.py_function(read_dcm_as_array, [x], [tf.string]))
    lb = LabelBinarizer()
    labels = lb.fit_transform(dataframe["Class"])
    labels = tf.data.Dataset.from_tensor_slices(labels)
    bboxes = dataframe["BBox"]
    bboxes = bboxes.str.replace(",", "-")
    bboxes = bboxes.str.split("-", expand=True).astype(int)
    bboxes = tf.data.Dataset.from_tensor_slices(bboxes)
    # targets = tf.data.Dataset.zip(labels, boxes)  # this row gives another error :(
    # targets = targets.map(lambda x, y: tf.concat([x, y], axis=-2))
    return images, labels, bboxes
But it gives me this error:
ValueError: `y` argument is not supported when using dataset as input.
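For what it's worth, a minimal sketch of the direction the commented-out zip lines were heading: the usual fix for this ValueError is to pack the targets into the dataset itself, so model.fit receives one dataset of (image, targets) pairs instead of a separate y argument. Here read_dcm_as_array is the asker's own helper (assumed to return a float32 image array), and the fillna placeholder box for the Healthy rows is an assumption:
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer

def build_dataset(dataframe, batch_size=32):
    # Read each DICOM lazily through the helper above.
    images = tf.data.Dataset.from_tensor_slices(dataframe["FilePath"])
    images = images.map(lambda x: tf.py_function(read_dcm_as_array, [x], tf.float32))
    # One-hot labels plus "x1,y1-x2,y2" boxes parsed into four ints.
    labels = LabelBinarizer().fit_transform(dataframe["Class"])
    boxes = (dataframe["BBox"].fillna("0,0-0,0")  # assumed placeholder for Healthy rows
             .str.replace(",", "-")
             .str.split("-", expand=True).astype(int))
    targets = tf.data.Dataset.from_tensor_slices(
        {"class_label": labels, "bounding_box": boxes.to_numpy()})
    # Note: Dataset.zip takes a single tuple argument, which is why
    # tf.data.Dataset.zip(labels, boxes) raised an error above.
    return tf.data.Dataset.zip((images, targets)).batch(batch_size)

# history = model.fit(build_dataset(train_df),
#                     validation_data=build_dataset(test_df), epochs=20)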
I understand how to split data into training and validation sets using ImageDataGenerator:
from keras.preprocessing.image import ImageDataGenerator
train_path = "/content/drive/train/"
valid_path = "/content/drive/val/"
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    directory=train_path,
    batch_size=32,
    class_mode='binary',
    target_size=(150, 150)
)
validation_generator = test_datagen.flow_from_directory(
    directory=valid_path,
    batch_size=32,
    class_mode='binary',
    target_size=(150, 150)
)
I've now built and saved a model with this data that I can load:
model = load_model('/content/drive/model_checkpoint.h5')
And I want to predict on a test set with it.
But I feel like I'm doing the prediction a really bulky way, I'm doing:
file_list = ['/content/drive/test/all_classes/' + i for i in os.listdir('/content/drive/test/all_classes/') if i.endswith('.JPEG')]
true_y_list = [1 if i.startswith('cancer') else 0 for i in os.listdir('/content/drive/test/all_classes/') if i.endswith('.JPEG')]
from keras.preprocessing import image
prediction_list = []
for i in file_list:
    test_image = image.load_img(i, target_size=(150, 150))
    images = image.img_to_array(test_image)
    images /= 255.0
    images = np.expand_dims(images, axis=0)
    prediction = model.predict(images)
    if prediction < 0.5:
        prediction_list.append(0)
    else:
        prediction_list.append(1)
My problem is that I want to evaluate my model on the test set (i.e. get accuracy and loss with model.evaluate), and I'm not clear how to do that with the way I'm predicting.
I thought about changing the above code to read the images into an np.array and re-shape:
from keras.preprocessing import image
test_image_list = []
prediction_list = []
for i in file_list:
    test_image = image.load_img(i, target_size=(150, 150))
    images = image.img_to_array(test_image)
    images /= 255.0
    images = np.expand_dims(images, axis=0)
    test_image_list.append(images)
    prediction = model.predict(images)
    if prediction < 0.5:
        prediction_list.append(0)
    else:
        prediction_list.append(1)
test_image_np = np.array(test_image_list)
So then I can process the data the same way as the training and validation data, but the shape of test_image_np is (270, 1, 150, 150, 3), and when I try to reshape it to (270, 150, 150) with test_image_np.reshape(270, 150, 150), I get the error: cannot reshape array of size 18225000 into shape (270,150,150).
Could someone explain to me how I can read in a set of test images and labels (i.e. not training or validation sets) and predict with my model that I've already made (and evaluate the loss and accuracy of the prediction, which was my original aim).
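One hedged sketch of the evaluation step, assuming the model was compiled with metrics=['accuracy'] and the loop above has already filled test_image_list and true_y_list: stack the (1, 150, 150, 3) arrays along the batch axis instead of reshaping, since reshaping to (270, 150, 150) would have to throw away the 3 colour channels.
import numpy as np

test_images = np.vstack(test_image_list)  # (270, 1, 150, 150, 3) -> (270, 150, 150, 3)
true_y = np.array(true_y_list)
loss, accuracy = model.evaluate(test_images, true_y, batch_size=32)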
I am trying to build an image classification program using AutoKeras, TensorFlow, and Pandas.
The code is as follows:
from keras_preprocessing.image import ImageDataGenerator
import autokeras as ak
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
# directory with subfolders (that contain other subfolders) that contain images
data_dir = "/home/jack/project/"
# dataframe initialization
dataframe = pd.read_excel("/home/jack/project/pathsandlabels.xlsx")
# splitting the dataset
train_dataframe = dataframe.sample(frac=0.75, random_state=200)
test_dataframe = dataframe.drop(train_dataframe.index)
# Augmenting it
datagen = ImageDataGenerator(rescale=1./255., horizontal_flip=True, shear_range=0.6, zoom_range=0.4,
                             validation_split=0.25)
# Setting up a train generator
train_generator = datagen.flow_from_dataframe(
    dataframe=train_dataframe,
    directory="/home/jack/project",
    x_col="filename",
    y_col="assessment",
    subset="training",
    seed=42,
    batch_size=16,
    shuffle=True,
    class_mode="binary",
    target_size=(224, 224)
)
# setting up a validation generator
validation_generator = datagen.flow_from_dataframe(
    dataframe=train_dataframe,
    directory="/home/jack/project/",
    x_col="filename",
    y_col="assessment",
    subset="validation",
    batch_size=16,
    seed=42,
    shuffle=True,
    class_mode="binary",
    target_size=(224, 224)
)
# Another augmentation but for test data
test_gen = ImageDataGenerator(rescale=1./255.)
# test generator set up
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_dataframe,
    directory="/home/jack/project/",
    x_col="filename",
    y_col=None,
    batch_size=16,
    seed=42,
    shuffle=False,
    class_mode=None,
    target_size=(224, 224)
)
# this function will yield the variables we need to work with in order to create a train and test set
# it will iterate through the generator
def my_iterator(generator):
    for img_batch, targets_batch in generator:
        yield img_batch, targets_batch
# Train and Validation set creation
# The first problem is here
# 1: Invalid argument: Value Error: 'generator' yielded an element of shape (16,224,224,3) where an element
# of shape (224,) was expected.
train_set = tf.data.Dataset.from_generator(lambda: my_iterator(train_generator), output_shapes=(224, 244),
                                           output_types=(tf.float32, tf.float32))
val_set = tf.data.Dataset.from_generator(lambda: my_iterator(validation_generator), output_shapes=(224, 224),
                                         output_types=(tf.float32, tf.float32))
# we check the output of both validation and train sets
print(train_set)
print(val_set)
# This piece of code is where the other two issues are:
# 2: squeeze(axis=2) gives this error: ValueError: cannot select an axis to squeeze out which has size not equal to one
# 3: Issue 2 can be averted by setting axis=None, but the next problem is plt.show() gives an empty image.
for image, label in train_set.take(1):
    print("Image shape: ", image.numpy().shape)
    print("Label: ", label.numpy().shape)
    plt.imshow(image.numpy()[0].squeeze(axis=2) * 255)
    plt.show()
clf = ak.ImageClassifier(overwrite=True, max_trials=1, seed=5)
clf.fit(x=train_set, epochs=20)
print(clf.evaluate(val_set))
I mentioned the issues I face as comments in the code, but I will explain again.
The biggest issue is the first one: ValueError: 'generator' yielded an element of shape (16,224,224,3) where an element of shape (224,) was expected. This happens when I try to initialize my test set.
What I tried:
Changing output_shapes to (224,224,3) and (16,224,224,3) (didn't help; threw a different error saying "The two sequences do not have the same length").
Deleting batch_size from train_generator (this set it back to the default 32, which my PC can't handle).
Changing target_size within the generators to (224,224,3) and (16,224,224,3). Didn't work.
Changing the number of variables that my_iterator yields. Didn't work (error message: expected n values to unpack (n is either 3 or 4), got 2).
Changing batch_size to a number that the total number of images is divisible by (didn't work; throws the original error message).
How the data is stored:
Excel, single sheet, two columns (A and B) named filename and assessment. filename contains paths to the images (e.g. "/subfolder/subfolder/subfolder/A2c3jc3291n.jpeg", without the quotes, obviously).
assessment contains the classes; there are only two in this case.
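Not a definitive fix, but one detail worth sketching: since my_iterator yields whole batches, the element spec passed to from_generator has to describe a batched (images, labels) pair, and the newer output_signature argument (available from TF 2.4) makes that explicit. A minimal sketch, assuming the generator yields (img_batch, targets_batch):
train_set = tf.data.Dataset.from_generator(
    lambda: my_iterator(train_generator),
    output_signature=(
        # A batch of RGB images; None marks the variable batch dimension.
        tf.TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32),
        # One binary label per image in the batch.
        tf.TensorSpec(shape=(None,), dtype=tf.float32),
    ),
)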
I'm working on a machine learning pipeline to classify images. My problem is that my dataset is imbalanced: across my 5 categories of images, I have about 400 images of one class and about 20 images of each of the other classes.
I would like to balance my train set by applying data augmentation only to certain classes of my train set.
Here's the code I'm using for creating the train and validation sets:
# Import data
data_dir = pathlib.Path(r"C:\Train set")
# Define train and validation sets (80% - 20%)
batch_size = 32
img_height = 240
img_width = 240
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)
And here's how I apply data augmentation, although this would be for the entire train set:
# Apply data augmentation
data_augmentation = keras.Sequential(
    [
        layers.experimental.preprocessing.RandomFlip("horizontal",
                                                     input_shape=(img_height,
                                                                  img_width,
                                                                  3)),
        layers.experimental.preprocessing.RandomRotation(0.1),
        layers.experimental.preprocessing.RandomZoom(0.1),
    ]
)
Is there any way to go into my train set, extract those categories that have fewer images, and apply data augmentation only to them?
Thanks in advance!
I suggest not using ImageDataGenerator but a customized tf.data.Dataset. In a mapping operation, you can treat categories differently, e.g.:
def preprocess(filepath):
    category = tf.strings.split(filepath, os.sep)[0]
    read_file = tf.io.read_file(filepath)
    decode = tf.image.decode_jpeg(read_file, channels=3)
    resize = tf.image.resize(decode, (200, 200))
    image = tf.expand_dims(resize, 0)
    if tf.equal(category, 'tf_astronauts'):
        image = tf.image.flip_up_down(image)
        image = tf.image.flip_left_right(image)
    # image = tf.image.convert_image_dtype(image, tf.float32)
    # category = tf.cast(tf.equal(category, 'tf_astronauts'), tf.int32)
    return image, category
Let me demonstrate. First, let's make a folder with training images:
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
from skimage import data
from glob2 import glob
import os
cat = data.chelsea()
astronaut = data.astronaut()
for category, picture in zip(['tf_cats', 'tf_astronauts'], [cat, astronaut]):
    os.makedirs(category, exist_ok=True)
    for i in range(5):
        cv2.imwrite(os.path.join(category, category + f'_{i}.jpg'),
                    cv2.cvtColor(picture, cv2.COLOR_RGB2BGR))
files = glob('tf_*\\*.jpg')
Now you have these files:
['tf_astronauts\\tf_astronauts_0.jpg',
'tf_astronauts\\tf_astronauts_1.jpg',
'tf_astronauts\\tf_astronauts_2.jpg',
'tf_astronauts\\tf_astronauts_3.jpg',
'tf_astronauts\\tf_astronauts_4.jpg',
'tf_cats\\tf_cats_0.jpg',
'tf_cats\\tf_cats_1.jpg',
'tf_cats\\tf_cats_2.jpg',
'tf_cats\\tf_cats_3.jpg',
'tf_cats\\tf_cats_4.jpg']
Let's apply the tf.image transformations only to the astronaut category, using the preprocess function defined above.
Then, we make the tf.data.Dataset:
train = tf.data.Dataset.from_tensor_slices(files).\
    shuffle(10).take(4).map(preprocess).batch(4)
And when you iterate the dataset, you'll see that only the astronaut is flipped:
fig = plt.figure()
plt.subplots_adjust(wspace=.1, hspace=.2)
images, labels = next(iter(train))
for index, (image, label) in enumerate(zip(images, labels)):
    ax = plt.subplot(2, 2, index + 1)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(label.numpy().decode())
    ax.imshow(image[0].numpy().astype(int))
plt.show()
Please note that for training you will need to uncomment the two lines in preprocess so that it returns an array of floats and an integer label.
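As a hedged sketch of what that training-ready pipeline could look like (note that preprocess also adds a leading batch dimension with expand_dims, so you would likely squeeze it back out, or drop that line, before batching; model is a placeholder):
train = (tf.data.Dataset.from_tensor_slices(files)
         .shuffle(10)
         .map(preprocess)                          # yields (float image, int label)
         .map(lambda x, y: (tf.squeeze(x, 0), y))  # remove the extra leading dim
         .batch(4))
# model.fit(train, epochs=5)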