I am training a neural network with PyTorch, and I would like to understand more about the MNIST dataset.
The dataloader looks like this:
batch_size = 128

# MNIST is single-channel, so mean and std are one-element tuples. Note that
# `(0.5)` is just the float 0.5; the trailing comma is what makes it a tuple,
# which matches the per-channel sequence form that Normalize documents.
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split (train=True is the default; spelled out to mirror the test split).
train_dataset = datasets.MNIST('./data', train=True, transform=transform, download=True)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split; not shuffled so evaluation order stays fixed.
test_dataset = datasets.MNIST('./data', train=False, transform=transform, download=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
However, when I train on my own dataset, there are problems loading the data. What I know is that the MNIST dataset in PyTorch yields grayscale images of shape (1, 28, 28). I want to know how they are stored. Are they png, jpg, jpeg or npy files?
The MNIST dataset class is based on this code. If you would like to use your own dataset, you should write your custom dataset class to read your dataset based on its properties, like its image size, number of channels, labels, etc.
For instance something like this example:
class CustomImageDataset(Dataset):
    """Dataset of 2-D images stored in .mat files, each with three target values.

    The annotations file is read with ``pd.read_csv``. Column 0 holds the
    image file name (relative to ``img_dir``); columns 1, 2 and 3 hold the
    ``ua``, ``us`` and ``g`` target values.

    Args:
        annotations_file: Path to the CSV file with file names and targets.
        img_dir: Directory that contains the .mat files.
        transform: Optional callable applied to the image tensor.
        target_transform: Optional callable applied to the target tensor.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the annotations table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, gt)`` for row ``idx``.

        ``image`` is a float32 tensor of shape ``(1, h, w)`` and ``gt`` is a
        float32 tensor ``[ua, us, g]``.

        Raises:
            KeyError: If the .mat file has no ``rawData`` variable.
            ValueError: If ``rawData`` is not two-dimensional.
        """
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        mat = scipyIO.loadmat(img_path)
        if 'rawData' not in mat:
            # Fail with a clear message instead of an AttributeError on None.
            raise KeyError(f"'rawData' variable not found in {img_path}")

        # Convert straight to float32; the tensor is returned as float32 anyway.
        raw = mat['rawData'].astype(np.float32)
        h, w = raw.shape  # unpacking enforces a 2-D array
        image = torch.from_numpy(raw).reshape(1, h, w)  # add the channel axis

        ua = self.img_labels.iloc[idx, 1]  # 1: ua value
        us = self.img_labels.iloc[idx, 2]  # 2: us value
        g = self.img_labels.iloc[idx, 3]   # 3: g value
        gt = torch.tensor([ua, us, g]).float()

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            gt = self.target_transform(gt)
        return image, gt
(above example is based on this repository)
Related
The following is my code where I'm converting every image to PIL and then turning them into Pytorch tensors:
transform = transforms.Compose([transforms.PILToTensor()])

# choose the training and test datasets
train_data = os.listdir('data/training/')
testing_data = os.listdir('data/testing/')

# Convert every image file to a tensor. Nothing is resized here, so each
# tensor keeps the height and width of its source image.
train_tensors = []
test_tensors = []
for train_image in train_data:
    img = Image.open(os.path.join('data/training/', train_image))
    train_tensors.append(transform(img))
for test_image in testing_data:
    img = Image.open(os.path.join('data/testing/', test_image))
    test_tensors.append(transform(img))

# Print out some stats about the training and test data
print('Train data, number of images: ', len(train_data))
print('Test data, number of images: ', len(testing_data))

batch_size = 20
train_loader = DataLoader(train_tensors, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_tensors, batch_size=batch_size, shuffle=True)

# specify the image classes
classes = ['checked', 'unchecked', 'other']

# obtain one batch of training images
dataiter = iter(train_loader)
# Use the built-in next(); DataLoader iterators no longer expose .next().
# NOTE(review): train_tensors holds bare image tensors, so a batch is a single
# stacked tensor rather than an (images, labels) pair — confirm where the
# labels are supposed to come from.
images, labels = next(dataiter)
images = images.numpy()
However, I am getting this error:
RuntimeError: stack expects each tensor to be equal size, but got [4, 66, 268] at entry 0 and [4, 88, 160] at entry 1
This is because my images are not resized prior to PIL -> Tensor. What is the correct way of resizing data images?
Try using ImageFolder from torchvision. Assuming the images have different sizes, you can use CenterCrop or RandomResizedCrop, depending on your task. Check the full list of transforms.
Here is an example:
train_dir = "data/training/"

# Every sample is cropped and resized to one common size before it becomes a
# tensor, so the loader can stack samples into a batch.
train_dataset = datasets.ImageFolder(
    root=train_dir,
    transform=transforms.Compose([
        transforms.RandomResizedCrop(img_size),  # image size int or tuple
        # Add more transforms here
        transforms.ToTensor(),  # convert to tensor at the end
    ]),
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
I'm working on a machine learning process to classify images. My problem is that my dataset is imbalanced: of my 5 categories of images, one class has about 400 images and each of the other classes has about 20 images.
I would like to balance my train set by applying data augmentation only to certain classes of my train set.
Here's the code I'm using for creating the train and validation sets:
# Import data
data_dir = pathlib.Path(r"C:\Train set")

# Define train and validation sets (80% - 20%)
batch_size = 32
img_height, img_width = 240, 240

# Both subsets come from the same directory with the same seed and split
# fraction; only the `subset` argument differs between the two calls.
train_ds, val_ds = (
    tf.keras.preprocessing.image_dataset_from_directory(
        data_dir,
        validation_split=0.2,
        subset=subset,
        seed=123,
        image_size=(img_height, img_width),
        batch_size=batch_size,
    )
    for subset in ("training", "validation")
)
And here's how I apply data augmentation, although this would be for the entire train set:
# Apply data augmentation
# Three random layers chained in order: horizontal flip, rotation, zoom.
# The first layer also declares the input shape (height, width, 3 channels).
data_augmentation = keras.Sequential([
    layers.experimental.preprocessing.RandomFlip(
        "horizontal",
        input_shape=(img_height, img_width, 3),
    ),
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1),
])
Is there any way to go into my train set, extract those categories that have fewer images, and apply data augmentation only to them?
Thanks in advance!
I suggest not using ImageDataGenerator but a customized tf.data.Dataset. In a mapping operation, you can treat categories differently, e.g.:
def preprocess(filepath):
    """Load one JPEG, resize it to 200x200 and flip it only for 'tf_astronauts'.

    The category is the first component of ``filepath`` when split on
    ``os.sep``, i.e. the top-level folder name.

    Returns:
        ``(image, category)``, where ``image`` carries a leading axis of
        size 1 added by ``tf.expand_dims`` and ``category`` is a string tensor.
    """
    category = tf.strings.split(filepath, os.sep)[0]
    raw_bytes = tf.io.read_file(filepath)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (200, 200))
    image = tf.expand_dims(resized, 0)
    # Augment the selected category only; all other images pass through untouched.
    if tf.equal(category, 'tf_astronauts'):
        image = tf.image.flip_up_down(image)
        image = tf.image.flip_left_right(image)
    # For training, uncomment the two lines below so the function returns a
    # float image and an integer label instead of a string category.
    # image = tf.image.convert_image_dtype(image, tf.float32)
    # category = tf.cast(tf.equal(category, 'tf_astronauts'), tf.int32)
    return image, category
Let me demonstrate it. Let's make you a folder with training images:
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
from skimage import data
from glob2 import glob
import os
# Two sample pictures from skimage.data, one per category.
cat = data.chelsea()
astronaut = data.astronaut()

# Write five identical JPEG copies of each picture into a folder named after
# its category.
for category, picture in zip(['tf_cats', 'tf_astronauts'], [cat, astronaut]):
    os.makedirs(category, exist_ok=True)
    # The colour conversion does not depend on i, so do it once per picture.
    picture_bgr = cv2.cvtColor(picture, cv2.COLOR_RGB2BGR)
    for i in range(5):
        cv2.imwrite(os.path.join(category, category + f'_{i}.jpg'), picture_bgr)

# Build the pattern with os.path.join so it uses the platform's separator
# instead of a hard-coded backslash.
files = glob(os.path.join('tf_*', '*.jpg'))
Now you have these files:
['tf_astronauts\\tf_astronauts_0.jpg',
'tf_astronauts\\tf_astronauts_1.jpg',
'tf_astronauts\\tf_astronauts_2.jpg',
'tf_astronauts\\tf_astronauts_3.jpg',
'tf_astronauts\\tf_astronauts_4.jpg',
'tf_cats\\tf_cats_0.jpg',
'tf_cats\\tf_cats_1.jpg',
'tf_cats\\tf_cats_2.jpg',
'tf_cats\\tf_cats_3.jpg',
'tf_cats\\tf_cats_4.jpg']
Let's apply transformations only to the astronaut category. Let's use the tf.image transformations.
def preprocess(filepath):
    """Load one JPEG, resize it to 200x200 and flip it only for 'tf_astronauts'.

    The category is the first component of ``filepath`` when split on
    ``os.sep``, i.e. the top-level folder name.

    Returns:
        ``(image, category)``, where ``image`` carries a leading axis of
        size 1 added by ``tf.expand_dims`` and ``category`` is a string tensor.
    """
    category = tf.strings.split(filepath, os.sep)[0]
    raw_bytes = tf.io.read_file(filepath)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (200, 200))
    image = tf.expand_dims(resized, 0)
    # Augment the selected category only; all other images pass through untouched.
    if tf.equal(category, 'tf_astronauts'):
        image = tf.image.flip_up_down(image)
        image = tf.image.flip_left_right(image)
    # For training, uncomment the two lines below so the function returns a
    # float image and an integer label instead of a string category.
    # image = tf.image.convert_image_dtype(image, tf.float32)
    # category = tf.cast(tf.equal(category, 'tf_astronauts'), tf.int32)
    return image, category
Then, we make the tf.data.Dataset:
# Shuffle the file paths, keep four of them, decode each with preprocess and
# emit them as a single batch of four.
train = (
    tf.data.Dataset.from_tensor_slices(files)
    .shuffle(10)
    .take(4)
    .map(preprocess)
    .batch(4)
)
And when you iterate the dataset, you'll see that only the astronaut is flipped:
fig = plt.figure()
plt.subplots_adjust(wspace=.1, hspace=.2)

# Pull the first batch and draw each image in a 2x2 grid, titled with its category.
images, labels = next(iter(train))
for index, (image, label) in enumerate(zip(images, labels)):
    ax = plt.subplot(2, 2, index + 1)  # subplot positions are 1-based
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(label.numpy().decode())  # bytes -> str
    # image[0] drops the leading size-1 axis; cast to int for imshow.
    ax.imshow(image[0].numpy().astype(int))
plt.show()
Please note, for training you will need to uncomment the two lines in preprocess so it returns an array of floats and an integer.
I want to play around with a neural network that recognizes handwritten numbers. I found some of these on the web which use PyTorch, however they seem to download the data from the MNIST website in a particular format. My data is, however, available as follows:
# Both arrays are read while the archive is open, so they stay usable after
# the `with` block closes the file.
with np.load('prediction-challenge-01-data.npz') as fh:
    data_x = fh['data_x']
    data_y = fh['data_y']
Where data_x is the training data and data_y are the labels of the pictures. I want these data sets to be in the same format as trainloader as shown below:
# Reference setup: the torchvision MNIST training split wrapped in a shuffling
# loader that yields batches of 64.
trainset = datasets.MNIST(
    root='/data/mnist',
    train=True,
    transform=transform,
    download=True,
)
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)
Where trainloader already has the training set data_x and labels data_y together in one set.
Is there any way to do this?
Edit: Shapes of data_x and data_y:
In [1]: data_x.shape
Out[2]: (20000, 1, 28, 28)
In [5]: data_y.shape
Out[7]: (20000,)
You can easily create your own dataset. Just inherit from torch.utils.data.Dataset and implement
__getitem__ at the very least:
Here is a quick and dirty example to get you going:
class YourOwnDataset(torch.utils.data.Dataset):
    """Map-style dataset backed by an ``.npz`` archive.

    The archive must contain a ``data_x`` array (samples) and a ``data_y``
    array (labels), indexed along their first axis.

    Args:
        input_file_path: Path to the ``.npz`` file.
        transformations: Optional callable applied to each image; when it is
            ``None`` the image is returned as a NumPy array.
    """

    def __init__(self, input_file_path, transformations=None):
        super().__init__()
        self.path = input_file_path
        self.transforms = transformations
        # Read both arrays while the archive is open; they remain valid afterwards.
        with np.load(self.path) as fh:
            self.data = fh['data_x']
            self.labels = fh['data_y']

    def __getitem__(self, index):
        """Return ``(img, label)`` for the sample at ``index``."""
        data = self.data[index]
        # Labels are returned as stored (e.g. integer class ids); adapt here
        # if your loss expects a different encoding.
        label = self.labels[index]
        # Assumes each sample is channel-first (C, H, W), e.g. (1, 28, 28).
        # Move the channel axis last, because torchvision's ToTensor expects
        # an (H, W, C) array and converts it back to (C, H, W).
        img = np.transpose(data, (1, 2, 0))
        if self.transforms is not None:
            img = self.transforms(img)
        # Return this sample's label, not the whole label array.
        return img, label

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)
and you can create your dataset like :
from torchvision import transforms
# ToTensor is the only step used here; append further transforms to the list as needed.
transformations = transforms.Compose([transforms.ToTensor()])
trainset = YourOwnDataset(
    input_file_path='prediction-challenge-01-data.npz',
    transformations=transformations,
)
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)
My goal is to feed a Keras autoencoder model only the (batches of) features from a tf.data.Dataset object.
I'm loading the dataset, formatting the images and creating batches like this:
#load dataset
# NOTE(review): the subsplit/percent split API depends on the installed
# tensorflow_datasets version — confirm it is available in your environment.
(raw_train, raw_validation, raw_test), metadata = tfds.load(
    'cats_vs_dogs',
    split=[
        tfds.Split.TRAIN.subsplit(tfds.percent[:80]),
        tfds.Split.TRAIN.subsplit(tfds.percent[80:90]),
        tfds.Split.TRAIN.subsplit(tfds.percent[90:]),
    ],
    with_info=True,
    as_supervised=True,
)

#normalize and resize images
IMG_SIZE = 160


def format_example(image, label):
    """Cast the image to float32, divide by 255 and resize to IMG_SIZE x IMG_SIZE.

    This is a plain function passed to ``Dataset.map``, which calls it with
    ``(image, label)`` only, so it must not take a ``self`` parameter.
    The label is returned unchanged.
    """
    image = tf.cast(image, tf.float32)
    image = image / 255.0
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label


train = raw_train.map(format_example)
validation = raw_validation.map(format_example)
test = raw_test.map(format_example)

#create batches
SHUFFLE_BUFFER_SIZE = 1000
BATCH_SIZE = 32
# Only the training split is shuffled.
train_batches = train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
validation_batches = validation.batch(BATCH_SIZE)
test_batches = test.batch(BATCH_SIZE)
And at this point I would like to separate the batches into features and labels, something like this:
train_x_batches, train_y_batches = train_batches
But i get this error:
`ValueError Traceback (most recent call last)
in
----> 1 train_x_batches, train_y_batches = train_batches
ValueError: too many values to unpack (expected 2)`
I had the same problem, and I solved it like this:
# Iterate the dataset once and keep each (features, labels) pair together.
# Iterating twice is unsafe for a shuffled dataset: each pass may produce a
# different order, which would misalign features and labels.
batches = [(np.asarray(x), np.asarray(y)) for x, y in train_batches]
train_x_batches = np.concatenate([x for x, _ in batches], axis=0)
train_y_batches = np.concatenate([y for _, y in batches], axis=0)
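And you can look up the class names that the labels refer to (this attribute is only present on datasets created by utilities such as image_dataset_from_directory) using: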
train_batches.class_names
If you need only features for your autoencoder, you can slice them via map:
# Dataset.map unpacks a (features, labels) element into two positional
# arguments, so the function must accept both and return the first.
train_x_batches = train_batches.map(lambda x, y: x)
Of course, you can do the same thing for your labels:
# Dataset.map unpacks a (features, labels) element into two positional
# arguments, so the function must accept both and return the second.
train_y_batches = train_batches.map(lambda x, y: y)
I have 1500 RGB files(.jpg) and 1500 feature map values(.npy). I want to use them as a dataset for my deep learning project. I am using tensorflow 1.12.
I wrote them into a .tfrecords file using the tf.Example. Here is the code I used to import this file with tf.data(Thanks to Uday's comment).
import tensorflow as tf
import numpy as np
import pdb
IMAGE_HEIGHT = 228
IMAGE_WIDTH = 304
def tfdata_generator(tfrname, is_training, batch_size):
    '''Construct a data generator using tf.Dataset.

    Args:
        tfrname: Path (or paths) of the .tfrecords file(s) to read.
        is_training: When truthy, records are shuffled with a buffer of 100.
        batch_size: Number of examples per batch.

    Returns:
        A dataset that repeats indefinitely and yields ``(image, hint)``
        batches: float32 tensors of shape ``[batch, IMAGE_HEIGHT, IMAGE_WIDTH, 3]``
        and ``[batch, 8, 10, 1024]``.
    '''
    def parse_function(example):
        '''Decode one serialized example into an (image, hint) pair.'''
        features = tf.parse_single_example(example, features={
            'image_raw': tf.FixedLenFeature([], tf.string, default_value=""),
            'hint_raw': tf.FixedLenFeature([], tf.string, default_value=""),
        })
        image = tf.decode_raw(features['image_raw'], tf.uint8)
        image = tf.cast(image, tf.float32)
        image = tf.reshape(image, [IMAGE_HEIGHT, IMAGE_WIDTH, 3])
        # NOTE(review): the hint bytes are decoded as uint8 — confirm this is
        # the dtype the feature maps had when the tfrecords were written.
        hint = tf.decode_raw(features['hint_raw'], tf.uint8)
        hint = tf.cast(hint, tf.float32)
        hint = tf.reshape(hint, [8, 10, 1024])
        return image, hint

    dataset = tf.data.TFRecordDataset(tfrname)
    if is_training:
        dataset = dataset.shuffle(100)  # depends on sample size
    # Transform and batch data at the same time. Use the caller's batch_size
    # rather than a hard-coded value. The static batch dimension stays unknown
    # (shown as ?) because drop_remainder is not set, so the last batch may be
    # smaller.
    dataset = dataset.apply(tf.data.experimental.map_and_batch(
        parse_function, batch_size, num_parallel_batches=4))  # cpu cores
    dataset = dataset.repeat(-1)
    dataset = dataset.prefetch(2)
    return dataset
I set the batch_size to be 8. But when I did the debugging, the shape of the dataset is
((?, 228, 304, 3), (?, 8, 10, 1024)), types: (tf.float32, tf.float32)
Is this correct? Is the code wrong, or did I make a mistake when creating the tfrecords?
You can use code like the example below:
def tfdata_generator(images, labels, is_training, batch_size=32,
                     num_classes=16, no_of_prefetch_needed=1):
    '''Construct a data generator using tf.Dataset.

    Args:
        images: Sequence of JPEG file names.
        labels: Sequence of integer class labels, aligned with ``images``.
        is_training: When truthy, shuffle the data and drop the last
            incomplete batch.
        batch_size: Number of examples per batch.
        num_classes: Depth of the one-hot label encoding (default 16).
        no_of_prefetch_needed: Number of batches to prefetch (default 1).
            This used to be an undefined name inside the function.

    Returns:
        A dataset that repeats indefinitely and yields
        ``(image, one_hot_label)`` batches.
    '''
    def parse_function(filename, label):
        '''Read and decode one JPEG and one-hot encode its label.'''
        image_string = tf.read_file(filename)
        image = tf.image.decode_jpeg(image_string)
        image = tf.image.convert_image_dtype(image, tf.float32)
        y = tf.one_hot(tf.cast(label, tf.uint8), num_classes)
        return image, y

    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    if is_training:
        dataset = dataset.shuffle(1000)  # depends on sample size
    # Transform and batch data at the same time
    dataset = dataset.apply(tf.data.experimental.map_and_batch(
        parse_function,
        batch_size,
        num_parallel_batches=6,  # cpu cores
        drop_remainder=bool(is_training)))
    dataset = dataset.repeat()
    dataset = dataset.prefetch(no_of_prefetch_needed)
    return dataset