Could someone please explain to me why this code:
import torch
from torch_geometric.datasets import TUDataset
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool
from torch_geometric.data import Data, Dataset,DataLoader,DenseDataLoader,InMemoryDataset
from torch_geometric.data import Data, Dataset
from sklearn import preprocessing
device = torch.device('cpu')
torch.backends.cudnn.benchmark = True
import joblib
# Toy example: describe one graph, wrap it in a DataLoader and iterate over it.
# Source and destination node ids, one entry per edge.
edge_origins = [0,1,2,3,4,5,6,7,8,10,11,12,13]
edge_destinations = [1,2,3,4,5,6,7,8,9,11,12,13,14]
target = [0,1]
# Two rows of three features each.
# NOTE(review): the edge lists refer to node ids up to 14 while x has only 2 rows — confirm the intended node count.
x = [[0.1,0.5,0.2],[0.5,0.6,0.23]]
edge_index = torch.tensor([edge_origins, edge_destinations], dtype=torch.long)
x = torch.tensor(x, dtype=torch.float)
y = torch.tensor(target, dtype=torch.long)
dataset = Data(x=x, edge_index=edge_index, y=y, num_classes = len(set(target))) #making the graph of nodes and edges
# NOTE(review): `dataset` is a single Data object. The traceback quoted after this snippet shows the
# loader evaluating dataset[idx] and Data.__getitem__ calling getattr(self, key, None) with that index,
# which is where the TypeError is raised. Presumably the loader expects a collection of graphs
# (e.g. a list of Data objects) — confirm against the torch_geometric documentation.
train_loader = DataLoader(dataset, batch_size=64, shuffle=True)
# NOTE(review): the training code further down reads data.x / data.edge_index / data.batch from each
# item, i.e. it treats every item as one object rather than an (x, y) pair — confirm the unpacking here.
for x,y in train_loader:
print(x)
Generates this error:
for x,y in train_loader:
File "/root/miniconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 346, in __next__
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/root/miniconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/root/miniconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/root/miniconda3/lib/python3.7/site-packages/torch_geometric/data/data.py", line 92, in __getitem__
return getattr(self, key, None)
TypeError: getattr(): attribute name must be string
Edit 1, as an update: if I type:
train_loader = DataLoader(dataset, batch_size=64, shuffle=True)
it = iter(train_loader)
print(it)
It returns:
<torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x7f4aeb009590>
but then if I try to iterate through this object like this:
for x,i in enumerate(it):
print(i)
it returns the same error as before.
Edit 2: Just to mention I am not particularly interested in printing out the data loader attributes, but the next thing I want to do is feed the data loader into the below code, and when I run the below code with the current data loader, I get the error described above about the attribute name must be string when I run the for data in train_loader line of the train() function:
class GCN(torch.nn.Module):
    """Three graph-convolution layers, a mean-pool readout and a linear classifier.

    The input width and the number of output classes are read from the
    module-level ``dataset`` object (``num_node_features`` / ``num_classes``).

    Args:
        hidden_channels: width of every graph-convolution layer.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # Fixed seed so the layers created below are initialised reproducibly.
        torch.manual_seed(12345)
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Return one row of class scores per graph in the batch."""
        # 1. Node embeddings: ReLU follows the first two convolutions only.
        h = self.conv1(x, edge_index).relu()
        h = self.conv2(h, edge_index).relu()
        h = self.conv3(h, edge_index)

        # 2. Readout: average node embeddings per graph.
        pooled = global_mean_pool(h, batch)  # [batch_size, hidden_channels]

        # 3. Dropout (active only in training mode) followed by the classifier.
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)
# Module-level training state shared by train() and test():
# the network, an Adam optimiser over its parameters, and the classification loss.
model = GCN(hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
def train():
    """Run one pass over ``train_loader`` and update the module-level ``model`` in place."""
    model.train()
    for batch in train_loader:  # one mini-batch of graphs per iteration
        logits = model(batch.x, batch.edge_index, batch.batch)  # forward pass
        batch_loss = criterion(logits, batch.y)
        batch_loss.backward()  # compute gradients
        optimizer.step()  # apply the parameter update
        optimizer.zero_grad()  # clear gradients before the next batch
def test(loader):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    Args:
        loader: iterable of batches exposing ``x``, ``edge_index``, ``batch`` and ``y``;
            it must also expose ``dataset`` so the total sample count can be read.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()
    correct = 0
    # Evaluation never calls backward(), so skip building the autograd graph.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Use the class with highest probability.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.
# Train for 170 epochs; after each epoch report accuracy on the training and test loaders.
for epoch in range(1, 171):
train()
train_acc = test(train_loader)
# NOTE(review): test_loader is not defined in any of the code shown here — confirm where it is created.
test_acc = test(test_loader)
print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
Just do:
for x,i in enumerate(trainloader):
DataLoader objects are already iterables, and enumerate adds a counter to an iterable object. iter produces an iterator over the DataLoader object, and I suspect calling enumerate on an iterator rather than an iterable is resulting in the above error.
Related
I want to output the precision, recall, and F1-score using classification_report, but when I run the code below, the following error occurs. How can I fix the AttributeError?
print(classification_report(test.targets.cpu().numpy(),
File "C:\Users\Admin\PycharmProjects\ImageRotation\venv\lib\site-packages\torch\utils\data\dataset.py", line 83, in __getattr__
raise AttributeError
AttributeError
This is where i load the data from my directory.
# Build the dataset from data_dir and report the classes it exposes.
data_loader = ImageFolder(data_dir, transform=transformer)
lab = data_loader.classes
num_classes = len(lab)
print("Number of Classes: ", num_classes)
print("The classes are as follows : \n", lab)

batch_size = 128

# First split: 80% of all samples for training, the remainder for testing.
train_size = int(len(data_loader) * 0.8)
test_size = len(data_loader) - train_size
train, test = random_split(data_loader, [train_size, test_size])

# Second split: 80% of the training part stays for training, the rest is validation.
train_size = int(len(train) * 0.8)
val_size = len(train) - train_size
train_data, val_data = random_split(train, [train_size, val_size])

print(f"Length of Train Data : {len(train_data)}")
print(f"Length of Validation Data : {len(val_data)}")
print(f"Length of Test Data : {len(test)}")

# Batch the three subsets; validation uses a doubled batch size.
train_dl = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(val_data, batch_size=batch_size * 2)
test_dl = DataLoader(test, batch_size=batch_size, shuffle=True)
model.eval() code
# Run the model over test_dl, collect the predicted class ids, then print a classification report.
with torch.no_grad():
# set the model in evaluation mode
model.eval()
# initialize a list to store our predictions
preds = []
# loop over the test set
# NOTE(review): the labels `y` yielded by the loader are never stored, and test_dl was created with
# shuffle=True, so the order of `preds` follows the shuffled batches rather than the dataset order.
for (x, y) in test_dl:
# send the input to the device
x = x.to(device)
# make the predictions and add them to the list
pred = model(x)
preds.extend(pred.argmax(axis=1).cpu().numpy())
# generate a classification report
# NOTE(review): `test` is one of the objects returned by random_split, and the quoted traceback ends in
# an AttributeError raised from its attribute lookup — presumably that object exposes neither
# `targets` nor `classes`. Collecting `y` inside the loop and using the class list read from the
# original dataset (`lab`) would avoid both lookups — confirm.
print(classification_report(test.targets.cpu().numpy(),
np.array(preds), target_names=test.classes))
It seems ImageFolder is your dataset object, but it is not inherited from torch.utils.data.Dataset.
The torch DataLoader tries to call the __getitem__ method of your dataset object, but since it is not a torch.utils.data.Dataset object it does not have that method, which causes the AttributeError you are getting.
Convert ImageFolder to a torch dataset. For further library details: torch doc
Practical implementation: ast_dataloader
Also, you can freeze the model (disable gradient tracking, so no back-propagation is prepared) to speed up the inference process:
with torch.no_grad():
# make the predictions and add them to the list
pred = model(x)
Update>
sample torch dataset:
from torch.utils.data import Dataset
class Dataset_train(Dataset):
    """Map-style dataset that loads one sample per utterance key.

    Args:
        list_IDs: list of strings (each string: utt key).
        labels: dictionary (key: utt key, value: label integer).
        base_dir: directory prefix joined with the key to form the sample path.
        noises: optional value forwarded unchanged to ``get_sample``; defaults to ``None``.
    """

    def __init__(self, list_IDs, labels, base_dir, noises=None):
        self.list_IDs = list_IDs
        self.labels = labels
        self.base_dir = base_dir
        # __getitem__ reads this attribute; it was never assigned before, which
        # made every item access fail with AttributeError.
        self.noises = noises

    def __len__(self):
        """Return the number of utterance keys."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Return ``(X, y)`` for the utterance stored at position ``index``."""
        key = self.list_IDs[index]
        X, _ = get_sample(f"{self.base_dir}/{key}", self.noises)
        # labels is keyed by utterance key (see the class docstring), not by position.
        y = self.labels[key]
        return X, y
[Note] get_sample is a custom-built function for reading .wav files; you can replace it with any function.
torch example-1
torch example-2
medium example
I am trying to implement a Custom Loss function that uses multiple predictions/forward propagations of images for an image classification model.
The general concept of this loss function is to evaluate the model's consistency with non-augmented and augmented images. That is to say, the model is given 2 images; the original image and its augmented counterpart. Then, both images are forward propagated through the model. The more different the two outputs are from each other, the higher the loss.
What this meant is a fairly low-level change, and the most apparent way of solving this, to me, was model subclassing. I created a subclass of the keras.Model class and changed the train_step() method to include a small algorithm for locating the respective augmented counterpart of each original image (not relevant to the issue at all), and more significantly, a line that gave a prediction on the augmented counterpart:
with tf.GradientTape() as tape:
y_pred = self(x, training=True)
y_aug = self(self.augmented_data[aug_index:aug_index+self.batch_size], training=True)
loss = self.comparative_loss(y, y_pred, y_aug)
The whole self.augmented_data[aug_index:aug_index+self.batch_size] isn't relevant at all, it can be thought of just as the augmented data input. The intent was for the method "comparative_loss" to take the two predictions and then perform the aforementioned loss calculations on it.
The issue came when I tried to compile the model; there was a required loss parameter, but it refused to accept my custom loss method as it required 3 parameters. I couldn't go with the standard fix of putting the functions into a structure like this:
def new_loss(extra_parameter):
def loss(y_true, y_pred):
return loss_value
return loss
since my "extra_parameter" was not just a standard output of the model; it was a completely separate forward propagation on it, that relied on my custom train_step() method.
TL;DR:
What I'm most confused about is, why does tf.compile() even require a loss function, if my "train_step" method doesn't use it? The train_step method in my custom subclass has the loss built-in, so is there a way to override the .compile()'s loss parameter and have it work without me having to give it a method? If not, what other solutions are there?
The full code is below, though I sincerely apologize to anyone that reads it, as it's not quite finished:
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 18 11:37:08 2022
Custom Loss Function
Description:
For each element of y_true, compare the y_predict of
the original image and the complemented one, then return
a loss accordingly using the Euclidian distance
between the predictions for the original images and the complements.
y_predict are labels for the images, these labels can
come in any form: CIFAR labels, species labels, or labels of which
individual a given image is.
y_predict will be in the shape (batch_size, number_of_classes), using the
#author: hudso
"""
import tensorflow as tf
import keras
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, BatchNormalization
import ssl
import numpy as np
import cv2 as cv
class CustomModel(keras.Model):
    """CNN classifier trained with a consistency loss between original and augmented images.

    Each training step runs the current batch and its augmented counterpart
    through the network and penalises the squared distance between the two
    predictions. ``data_import`` must be called before ``fit`` so the model
    knows the augmented data, the full input array and the batch size.

    Args:
        classes: number of output classes.
    """

    def __init__(self, classes):
        super().__init__()  # call parent constructor
        self.conv_1 = Conv2D(32, (3, 3), activation='relu', padding='same')
        self.batch_1 = BatchNormalization()
        self.conv_2 = Conv2D(32, (3, 3), activation='relu', padding='same')
        self.batch_2 = BatchNormalization()
        self.pool_1 = MaxPooling2D((2, 2))
        self.conv_3 = Conv2D(64, (3, 3), activation='relu', padding='same')
        self.batch_3 = BatchNormalization()
        self.conv_4 = Conv2D(64, (3, 3), activation='relu', padding='same')
        self.batch_4 = BatchNormalization()
        self.pool_2 = MaxPooling2D((2, 2))
        self.conv_5 = Conv2D(128, (3, 3), activation='relu', padding='same')
        self.batch_5 = BatchNormalization()
        self.conv_6 = Conv2D(128, (3, 3), activation='relu', padding='same')
        self.batch_6 = BatchNormalization()
        self.flatten = Flatten()
        self.layer_1 = keras.layers.Dropout(0.2)
        self.layer_2 = Dense(256, activation='relu')
        self.dropout = keras.layers.Dropout(0.2)
        self.outputs = Dense(classes, activation='softmax')  # one unit per class
        self.classes = classes  # number of classes

    def call(self, inputs):
        """Forward pass: apply every layer in order and return the class probabilities."""
        pipeline = (
            self.conv_1, self.batch_1, self.conv_2, self.batch_2, self.pool_1,
            self.conv_3, self.batch_3, self.conv_4, self.batch_4, self.pool_2,
            self.conv_5, self.batch_5, self.conv_6, self.batch_6,
            self.flatten, self.layer_1, self.layer_2, self.dropout, self.outputs,
        )
        x = inputs
        for layer in pipeline:
            x = layer(x)
        return x

    def data_import(self, augmented_data, x_all, batch_size):
        """Store the data ``train_step`` needs but ``fit`` does not pass along.

        Args:
            augmented_data: augmented counterparts, index-aligned with ``x_all``.
            x_all: every original input, in the order given to ``fit``.
            batch_size: batch size used by ``fit``.
        """
        self.augmented_data = augmented_data
        self.x_all = np.asarray(x_all, dtype=np.float32)
        self.batch_size = batch_size

    def comparative_loss(self, y_true, y_pred, y_aug):
        """Return the mean squared Euclidean distance between the two predictions.

        Args:
            y_true: ground-truth labels; accepted for signature symmetry, not used.
            y_pred: predictions for the original images.
            y_aug: predictions for the augmented images, same shape as ``y_pred``.

        Returns:
            Scalar tensor: per-sample sum of squared differences over the class
            axis, averaged over the samples actually present in the batch.
        """
        # Vectorised replacement for the per-element TensorArray loops: same
        # value for a full batch, and it no longer indexes past the end of a
        # final batch that is shorter than self.batch_size.
        squared_distance = tf.reduce_sum(tf.square(tf.subtract(y_pred, y_aug)), axis=-1)
        total_loss = tf.reduce_mean(squared_distance)
        print("TOTAL LOSS: " + str(total_loss))
        return total_loss

    def train_step(self, data):
        """Run one optimisation step on ``data`` using the consistency loss.

        Requires eager execution (``x.numpy()`` is called) and an unshuffled
        ``fit`` so that consecutive rows of ``x_all`` form the batch.

        Returns:
            Dict mapping each metric name to its current result.
        """
        x, y = data  # current batch

        # Locate the first image of the batch inside x_all; the augmented
        # counterparts of the batch start at the same index.
        aug_index = 0
        found = False  # was previously unbound when no image matched
        x_arr = x.numpy()
        for i in range(np.size(self.x_all, axis=0)):
            difference = cv.subtract(self.x_all[i], x_arr[0])
            if np.count_nonzero(difference) == 0:
                aug_index = i  # lower bound of the batch of images
                found = True
                break  # the lower bound is the first match; no need to scan further
        if not found:
            print("Yikes mate the x_arr wasn't found in x_all... probably a rounding error")
        print("\nCurrent Index: " + str(aug_index))

        # Forward passes and loss. The augmented slice uses the real batch
        # length so a short final batch still lines up with y_pred.
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            y_aug = self(self.augmented_data[aug_index:aug_index + x_arr.shape[0]], training=True)
            loss = self.comparative_loss(y, y_pred, y_aug)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}
#Essentially emulates the environment that the model would normally be running in
#E.g. Creates the dataset, does Image Augmentation, etc.
#In the actual implementation, only the "CustomModel" class will be used, this is purely for testing purposes
class shrek_is_love:
    """Test harness: load CIFAR-10, build one augmented copy per image, split into train/test.

    After construction the instance exposes ``images_train`` / ``images_test``,
    ``complements_train`` / ``complements_test`` and ``labels_train`` /
    ``labels_test``. The whole pipeline runs from ``__init__``.
    """

    def __init__(self):
        self.complements = []
        self.create_dataset()

    def create_dataset(self):
        """Download the CIFAR-10 training set and hand over to ``data_aug``."""
        # SECURITY NOTE(review): this disables HTTPS certificate verification for the
        # whole process so the dataset download succeeds — acceptable only for a
        # throw-away test script.
        ssl._create_default_https_context = ssl._create_unverified_context
        # Only the training portion is used; it is split again in preprocessor().
        (images, labels), (_, _) = keras.datasets.cifar10.load_data()
        self.labels = labels
        self.images = images
        self.data_aug()

    def data_aug(self):
        """Create one randomly augmented, [0, 1]-scaled copy of every image."""
        imageGen = keras.preprocessing.image.ImageDataGenerator(
            width_shift_range=.3, height_shift_range=.3, horizontal_flip=True, zoom_range=.3)
        imagees = np.zeros(shape=(1, 32, 32, 3))
        for l in range(np.size(self.images, 0)):
            # adjust the tuple inside of cv.resize to adjust resolution
            temp = cv.resize(self.images[l], (32, 32))
            # NOTE(review): this swaps the first and third channel of the augmented
            # copy only; self.images is not converted — confirm this is intended.
            imagees[0] = cv.cvtColor(temp, cv.COLOR_BGR2RGB)
            it = imageGen.flow(imagees)
            im = next(it)  # built-in next() instead of the iterator's .next() method
            im = im[0].astype('float32')
            im = im / 255.0
            self.complements.append(im)
        # np.float was only an alias of the built-in float (float64) and has been
        # removed from NumPy; np.float64 is the same dtype.
        self.complements = np.asarray(self.complements, dtype=np.float64)
        self.images = self.images.astype(np.float64)
        self.images = self.images / 255.0
        self.preprocessor()

    def preprocessor(self):
        """One-hot encode the labels and split images, complements and labels 75/25."""
        from sklearn.preprocessing import OneHotEncoder
        # NOTE(review): newer scikit-learn releases rename `sparse` to
        # `sparse_output` — confirm against the installed version.
        onehot_encoder = OneHotEncoder(sparse=False)
        self.labels = onehot_encoder.fit_transform(np.reshape(self.labels, (-1, 1)))
        from sklearn.model_selection import train_test_split
        # The same seed is used for both calls so complements, images and labels
        # stay index-aligned after the split.
        shared_seed = 5
        self.complements_train, self.complements_test = train_test_split(
            self.complements, test_size=0.25, random_state=shared_seed)
        self.images_train, self.images_test, self.labels_train, self.labels_test = train_test_split(
            self.images, self.labels, test_size=0.25, random_state=shared_seed)
#The following code will be all that is necessary to run the CustomModel classs
# Driver: build the test data, create the model, hand it the augmented data, compile and fit.
batch_size = 32
shrek_is_life = shrek_is_love()
model = CustomModel(10) #10 classes
model.data_import(shrek_is_life.complements_train, shrek_is_life.images_train, batch_size) #the model will not be training on aug_data, essentially turning it into a secondary test set
# run_eagerly=True is needed because train_step calls x.numpy().
model.compile(optimizer='adam', loss=None, metrics=['accuracy'], run_eagerly=True) #loss=None brings up an error, but I have no idea what else to put in there
# shuffle=False keeps the batches index-aligned with the arrays passed to data_import.
model.fit(x = shrek_is_life.images_train, y = shrek_is_life.labels_train, shuffle = False, batch_size = batch_size, epochs = 1)
EDIT:
Running it without a .compile line yields this error:
Traceback (most recent call last):
File "D:\Downloads\untitled0.py", line 191, in <module>
model.fit(x = shrek_is_life.images_train, y = shrek_is_life.labels_train, shuffle = False, batch_size = batch_size, epochs = 1)
File "C:\Users\hudso\anaconda3\envs\mlTens\lib\site-packages\keras\engine\training.py", line 1150, in fit
x, y, sample_weights = self._standardize_user_data(
File "C:\Users\hudso\anaconda3\envs\mlTens\lib\site-packages\keras\engine\training.py", line 508, in _standardize_user_data
raise RuntimeError('You must compile a model before '
RuntimeError: You must compile a model before training/testing. Use `model.compile(optimizer, loss)`.
Running .compile without the loss argument or with loss=None yields:
File "C:\Users\hudso\anaconda3\envs\mlTens\lib\site-packages\keras\engine\training.py", line 706, in _prepare_total_loss
raise ValueError('The model cannot be compiled '
ValueError: The model cannot be compiled because it has no loss to optimize.
I have set up custom training and testing functions in my project so I can minutely customise the training process. I use k-fold cross-validation to evaluate my model. For whatever reason, the model trains correctly for the first fold, and then on the second it throws this error.
tensorflow.python.framework.errors_impl.FailedPreconditionError: Could not find variable _AnonymousVar13. This could mean that the variable has been deleted. In TF1, it can also mean the variable is uninitialized. Debug info: container=localhost, status=Not found: Resource localhost/_AnonymousVar13/N10tensorflow3VarE does not exist. [[node test_model/dense_2/Tensordot/ReadVariableOp (defined at training_example.py:33) ]] [Op:__inference__train_step_1082]
I have no idea what's happening. I assumed the error arose because of poor initialisation, so I call model.build() with the input shape. I have tried initialising the graph's weights with a blank tensor, too, but that didn't work. I have also reset the backend on the last line in case there was a conflict with names, but that doesn't do the trick.
import numpy as np
import sklearn.model_selection
import tensorflow as tf
from tensorflow.python.keras.metrics import Mean, Precision, Recall
from tensorflow.python.keras.optimizer_v2.adam import Adam
# Experiment configuration shared by the functions and the fold loop that follow.
n_splits = 5
batch_size = 16
n_epochs = 2
loss_function = tf.keras.losses.BinaryCrossentropy()
# Optimiser class; it is instantiated with learning_rate once per fold.
optimiser_fn = Adam
# Metric objects are created once here and reused by every fold and epoch.
metrics = [
Mean(name='loss'),
Precision(name='prec'),
Recall(name='recall'),
]
learning_rate = 1e-2
# One Dense layer is created per entry.
dense_outputs = [10,10]
# NOTE(review): `activation` is not referenced by any code shown here — confirm it is still needed.
activation = 'relu'
class TestModel(tf.keras.Model):
    """One Dense layer per entry of ``dense_outputs``, followed by a single-unit Dense head."""

    def __init__(self):
        super().__init__()
        # NOTE(review): the module-level `activation` setting is not passed to
        # these layers — confirm that activation-free hidden layers are intended.
        self._dense_ops = [tf.keras.layers.Dense(units) for units in dense_outputs]
        self._output = tf.keras.layers.Dense(1)

    def call(self, inputs):
        """Feed ``inputs`` through the hidden layers in order, then the output layer."""
        tensor = inputs
        for dense in self._dense_ops:
            tensor = dense(tensor)
        return self._output(tensor)
def _load_fold_sets_for_training(fold, fold_idcs, features, labels, batch_size):
    """Build the batched training and validation datasets for one fold.

    Args:
        fold: position of the fold inside ``fold_idcs``.
        fold_idcs: per-fold triples ``(train, validation, test)`` of index arrays;
            the test indices are ignored here.
        features: array indexed by the fold indices along its first axis.
        labels: array indexed the same way as ``features``.
        batch_size: number of samples per batch; the last batch may be smaller.

    Returns:
        Tuple ``(training_set, validation_set)`` of batched ``tf.data.Dataset`` objects.
    """
    train_rows, validation_rows, _ = fold_idcs[fold]

    def _batched(rows):
        # Slice both arrays with the same indices so samples and labels stay paired.
        subset = tf.data.Dataset.from_tensor_slices((features[rows], labels[rows]))
        return subset.batch(batch_size, drop_remainder=False)

    return _batched(train_rows), _batched(validation_rows)
# NOTE(review): the next line looks like a decorator whose '@' was lost (the quoted error names
# __inference__train_step, i.e. a traced function) — confirm against the original script.
#tf.function
def _train_step(batch_samples, batch_labels):
# One optimisation step on a single batch. Reads the module-level `model`, `optimiser`,
# `loss_function` and `metrics`; the fold loop rebinds `model` and `optimiser` for every fold.
batch_predictions = model(batch_samples, training=True)
# NOTE(review): Keras loss objects are documented as loss(y_true, y_pred); the arguments here are
# in the opposite order, and the loss receives the values before tf.sigmoid is applied — confirm.
loss = loss_function(batch_predictions, batch_labels)
gradients = tf.gradients(loss, model.trainable_variables)
optimiser.apply_gradients(
zip(gradients, model.trainable_variables)
)
# Metrics other than the loss mean are updated with the sigmoid of the model output.
batch_predictions = tf.sigmoid(batch_predictions)
metrics[0].update_state(loss)
[m.update_state(batch_labels, batch_predictions) for m in metrics[1:]]
# NOTE(review): as with _train_step, the next line looks like a decorator whose '@' was lost — confirm.
#tf.function
def _inference_step(batch_samples, batch_labels):
# Evaluate a single batch without updating weights; updates the shared module-level `metrics`.
batch_predictions = model(batch_samples, training=False)
# NOTE(review): Keras loss objects are documented as loss(y_true, y_pred); the arguments here are
# in the opposite order — confirm.
loss = loss_function(batch_predictions, batch_labels)
batch_predictions = tf.sigmoid(batch_predictions)
metrics[0].update_state(loss)
[m.update_state(batch_labels, batch_predictions) for m in metrics[1:]]
# Generate dataset.
# Random features and labels: 15 samples.
features = np.random.rand(15,1440,1)
labels = np.random.rand(15,1440)
# Set up splits.
# Each fold's training indices are split again into training and validation indices.
kfold = sklearn.model_selection.KFold(n_splits=n_splits, shuffle=True)
splits = []
for train_idcs, test_idcs in kfold.split(features):
train_idcs, val_idcs = sklearn.model_selection.train_test_split(train_idcs)
splits += [[train_idcs, val_idcs, test_idcs]]
fold = 0
while fold < n_splits:
# Load datasets for fold.
training_set, validation_set = _load_fold_sets_for_training(fold, splits, features, labels, batch_size)
# Load model.
# `model` and `optimiser` are rebound here for every fold, while _train_step and
# _inference_step are defined only once, before this loop.
model = TestModel()
# Build model.
model.build((1440, 1))
# Initialise Adam optimiser.
optimiser = optimiser_fn(learning_rate)
epoch = 0
while epoch < n_epochs:
epoch += 1
# Training.
for batch_features, batch_labels in training_set: _train_step(batch_features, batch_labels)
print(f'fold {fold}: epoch {epoch}:', ' '.join(f'train_{m.name}: {m.result():0.05f}' for m in metrics))
# Validation.
# NOTE(review): the metric objects are not reset anywhere in this loop, so the values printed as
# val_* also contain the preceding training batches — confirm whether that is intended.
for batch_features, batch_labels in validation_set: _inference_step(batch_features, batch_labels)
print(f'fold {fold}: epoch {epoch}:', ' '.join(f'val_{m.name}: {m.result():0.05f}' for m in metrics))
tf.keras.backend.clear_session()
fold += 1
Any ideas?
The issue was the placement of the _train_step and _inference_step. If the two functions are redefined on every iteration of the fold, the error disappears and the model trains. I don't know why they must be redefined every step.
import numpy as np
import sklearn.model_selection
import tensorflow as tf
from tensorflow.python.keras.metrics import Mean, Precision, Recall
from tensorflow.python.keras.optimizer_v2.adam import Adam
# Experiment configuration shared by the functions and the fold loop that follow.
n_splits = 5
batch_size = 2
n_epochs = 2
loss_function = tf.keras.losses.BinaryCrossentropy()
# Optimiser class; it is instantiated with learning_rate once per fold.
optimiser_fn = Adam
# Metric objects are created once here and reused by every fold and epoch.
metrics = [
Mean(name='loss'),
Precision(name='prec'),
Recall(name='recall'),
]
learning_rate = 1e-2
# One Dense layer is created per entry.
dense_outputs = [10, 10]
# NOTE(review): `activation` is not referenced by any code shown here — confirm it is still needed.
activation = 'relu'
class TestModel(tf.keras.Model):
    """One Dense layer per entry of ``dense_outputs``, followed by a single-unit Dense head."""

    def __init__(self):
        super().__init__()
        # NOTE(review): the module-level `activation` setting is not passed to
        # these layers — confirm that activation-free hidden layers are intended.
        self._dense_ops = [tf.keras.layers.Dense(units) for units in dense_outputs]
        self._output = tf.keras.layers.Dense(1)

    def call(self, inputs):
        """Feed ``inputs`` through the hidden layers in order, then the output layer."""
        tensor = inputs
        for dense in self._dense_ops:
            tensor = dense(tensor)
        return self._output(tensor)
def _load_fold_sets_for_training(fold, fold_idcs, features, labels, batch_size):
    """Build the batched training and validation datasets for one fold.

    Args:
        fold: position of the fold inside ``fold_idcs``.
        fold_idcs: per-fold triples ``(train, validation, test)`` of index arrays;
            the test indices are ignored here.
        features: array indexed by the fold indices along its first axis.
        labels: array indexed the same way as ``features``.
        batch_size: number of samples per batch; the last batch may be smaller.

    Returns:
        Tuple ``(training_set, validation_set)`` of batched ``tf.data.Dataset`` objects.
    """
    train_rows, validation_rows, _ = fold_idcs[fold]

    def _batched(rows):
        # Slice both arrays with the same indices so samples and labels stay paired.
        subset = tf.data.Dataset.from_tensor_slices((features[rows], labels[rows]))
        return subset.batch(batch_size, drop_remainder=False)

    return _batched(train_rows), _batched(validation_rows)
# Generate dataset: 15 random samples with matching random labels.
features = np.random.rand(15, 1440, 1)
labels = np.random.rand(15, 1440)

# Set up splits: each fold's training indices are split again into
# training and validation indices; the test indices are kept alongside.
kfold = sklearn.model_selection.KFold(n_splits=n_splits, shuffle=True)
splits = []
for train_idcs, test_idcs in kfold.split(features):
    train_idcs, val_idcs = sklearn.model_selection.train_test_split(train_idcs)
    splits.append([train_idcs, val_idcs, test_idcs])

for fold in range(n_splits):
    # Load datasets for fold.
    training_set, validation_set = _load_fold_sets_for_training(
        fold, splits, features, labels, batch_size
    )

    # Fresh model and optimiser for every fold.
    model = TestModel()
    model.build((1440, 1))
    optimiser = optimiser_fn(learning_rate)

    # The step functions are (re)defined inside the fold loop on purpose: a
    # traced function keeps referring to the variables of the model it was
    # first traced with, so each fold needs its own trace.
    @tf.function
    def _train_step(batch_samples, batch_labels):
        """Run one optimisation step on a batch and update the shared metrics."""
        # GradientTape works both eagerly and inside tf.function
        # (tf.gradients is graph-only).
        with tf.GradientTape() as tape:
            logits = model(batch_samples, training=True)
            # The model emits raw scores; loss_function expects probabilities.
            probabilities = tf.sigmoid(logits)
            # Keras losses take (y_true, y_pred) in this order.
            loss = loss_function(batch_labels, probabilities)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimiser.apply_gradients(zip(gradients, model.trainable_variables))
        metrics[0].update_state(loss)
        for m in metrics[1:]:
            m.update_state(batch_labels, probabilities)

    @tf.function
    def _inference_step(batch_samples, batch_labels):
        """Evaluate a batch without updating weights and update the shared metrics."""
        probabilities = tf.sigmoid(model(batch_samples, training=False))
        loss = loss_function(batch_labels, probabilities)
        metrics[0].update_state(loss)
        for m in metrics[1:]:
            m.update_state(batch_labels, probabilities)

    for epoch in range(1, n_epochs + 1):
        # Training. Metrics are reset first so the printed values describe
        # this phase only instead of everything seen since start-up.
        for m in metrics:
            m.reset_states()
        for batch_features, batch_labels in training_set:
            _train_step(batch_features, batch_labels)
        print(f'fold {fold}: epoch {epoch}:', ' '.join(f'train_{m.name}: {m.result():0.05f}' for m in metrics))

        # Validation.
        for m in metrics:
            m.reset_states()
        for batch_features, batch_labels in validation_set:
            _inference_step(batch_features, batch_labels)
        print(f'fold {fold}: epoch {epoch}:', ' '.join(f'val_{m.name}: {m.result():0.05f}' for m in metrics))

    tf.keras.backend.clear_session()
I have the following Custom Generator for building pairs of images for siamese networks, as unfortunately all my training data do not fit on my GPU's memory:
import numpy as np
np.random.seed(42) # for reproducibility
import random
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input, Lambda
from keras.optimizers import SGD, RMSprop
from keras import backend as K
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
class DataGenerator(object):
    """Build positive/negative image pairs for a siamese network and serve them in batches.

    The constructor loads the images and labels from disk, splits them 70/30
    into training and test parts, and builds the pairs for both parts.
    ``next_train`` and ``next_val`` are endless generators that yield
    ``([left_batch, right_batch], label_batch)``.

    Args:
        batch_sz: number of pairs per yielded batch.
    """

    # Number of distinct labels in the data set (labels are 0 .. NUM_CLASSES - 1).
    NUM_CLASSES = 8

    def __init__(self, batch_sz):
        print("[INFO] loading dataset...")
        trainX = np.load("features_and_labels/energy/train1-images.npy")
        trainY = np.load("features_and_labels/energy/train1-labels.npy")
        trainX, trainY = shuffle(trainX, trainY)
        trainX, testX, trainY, testY = train_test_split(
            trainX, trainY, test_size=0.30, stratify=trainY, random_state=42)
        trainX = trainX / 255.0
        testX = testX / 255.0
        print("READY")

        # create training+test positive and negative pairs
        digit_indices = [np.where(trainY == i)[0] for i in range(self.NUM_CLASSES)]
        self.tr_pairs, self.tr_y = self.create_pairs(trainX, digit_indices)
        digit_indices = [np.where(testY == i)[0] for i in range(self.NUM_CLASSES)]
        self.te_pairs, self.te_y = self.create_pairs(testX, digit_indices)

        # Left and right members of every pair, stored separately for the two inputs.
        self.tr_pairs_0 = self.tr_pairs[:, 0]
        self.tr_pairs_1 = self.tr_pairs[:, 1]
        self.te_pairs_0 = self.te_pairs[:, 0]
        self.te_pairs_1 = self.te_pairs[:, 1]

        self.batch_sz = batch_sz
        # Floor division truncates to a whole number of batches; true division
        # returned the unchanged pair count as a float.
        self.samples_per_train = (self.tr_pairs.shape[0] // self.batch_sz) * self.batch_sz
        self.samples_per_val = (self.te_pairs.shape[0] // self.batch_sz) * self.batch_sz
        self.cur_train_index = 0
        self.cur_val_index = 0
        print("DONE")

    def create_pairs(self, x, digit_indices):
        """Create alternating positive and negative pairs.

        Args:
            x: array of samples.
            digit_indices: one index array per class, listing the rows of ``x``
                that belong to that class.

        Returns:
            ``(pairs, labels)`` as arrays; label 1 marks a same-class pair and
            label 0 a pair drawn from two different classes.

        Raises:
            ValueError: if fewer than two classes are given (no negative pair exists).
        """
        num_classes = len(digit_indices)
        if num_classes < 2:
            raise ValueError("at least two classes are required to build negative pairs")
        # Every class contributes the same number of pairs, limited by the smallest class.
        n = min(len(indices) for indices in digit_indices) - 1
        pairs = []
        labels = []
        for d in range(num_classes):
            for i in range(n):
                # Positive pair: two consecutive samples of class d.
                z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
                pairs.append([x[z1], x[z2]])
                # Negative pair: an offset of 1 .. num_classes-1 modulo num_classes
                # always lands on a different class and can reach every class.
                inc = random.randrange(1, num_classes)
                dn = (d + inc) % num_classes
                z1, z2 = digit_indices[d][i], digit_indices[dn][i]
                pairs.append([x[z1], x[z2]])
                labels += [1, 0]
        return np.array(pairs), np.array(labels)

    def next_train(self):
        """Yield training batches forever, starting with the first batch."""
        while True:
            start = self.cur_train_index
            stop = start + self.batch_sz
            batch = (
                [self.tr_pairs_0[start:stop], self.tr_pairs_1[start:stop]],
                self.tr_y[start:stop],
            )
            # Advance after slicing so the batch at index 0 is not skipped on
            # the first pass; wrap once all whole batches have been served.
            self.cur_train_index = stop if stop < self.samples_per_train else 0
            yield batch

    def next_val(self):
        """Yield validation batches forever, starting with the first batch."""
        while True:
            start = self.cur_val_index
            stop = start + self.batch_sz
            batch = (
                [self.te_pairs_0[start:stop], self.te_pairs_1[start:stop]],
                self.te_y[start:stop],
            )
            self.cur_val_index = stop if stop < self.samples_per_val else 0
            yield batch
I would like to use the following data augmentation techniques in these pairs of images:
augmentator=ImageDataGenerator(
rotation_range=20,
zoom_range=0.15,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.15,
horizontal_flip=True,
fill_mode="nearest")
However, I have the following problem when calling it, here is how I do it
# Shared encoder: ResNet50 without its top, followed by a small fully connected head.
convnet = resnet50.ResNet50(weights='imagenet', include_top=False, input_shape=(224,224,3))
# Add the final fully connected layers
x = convnet.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.3)(x)
preds = Dense(18, activation='sigmoid')(x) # Apply sigmoid
convnet = Model(inputs=convnet.input, outputs=preds)
#Applying above model for both the left and right images
# NOTE(review): left_input and right_input are not defined in the code shown here — confirm where they are created.
encoded_l = convnet(left_input)
encoded_r = convnet(right_input)
# Euclidian Distance between the two images or encodings through the Resnet-50 architecture
# NOTE(review): the lambda returns the element-wise absolute difference of the two encodings,
# not a Euclidean norm — confirm which distance is intended.
Euc_layer = Lambda(lambda tensor:K.abs(tensor[0] - tensor[1]))
# use and add the distance function
Euc_distance = Euc_layer([encoded_l, encoded_r])
#identify the prediction
prediction = Dense(1,activation='sigmoid')(Euc_distance)
#Define the network with the left and right inputs and the ouput prediction
siamese_net = Model(inputs=[left_input,right_input],outputs=prediction)
#Calling the generator for me
datagen = mycustomgenerator_v2.DataGenerator(config.BATCH_SIZE)
print("[INFO] compiling model...")
siamese_net.compile(loss="binary_crossentropy", optimizer="sgd", metrics="accuracy")
# train the model
print("[INFO] training model...")
lr_reducer= ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1), cooldown=0, patience=3, min_lr=0.5e-6)
early_stopper=EarlyStopping(monitor='val_accuracy', min_delta=0.1, patience=250,restore_best_weights=True,verbose=1)
model_checkpoint= ModelCheckpoint("best_weight.h5", monitor="val_accuracy", save_best_only=True, save_weights_only=True,mode='auto')
callbacks=[lr_reducer,early_stopper,model_checkpoint]
augmentator=ImageDataGenerator(
rotation_range=20,
zoom_range=0.15,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.15,
horizontal_flip=True,
fill_mode="nearest")
# NOTE(review): datagen.next_train() is a generator (its body contains `yield`), and the traceback
# quoted after this snippet shows flow() passing its argument to np.asarray — so flow() is being
# handed a generator object where it converts its input to an array. Presumably the augmentation
# has to be applied to the image arrays inside the custom generator instead — confirm.
# NOTE(review): steps_per_epoch / validation_steps receive samples_per_train / samples_per_val,
# which by their names count samples rather than batches — confirm.
history=siamese_net.fit(augmentator.flow(datagen.next_train()), steps_per_epoch=datagen.samples_per_train, epochs=config.EPOCHS, validation_data=datagen.next_val(), validation_steps=datagen.samples_per_val, callbacks=callbacks)
Then I get the following error:
File "train_siamese_network.py", line 163, in <module>
history=siamese_net.fit(augmentator.flow(datagen.next_train()), steps_per_epoch=datagen.samples_per_train, epochs=config.EPOCHS, validation_data=datagen.next_val(), validation_steps=datagen.samples_per_val, callbacks=callbacks)
File "/home/me/.local/lib/python3.8/site-packages/tensorflow/python/keras/preprocessing/image.py", line 854, in flow
return NumpyArrayIterator(
File "/home/me/.local/lib/python3.8/site-packages/tensorflow/python/keras/preprocessing/image.py", line 450, in __init__
super(NumpyArrayIterator, self).__init__(
File "/home/me/.local/lib/python3.8/site-packages/keras_preprocessing/image/numpy_array_iterator.py", line 121, in __init__
self.x = np.asarray(x, dtype=self.dtype)
File "/home/me/.local/lib/python3.8/site-packages/numpy/core/_asarray.py", line 83, in asarray
return array(a, dtype, copy=False, order=order)
TypeError: float() argument must be a string or a number, not 'generator'
Clearly I am not calling the data augmentator on the generator correctly. I also checked other similar posts like this, this and this, but they did not help me. So, how am I supposed to apply data augmentation to the custom generator for my Siamese network?
I don't know how to apply ImageDataGenerator augmentation directly to a dataset of pairs.
But one possibility is to generate the augmented images on disk. That is, you first generate augmented images from the original dataset (before any pairs are formed), and in the next step you build the pairs from them.
As a disadvantage, this method will put very similar images in both the test and the train data, which could lead to overfitting.
I am trying to get similar results on the same dataset with Keras and PyTorch.
Data
from numpy import array
from numpy import hstack
from sklearn.model_selection import train_test_split
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps):
    """Cut a 2-D multivariate series into sliding windows.

    Every window spans ``n_steps`` consecutive rows.  Its input part is all
    columns except the last; its target is the last column of the window's
    final row.  Windows that would run past the end of ``sequences`` are not
    produced.

    Returns:
        ``(X, y)`` as NumPy arrays built from the collected windows and
        targets.
    """
    total = len(sequences)
    # Start offsets whose full window still fits inside the data.
    starts = [start for start in range(total) if start + n_steps <= total]
    windows = [sequences[start:start + n_steps, :-1] for start in starts]
    targets = [sequences[start + n_steps - 1, -1] for start in starts]
    return array(windows), array(targets)
def get_data():
    """Build the toy two-input series and return an unshuffled train/test split.

    The two inputs are the ramps 0, 10, ..., 490 and 5, 15, ..., 495; the
    target column is their element-wise sum.  The stacked table is windowed
    with ``split_sequences`` using 3 timesteps, the resulting shapes are
    printed, and the windows are split 80/20 without shuffling.

    Returns:
        ``(X_train, x_test, Y_train, y_test)``.
    """
    # Dividing by 1 turns the integer ranges into float arrays.
    first_input = array(range(0, 500, 10)) / 1
    second_input = array(range(5, 505, 10)) / 1
    target = first_input + second_input

    # One column per series, stacked side by side as [rows, columns].
    columns = [series.reshape((len(series), 1))
               for series in (first_input, second_input, target)]
    dataset = hstack(columns)

    n_timesteps = 3  # window length; the first two columns are the parallel inputs
    X, y = split_sequences(dataset, n_timesteps)
    print(X.shape, y.shape)

    X_train, x_test, Y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False)
    return X_train, x_test, Y_train, y_test
Keras
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from sklearn.metrics import mean_squared_error
import testing.TimeSeries.datacreator as dc # !!!!change this!!!!
# Keras reference run: an LSTM(1024) -> Dense(512) -> Dense(1) regressor
# trained with Adam on the toy dataset, then scored with MSE on the test split.

# The from-imports above bind Sequential / LSTM / Dense but not the name
# ``keras`` itself, which is needed below for the optimizer and epsilon.
import keras

X_train, x_test, Y_train, y_test = dc.get_data()
n_features = 2  # this is number of parallel inputs
n_timesteps = 3  # this is number of timesteps

# define model
model = Sequential()
model.add(LSTM(1024, activation='relu',
               input_shape=(n_timesteps, n_features),
               kernel_initializer='uniform',
               recurrent_initializer='uniform'))
model.add(Dense(512, activation='relu'))
model.add(Dense(1))

# Adam with explicit hyper-parameters so that the PyTorch run can mirror them.
# epsilon is taken from the public backend API instead of the private
# ``keras.optimizers.K`` alias.
opt = keras.optimizers.Adam(lr=0.001,
                            beta_1=0.9,
                            beta_2=0.999,
                            epsilon=keras.backend.epsilon(),
                            decay=0.0,
                            amsgrad=False)
model.compile(optimizer=opt, loss='mse')

# fit model
model.fit(X_train, Y_train, epochs=200, verbose=1, validation_data=(x_test, y_test))

# Report the test MSE (printed so it is visible when run as a script, like
# the final print of the PyTorch version).
yhat = model.predict(x_test, verbose=0)
print(mean_squared_error(y_test, yhat))
PyTorch - module class
import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import mean_squared_error
import testing.TimeSeries.datacreator as dc # !!!! change this !!!!
# Load the train/test split from the data-creation module imported as `dc`
# (presumably the get_data() shown in the "Data" section — confirm).
X_train,x_test,Y_train, y_test = dc.get_data()
n_features = 2 # this is number of parallel inputs
n_timesteps = 3 # this is number of timesteps
class MV_LSTM(torch.nn.Module):
    """Multivariate LSTM regressor.

    Pipeline: LSTM -> flatten the outputs of *all* timesteps -> ReLU ->
    Linear(n_hidden * seq_len, 512) -> ReLU -> Linear(512, 1).

    The recurrent state is not created automatically: call ``init_hidden``
    with the batch size before every ``forward`` call.
    """

    def __init__(self, n_features, seq_length):
        super().__init__()
        self.n_features = n_features  # number of parallel inputs
        self.seq_len = seq_length     # number of timesteps
        self.n_hidden = 1024          # number of hidden states
        self.n_layers = 1             # number of stacked LSTM layers

        self.l_lstm = torch.nn.LSTM(
            input_size=n_features,
            hidden_size=self.n_hidden,
            num_layers=self.n_layers,
            batch_first=True,
        )
        # With batch_first=True the LSTM output is
        # (batch_size, seq_len, num_directions * hidden_size); every timestep
        # is fed to the first linear layer, hence the widened input.
        flattened_width = self.n_hidden * self.seq_len
        self.l_linear = torch.nn.Linear(flattened_width, 512)
        self.l_linear2 = torch.nn.Linear(512, 1)

    def init_hidden(self, batch_size):
        """Reset ``self.hidden`` to a zero (hidden, cell) pair on the model's device."""
        # The state layout stays (n_layers, batch, n_hidden) even with
        # batch_first=True.
        device = next(self.parameters()).device
        state_shape = (self.n_layers, batch_size, self.n_hidden)
        self.hidden = tuple(
            torch.zeros(state_shape).to(device) for _ in range(2)
        )

    def forward(self, x):
        """Map a (batch, seq_len, n_features) tensor to a (batch, 1) prediction."""
        batch_size, _, _ = x.size()
        lstm_out, self.hidden = self.l_lstm(x, self.hidden)
        # Keep the batch dimension and merge the rest; .contiguous() is
        # required before .view() on the LSTM output.
        flat = lstm_out.contiguous().view(batch_size, -1)
        features = F.relu(self.l_linear(F.relu(flat)))
        return self.l_linear2(features)
PyTorch - init and train
# Build the network, loss and optimizer, then run the train / eval loop and a
# final full-test-set evaluation.
# NOTE(review): indentation was lost in this listing.  The loop nesting
# described in the comments below is inferred from statement order — confirm
# against the original script.
# create NN
mv_net = MV_LSTM(n_features,n_timesteps)
criterion = torch.nn.MSELoss()
import keras # for epsilon constant
# Adam set up with the same lr / betas / epsilon / amsgrad values as the Keras
# run, using Keras' own epsilon constant.
optimizer = torch.optim.Adam(mv_net.parameters(),
lr=1e-3,
betas=[0.9,0.999],
eps=keras.optimizers.K.epsilon(),
weight_decay=0,
amsgrad=False)
# Use the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mv_net.to(device)
train_episodes = 200
batch_size = 32
eval_batch_size = 32
# One pass over the training data and one over the test data per episode.
for t in range(train_episodes):
# TRAIN
mv_net.train()
# Walk the training arrays in consecutive slices of batch_size rows.
for b in range(0,len(X_train),batch_size):
inpt = X_train[b:b+batch_size,:,:]
target = Y_train[b:b+batch_size]
x_batch = torch.tensor(inpt,dtype=torch.float32).to(device)
y_batch = torch.tensor(target,dtype=torch.float32).to(device)
# Fresh zero state sized to the actual batch (the last slice may be shorter).
mv_net.init_hidden(x_batch.size(0))
output = mv_net(x_batch)
# Flatten the model output so it lines up with the target vector.
loss = criterion(output.view(-1), y_batch)
loss.backward()
optimizer.step()
# Gradients are cleared after the step, ready for the next batch.
optimizer.zero_grad()
# EVAL
mv_net.eval()
# NOTE(review): this state is replaced by the per-batch init_hidden call
# below before any forward pass, so the call looks redundant — confirm.
mv_net.init_hidden(eval_batch_size)
acc = 0
# NOTE(review): evaluation is not wrapped in torch.no_grad(); outputs are
# detached individually instead.
for b in range(0,len(x_test),eval_batch_size):
inpt = x_test[b:b+eval_batch_size,:,:]
target = y_test[b:b+eval_batch_size]
x_batch = torch.tensor(inpt,dtype=torch.float32).to(device)
y_batch = torch.tensor(target,dtype=torch.float32).to(device)
mv_net.init_hidden(x_batch.size(0))
output = mv_net(x_batch)
# Adds one MSE value per batch.
acc += mean_squared_error(y_batch.cpu().detach().numpy(), output.view(-1).cpu().detach().numpy())
# NOTE(review): acc is a sum of per-batch MSEs but is divided by len(x_test)
# (a sample count), and it is labelled 'eval acc' although it is an error
# measure.  `loss` here is the loss of the last training batch only.
print('step:' , t , 'train loss:' , round(loss.item(),3),'eval acc:',round(acc/len(x_test),3))
# Final check (presumably after the training loop): run the whole test set as
# a single batch and print its MSE.
mv_net.init_hidden(len(x_test))
val = torch.tensor(x_test,dtype=torch.float32).to(device)
otp = mv_net(val)
print(mean_squared_error(y_test, otp.view(-1).cpu().detach().numpy()))
Results
Keras produces a test MSE of almost 0, but PyTorch gives about 6000, which is far too different.
I have tried a couple of tweaks in the PyTorch code, but none got me anywhere close to the Keras result, even with identical optimizer parameters.
I can't see what is wrong with the (fairly tutorial-like) PyTorch code.
I know this is almost a year late, but I came across the same problem and I think the cause is the following. The Keras documentation for the LSTM layer says:
return_sequences: Boolean. Whether to return the last output in the
output sequence, or the full sequence.
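Since return_sequences defaults to False and your Keras model does not set it, the Keras LSTM passes only the last output of the sequence to the next layer. This basically means that your self.l_linear needs to be torch.nn.Linear(1024, 512), i.e. with self.n_hidden input features, instead of torch.nn.Linear(self.n_hidden*self.seq_len, 512).
You also need to do the same as Keras does and use only the last output in your forward pass: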
def forward(self, x):
    """Regress from the LSTM output of the last timestep only.

    The LSTM is run on ``x`` with the stored ``self.hidden`` state (which is
    replaced by the returned state); the final timestep's output then goes
    through ReLU -> ``l_linear`` -> ReLU -> ``l_linear2``.
    """
    # Unpacking three values keeps the requirement that x is 3-D.
    _batch, _steps, _features = x.size()
    relu = torch.nn.functional.relu

    lstm_out, self.hidden = self.l_lstm(x, self.hidden)
    last_step = lstm_out[:, -1]
    activated = relu(self.l_linear(relu(last_step)))
    return self.l_linear2(activated)
When I run your example (which I needed to tweak a bit to get it to run), I get very similar training losses.
Keras:
38/38 [==============================] - 0s 6ms/step - loss: 67.6081 - val_loss: 325.9259
PyTorch:
step: 199 train loss: 41.043 eval acc: 1142.688
I hope this helps others having a similar problem.
PS: also note that Keras resets the hidden state by default (stateful=False).