Val_loss is nan at very first of training - python

val_loss is NaN at very first of training.
The Structure
def trainingResNet(source_folder):
# Preprocessing
image_gen_train = tf.keras.preprocessing.image.ImageDataGenerator(rotation_range=90, horizontal_flip=True)
image_gen_train.fit(x1_train)
# Load Model
model = Network_frame.ResNet18([2, 2, 2, 2])
optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer, loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
metrics=['sparse_categorical_accuracy'])
training_dir = '#%s adam_sca_Undistort' % '5'
checkpoint_save_path = os.path.join('checkpoint', training_dir, 'model.{epoch:02d}-{loss:.2f}.ckpt')
# Callback setting
log_dir = os.path.join('cnnlogs', training_dir)
cp_callback = [
tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0),
tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path, save_weights_only=True, save_best_only=True, monitor='loss', mode='min'),
tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, min_delta=0.005, mode='auto', min_lr=0.0001)]
# Training
history = model.fit([x1_train, x2_train], y_train, batch_size=16, epochs=800,
validation_data=([x1_test, x2_test], y_test), validation_freq=1, callbacks=cp_callback)
Network frame
class ResNetBlock(Model):
def __init__(self, filters, strides=1, residual_path=False):
super(ResNetBlock, self).__init__()
self.filters = filters
self.strides = strides
self.residual_path = residual_path
self.c1 = Conv2D(filters, (3, 3), strides=strides, padding='same', use_bias=False)
self.b1 = BatchNormalization()
self.a1 = Activation('relu')
self.c2 = Conv2D(filters, (3, 3), strides=1, padding='same', use_bias=False)
self.b2 = BatchNormalization()
if residual_path:
self.down_c1 = Conv2D(filters, (1, 1), strides=strides, padding='same', use_bias=False)
self.down_b1 = BatchNormalization()
self.a2 = Activation('relu')
def call(self, inputs):
residual = inputs
x = self.c1(inputs)
x = self.b1(x)
x = self.a1(x)
x = self.c2(x)
y = self.b2(x)
if self.residual_path:
residual = self.down_c1(inputs)
residual = self.down_b1(residual)
out = self.a2(y + residual)
return out
class ResNet18(Model):
def __init__(self, block_list, initial_filters=64):
super(ResNet18, self).__init__()
self.num_blocks = len(block_list)
self.block_list = block_list
self.out_filters = initial_filters
self.c1_1 = Conv2D(self.out_filters, (7, 7), strides=1, padding='same', use_bias=False,
kernel_initializer='he_normal')
self.b1_1 = BatchNormalization()
self.a1_1 = Activation('relu')
self.p1_1 = MaxPool2D(pool_size=(2, 2), strides=1, padding='same')
self.blocks = tf.keras.models.Sequential()
for block_id in range(len(block_list)):
for layer_id in range(block_list[block_id]):
if block_id != 0 and layer_id == 0:
block = ResNetBlock(self.out_filters, strides=2, residual_path=True)
else:
block = ResNetBlock(self.out_filters, residual_path=False)
self.blocks.add(block)
self.out_filters *= 2
self.p2_1 = GlobalAveragePooling2D()
self.f1_1 = Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))
self.d1_1 = Dropout(0.2)
self.f2_1 = Dense(64, activation='relu', )
self.f1_2 = Dense(16, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))
self.d1_2 = Dropout(0.2)
self.f2_2 = Dense(16, activation='relu', )
self.f1 = Dense(64, activation='relu', )
self.d1 = Dropout(0.2)
self.f2 = Dense(32, activation='relu', )
self.d2 = Dropout(0.2)
self.f3 = Dense(40, activation='softmax')
def call(self, inputs):
x1 = self.c1_1(inputs[0])
x1 = self.b1_1(x1)
x1 = self.a1_1(x1)
x1 = self.p1_1(x1)
x1 = self.blocks(x1)
x1 = self.p2_1(x1)
x1 = self.f1_1(x1)
x1 = self.d1_1(x1)
x1 = self.f2_1(x1)
x2 = self.f1_2(inputs[1])
x2 = self.d1_2(x2)
x2 = self.f2_2(x2)
x = tf.keras.layers.concatenate([x1, x2])
x = self.f1(x)
x = self.d1(x)
x = self.f2(x)
x = self.d2(x)
y = self.f3(x)
return y
The network can be fitted normally, but val_loss is NaN at the first epoch
2864/2875 [============================>.] - ETA: 0s - loss: 13.3955 - sparse_categorical_accuracy: 0.0583
2875/2875 [==============================] - 73s 25ms/sample - loss: 13.3595 - sparse_categorical_accuracy: 0.0584 - val_loss: nan - val_sparse_categorical_accuracy: 0.0668
Epoch 2/800
The network can be fitted normally, but val_loss is NaN at the first epoch
The approaches that have been tried
Use other version of TF (currently 2.1)
Use LeakyReLU instead of ReLU
Decrease lr
Make sure the input does not have NaN
Make sure the target is computable by the loss function (greater than 0)
I would like to ask if there is any way to fix it or
it is possible to reproduce the val_loss calculation to get the location of the NaN if the model checkpoint is saved
Many thanks

Related

dont know why is this error: Graph disconnected: cannot obtain value for tensor Tensor

i want to establish the VAE-CNN but i dont know why show this error.
train_datagen = ImageDataGenerator(rescale=1. / 255)
validation_datagen = ImageDataGenerator(rescale=1. / 255)
train_gen = train_datagen.flow_from_directory(
'./train for dataset/',
target_size=(80, 24),
color_mode='grayscale',
batch_size=32,
class_mode='input',
shuffle=True,
seed = 42
)
validation_gen = validation_datagen.flow_from_directory(
'./test/',
target_size=(80, 24),
color_mode='grayscale',
batch_size=32,
class_mode='input',
shuffle=False,
seed = 42
)
#VAE-CNN
filter1_V=64
filter2_V=88
latent_dim_V=20
original_inputs = keras.Input(shape=(80,24,1))
init = tf.keras.initializers.VarianceScaling(scale=0.3, mode='fan_in',distribution='uniform')
layer1_v = layers.Conv2D(filter1_V, kernel_size=3, activation = 'relu', kernel_initializer=init, padding='same', strides = 2)(original_inputs)
layer1_v = layers.MaxPool2D(pool_size=(2,2))(layer1_v)
# strides is 2 in default, which equals to pool_size
layer2_v = layers.Conv2D(filter2_V, kernel_size=3, activation='relu', kernel_initializer=init, padding='same', strides = 2)(layer1_v)
layer2_v = layers.MaxPool2D(pool_size=(2,2))(layer2_v)
layer3_v = layers.Flatten()(layer2_v)
# start to code the core part of mean and variance
#get mean
layer_mean = layers.Dense(latent_dim_V)(layer3_v)
# get log variance, it can get the value from negative to positive, if only use variance, the value is only positive
log_var = layers.Dense(latent_dim_V)(layer3_v)
# dur to the sample, in order to get back propogation, add one parameter which its distribution is normal(0,1)
def sampling(args):
layer_mean,log_var=args
eps = K.random_normal(shape=(K.shape(log_var)[0],latent_dim_V),mean=0.,stddev=1.0)
# reparameterize
# the standard varinace is what we want
std = K.exp(log_var)**0.5
return layer_mean + std * eps
z = layers.Lambda(sampling, output_shape=(latent_dim_V,))([layer_mean, log_var])
#decoder part
dec1_v = layers.Dense(layer3_v.shape[1], activation='relu')(z)
dec2_v = layers.Reshape((layer2_v.shape[1],layer2_v.shape[2],layer2_v.shape[3]))(dec1_v)
dec3_v = layers.Conv2DTranspose(filter2_V, kernel_size=3, output_padding=(1,2), activation = 'relu',kernel_initializer=init, padding = 'same', strides=(2,3))(dec2_v)
dec4_v = layers.Conv2DTranspose(filter1_V, kernel_size=3, activation = 'relu', kernel_initializer=init, padding = 'same', strides=2)(dec3_v)
dec5_v = layers.Conv2DTranspose(filter1_V, kernel_size=3, activation = "relu", kernel_initializer=init, padding = 'same', strides=2)(dec4_v)
dec_v_outputs = layers.Conv2DTranspose(1, kernel_size=3, activation = "relu", kernel_initializer=init, padding = 'same', strides=2)(dec5_v)
encoder_v = keras.Model(inputs=original_inputs, outputs=[z,layer_mean,log_var], name='encoder')
decoder_v = keras.Model(inputs=z, outputs=dec_v_outputs, name='decoder')
outputs = decoder_v(encoder_v(original_inputs)[0])
vae_model = keras.Model(inputs=original_inputs, outputs=outputs, name='vae_model')
vae_model.summary()
kl_loss = -0.5 * K.sum(log_var + 1 - layer_mean**2 - K.exp(log_var), axis=-1)
kl_loss = K.mean(kl_loss)/1920.
lr=1e-3
optimizer = keras.optimizers.Adam(learning_rate=lr)
vae_model.add_loss(kl_loss)
vae_model.compile(optimizer, loss="binary_crossentropy")
history=vae_model.fit(train_gen,train_gen, epochs=4, batch_size=32, validation_data=(validation_gen,validation_gen))
i want to get a VAE-CNN
and there is a error:ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_1:0", shape=(None, 80, 24, 1), dtype=float32) at layer "input_1". The following previous layers were accessed without issue: []
why is it and how to solve?

Nan losses using "Learning Rate Step Decay" Scheduler with Adam Optimizer in Keras?

I have this very deep model:
def get_model2(mask_kind):
decay = 0.0
inp_1 = keras.Input(shape=(64, 101, 1), name="RST_inputs")
x = layers.Conv2D(256, kernel_size=(3, 3), kernel_regularizer=l2(1e-6), strides=(3, 3), padding="same")(inp_1)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Conv2D(128, kernel_size=(3, 3), kernel_regularizer=l2(1e-6), strides=(3, 3), padding="same")(x)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Conv2D(64, kernel_size=(2, 2), kernel_regularizer=l2(1e-6), strides=(2, 2), padding="same")(x)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Conv2D(32, kernel_size=(2, 2), kernel_regularizer=l2(1e-6), strides=(2, 2), padding="same")(x)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Flatten()(x)
x = layers.Dense(512)(x)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Dense(256)(x)
x = layers.LeakyReLU(alpha=0.3)(x)
out1 = layers.Dense(128, name="ls_weights")(x)
if mask_kind == 1: # APPLICA LA PRIMA MASCHERA
binary_mask = layers.Lambda(mask_layer1, name="lambda_layer1", dtype='float64')(out1)
print('shape', binary_mask.shape[0])
elif mask_kind == 2: # APPLICA LA SECONDA MASCHERA
binary_mask = layers.Lambda(mask_layer2, name="lambda_layer2", dtype='float64')(out1)
else: # NON APPLICA NULLA
binary_mask = out1
x = layers.Dense(256)(binary_mask)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Dense(512)(x)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Dense(192)(x)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Reshape((2, 2, 48))(x)
x = layers.Conv2DTranspose(32, kernel_size=(2, 2), strides=(2, 2), padding="same")(x)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Conv2DTranspose(64, kernel_size=(3, 3), strides=(3, 3), padding="same")(x)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Conv2DTranspose(128, kernel_size=(3, 3), strides=(3, 3), padding="same")(x)
x = layers.LeakyReLU(alpha=0.3)(x)
x = layers.Conv2DTranspose(256, kernel_size=(3, 3), strides=(5, 5), padding="same")(x)
x = layers.LeakyReLU(alpha=0.3)(x)
soundfield_layer = layers.Conv2DTranspose(1, kernel_size=(1, 1), strides=(1, 1), padding='same')(x)
# soundfield_layer = layers.Dense(40000, name="sf_vec")(x)
if mask_kind == 1:
model = keras.Model(inp_1, [binary_mask, soundfield_layer], name="2_out_model")
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.1, decay=decay), # in caso
# rimettere 0.001
loss=["mse", "mse"], loss_weights=[1, 1])
# plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)
model.summary()
else:
model = keras.Model(inp_1, [binary_mask, soundfield_layer], name="2_out_model")
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.1, decay=decay), # in caso
# rimettere 0.001
loss=["mse", "mse"], loss_weights=[0, 1])
# plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)
model.summary()
return model
and I'm trying to use Learning rate Step Decay to see if I can improve my validation loss function during training. I'm defining the class for the scheduler as follows:
class StepDecay:
def __init__(self, initAlpha=0.1, factor=0.25, dropEvery=30):
# store the base initial learning rate, drop factor, and
# epochs to drop every
self.initAlpha = initAlpha
self.factor = factor
self.dropEvery = dropEvery
def __call__(self, epoch):
# compute the learning rate for the current epoch
exp = np.floor((1 + epoch) / self.dropEvery)
alpha = self.initAlpha * (self.factor ** exp)
# return the learning rate
return float(alpha)
and then I run my training:
schedule = StepDecay(initAlpha=1e-1, factor=0.25, dropEvery=30)
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=50)
callbacks = [es, LearningRateScheduler(schedule)]
model = get_model2(mask_kind=1)
history = model.fit(X_train, [Y_train, Z_train], validation_data=(X_val, [Y_val, Z_val]), epochs=300,
batch_size=32,
callbacks=callbacks, verbose=1)
test_loss, _, _ = model.evaluate(X_test, [Y_test, Z_test], verbose=1)
print('Test: %.3f' % test_loss)
but when I train I get "nan" losses:
25/25 [==============================] - 17s 684ms/step - loss: nan - lambda_layer1_loss: nan - conv2d_transpose_4_loss: nan - val_loss: nan - val_lambda_layer1_loss: nan etc....
and I don't understand why. The problem could be the decay rate which is a parameter present in the SGD optimizer but that from the documentation does not exists for Adam, but I get no error that so..any ideas?
You can play with the parameters to find a good balance, but this is one way to use exponential decay as a callback function with the Adam optimizer.
LR_MAX = 0.0001
LR_MIN = 0.00001
LR_EXP_DECAY = 0.85
def lrfn(epoch):
lr = (LR_MAX - LR_MIN) * LR_EXP_DECAY**(epoch) + LR_MIN
return lr
lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose = True)
Simply define the callback like in the following example.
model.fit(..
..
callbacks = [lr_callback],
..
..)

Convert from keras to pytorch

I am training a multilabel classifier in keras ,i want to convert that to pytorch , i am mostly confused about how to handle the loss ,this is what the code looks like
model = keras.applications.densenet.DenseNet121(include_top=False, input_shape=(224, 224, 3))
x = model.output
x = Flatten()(x)
x = Dense(512)(x)
x = Activation('relu')(x)
x = Dropout(0.5)(x)
output1 = Dense(1, activation = 'sigmoid')(x)
output2 = Dense(1, activation = 'sigmoid')(x)
output3 = Dense(1, activation = 'sigmoid')(x)
output4 = Dense(1, activation = 'sigmoid')(x)
output5 = Dense(1, activation = 'sigmoid')(x)
output6 = Dense(1, activation = 'sigmoid')(x)
output7 = Dense(1, activation = 'sigmoid')(x)
output8 = Dense(1, activation = 'sigmoid')(x)
model = Model(model.inputs,[output1,output2,output3,output4,output5, output6, output7, output8])
# print(model.summary())
model.compile(optimizers.rmsprop(lr = 0.0001, decay = 1e-6),
loss = ["binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy", "binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy"],metrics = ["accuracy"])
How can i do this in pytorch ,this is what i have till now
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
self.conv2_drop = nn.Dropout2d()
self.fc1 = nn.Linear(2304, 256)
self.fc2 = nn.Linear(256, 8)
def forward(self, x):
x = F.relu(F.max_pool2d(self.conv1(x), 2))
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
x = x.view(x.size(0), -1) # Flatten layer
x = F.relu(self.fc1(x))
x = F.dropout(x, training=self.training)
x = self.fc2(x)
return F.sigmoid(x)
model = Net().cuda()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
# data, target = data.cuda(async=True), target.cuda(async=True) # On GPU
data, target = Variable(data).cuda(), Variable(target).float().cuda()
optimizer.zero_grad()
output = model(data)
loss = F.binary_cross_entropy(output, target)
loss.backward()
optimizer.step()
if batch_idx % 10 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
for epoch in range(1, 10):
train(epoch)
Thanks in advance, any suggestions will be very helpful

Loss value becomes constant after some training steps while training a CNN in Tensorflow

I'm trying to develop a convolutional neural network for image classification.
Currently I am working on classifying a set from about 1000 cats and dogs images.
However, I´m stuck in the training process.
Firstly, I tried to develop my own network, preprocessing and labeling the images myself, testing with different architectures and hyperparameters using Tensorflow.
As I didn't obtain good results, I tried to create a similar network with keras, obtainig better results.
In the following code I create the trainig and validation sets for the tensorflow network:
def oneHot(img):
label = img.split('.')[-3]
if label == 'cat': return [1, 0]
elif label == 'dog': return [0, 1]
def loadData(img_dir):
global img_h
global img_w
data_set = []
for img in tqdm(os.listdir(img_dir)):
label = oneHot(img)
path = os.path.join(img_dir, img)
img = cv2.imread(path)
img = cv2.resize(img, (img_h, img_w))
data_set.append([np.array(img/255, dtype='float32'), np.array(label)])
shuffle(data_set)
return data_set
def divideSet(data_set, train_size):
len_train = int(len(data_set)*train_size)
train_set = data_set[:len_train]
valid_set = data_set[len_train:]
return train_set, valid_set
def separateArgLabel(data_set):
arg = np.array([i[0] for i in data_set])
label = np.array([i[1] for i in data_set])
return arg, label
train_set = loadData(train_dir)
train_data, valid_data = divideSet(train_set, 0.8)
x_train, y_train = separateArgLabel(train_data)
x_valid, y_valid = separateArgLabel(valid_data)
And the code that I used to build and train my model in tensorflow:
def flattenLayer(x):
layer_shape = x.get_shape()
n_input = layer_shape[1:4].num_elements()
flat_layer = tf.reshape(x,[-1,n_input])
return flat_layer
def getRandomBatch(x, y, size):
rnd_idx = np.random.choice(len(x), size)
x_batch = x[rnd_idx]
y_batch = y[rnd_idx]
return x_batch, y_batch
with tf.Session() as sess:
x = tf.placeholder(tf.float32, shape=[None,img_w,img_h,img_c])
y = tf.placeholder(tf.float32, shape=[None,2])
conv1 = tf.layers.conv2d(x, 32, [5,5], strides=1, padding='same',
activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(conv1, pool_size=[2,2], strides=2)
conv2 = tf.layers.conv2d(pool1, 64, [5,5], strides=1, padding='same',
activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(conv2, pool_size=[2,2], strides=2)
conv3 = tf.layers.conv2d(pool2, 128, [5,5], strides=1, padding='same',
activation=tf.nn.relu)
pool3 = tf.layers.max_pooling2d(conv3, pool_size=[2,2], strides=2)
conv4 = tf.layers.conv2d(pool3, 64, [5,5], strides=1, padding='same',
activation=tf.nn.relu)
pool4 = tf.layers.max_pooling2d(conv4, pool_size=[2,2], strides=2)
conv5 = tf.layers.conv2d(pool4, 32, [5,5], strides=1, padding='same',
activation=tf.nn.relu)
pool5 = tf.layers.max_pooling2d(conv5, pool_size=[2,2], strides=2)
flatten = flattenLayer(pool5)
fc1 = tf.layers.dense(flatten, 1024, activation=tf.nn.relu)
logits = tf.layers.dense(fc1, 2, activation=tf.nn.relu)
y_pred = tf.nn.softmax(logits)
cross_entropy = losses.categorical_crossentropy(y, y_pred)
loss = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(0.0005)
grads = optimizer.compute_gradients(loss)
train = optimizer.apply_gradients(grads)
y_cls = tf.arg_max(y, 1)
y_pred_cls = tf.arg_max(y_pred, 1)
correct = tf.equal(y_pred_cls, y_cls)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
sess.run(init)
for epoch in range(10):
sum_loss_train = 0
sum_acc_train = 0
for i in range(100):
batch_x, batch_y = getRandomBatch(x_train, y_train, 32)
feed_dict_train = {x:batch_x, y:batch_y}
_,loss_train,acc_train = sess.run([train,loss,accuracy],
feed_dict=feed_dict_train)
sum_loss_train += loss_train
sum_acc_train += acc_train
sys.stdout.write('\r'+str(i+1)+'/'+str(100)+'\t'+'loss: '+
str(sum_loss_train/(i+1))+' accuracy: '+str(acc_train))
sys.stdout.flush()
mean_loss_train = sum_loss_train/(i+1)
mean_acc_train = sum_acc_train/(i+1)
print("\nÉpoca: " + str(epoch+1) + " ===========> Epoch loss: " + "
{:.4f}".format(mean_loss_train))
print("\tEpoch accuracy: " + "{:.2f} %".format(mean_acc_train*100))
sum_loss_val = 0
sum_acc_val = 0
for j in range(50):
batch_x_val, batch_y_val = getRandomBatch(x_valid, y_valid, 32)
feed_dict_valid = {x:batch_x_val, y:batch_y_val}
loss_val,acc_val = sess.run([loss,accuracy],
feed_dict=feed_dict_valid)
sum_acc_val += acc_val
sum_loss_val += loss_val
mean_acc_val = sum_acc_val/(j+1)
mean_loss_val = sum_loss_val/(j+1)
print("\nValidation loss: " + "{:.4f}".format(mean_loss_val))
print("\tValidation accuracy: " + "{:.2f} %".format(mean_acc_val*100))
When I run the model, after some iterations, the gradients always became zero an the loss got stuck in a constant value.
At first I thought the network stopped learning because of the lack of images, but when I tried to train the same dataset with a network built in Keras, the results were pretty good.
I used the same number of layers, the same hiperparameters and I processed the images the same way in both cases. Although the weigth's initialization may differ, the results make me think that there is some error in the code I added.
Could someone help me with this issue?

Returning 3 images from data generator

I am trying to pass to my triplet network 3 images using my data generator. I am loading the different pairs and stacking them into batches. I don't know how can I return it back as 3 different arrays. I tried appending into a list, but that also didn't work. How can I use a data generator to return them back?
class DataGenerator(keras.utils.Sequence):
'Generates data for Keras'
def __init__(self, list_IDs, batch_size=16, dim=(244,244,3), n_channels=3, shuffle=True):
'Initialization'
self.dim = dim
self.batch_size = batch_size
self.list_IDs = list_IDs
self.n_channels = n_channels
self.shuffle = shuffle
self.on_epoch_end()
def __len__(self):
'Denotes the number of batches per epoch'
return int(np.floor(len(self.list_IDs) / self.batch_size))
def __getitem__(self, index):
'Generate one batch of data'
# Generate indexes of the batch
indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
# Find list of IDs
list_IDs_temp = [self.list_IDs[k] for k in indexes]
# Generate data
X,Z, y = self.__data_generation(list_IDs_temp)
return X, Z, y
def on_epoch_end(self):
'Updates indexes after each epoch'
self.indexes = np.arange(len(self.list_IDs))
if self.shuffle == True:
np.random.shuffle(self.indexes)
# V = np.stack((X, Z), axis=-1)
# F = np.stack((V, y), axis=-1)
def __data_generation(self, list_IDs_temp):
'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
# Initialization
X = np.empty((self.batch_size, *self.dim))
Z = np.empty((self.batch_size, *self.dim))
y = np.empty((self.batch_size, *self.dim))
# Generate data
for i, ID in enumerate(list_IDs_temp):
# Store sample
image = plt.imread(os.path.join(IMAGE_DIR, ID[0])).astype(np.float32)
image = imresize(image, (IM_SIZE, IM_SIZE))
image1 = plt.imread(os.path.join(IMAGE_DIR, ID[1])).astype(np.float32)
image1 = imresize(image1, (IM_SIZE, IM_SIZE))
image2 = plt.imread(os.path.join(IMAGE_DIR, ID[2])).astype(np.float32)
image2 = imresize(image2, (IM_SIZE, IM_SIZE))
X[i,] = image
Z[i,] = image1
y[i,] = image2
return X, Z, y
input_a = Input(shape=(224,224,3))
input_b = Input(shape=(224,224,3))
input_c = Input(shape=(224,224,3))
conv = Sequential([
Conv2D(24, (7, 7), strides=(1,1), input_shape=(224,224,3)),
BatchNormalization(epsilon=1e-06, axis=1, momentum=0.9),
MaxPooling2D((3,3), strides=(2, 2)),
Activation('relu'),
Dropout(0.2),
ZeroPadding2D((2, 2)),
Conv2D(64, (5, 5), padding='same', strides=(1,1), kernel_initializer='glorot_uniform'),
BatchNormalization(epsilon=1e-06, axis=1, momentum=0.9),
MaxPooling2D((3,3), strides=(2, 2)),
Activation('relu'),
Dropout(0.2),
ZeroPadding2D((1, 1)),
Conv2D(96, (3,3), padding='same', strides=(1,1),kernel_initializer='glorot_uniform'),
BatchNormalization(epsilon=1e-06, axis=1, momentum=0.9),
MaxPool2D(pool_size=(2,2), strides=(2,2)),
Activation('relu'),
Dropout(0.2),
ZeroPadding2D((1, 1)),
Conv2D(96, (3,3), padding='same', strides=(1,1),kernel_initializer='glorot_uniform'),
BatchNormalization(epsilon=1e-06, axis=1, momentum=0.9),
Activation('relu'),
MaxPool2D(pool_size=(2,2), strides=(2,2)),
Dropout(0.2),
ZeroPadding2D((1, 1)),
Conv2D(64, (5, 5), padding='same', strides=(1,1), kernel_initializer='glorot_uniform'),
BatchNormalization(epsilon=1e-06, axis=1, momentum=0.9),
Activation('relu', name="activation_1_5"),
MaxPooling2D((3,3), strides=(2, 2)),
Dropout(0.2),
Dense(256, activation='relu'),
Flatten()
])
net1 = conv(input_a)
net2 = conv(input_b)
net3 = conv(input_c)
d1 = subtract(net1, net2)
d2 = subtract(net1, net3)
n1 = norm(d1)
n2 = norm(d2)
out = Activation('sigmoid')(subtract(n2, n1))
model = Model(inputs=[input_a, input_b, input_c], outputs=out)
params = {'dim': (224,224,3),
'batch_size': BATCH_SIZE,
'n_channels': 3,
'shuffle': False}
paramsv = {'dim': (224,224,3),
'batch_size': BATCH_SIZE,
'n_channels': 3,
'shuffle': True}
training_generator = DataGenerator(partition_image['train'], **params)
validation_generator = DataGenerator(partition_image['validation'], **paramsv)
opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=1e-6)
filepath = 'weights/weights.{epoch:02d}-{val_loss:.2f}.hdf5'
cpkt1 = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=True, mode='auto', period=1)
cpkt2 = TensorBoard(log_dir='tensorboard/', histogram_freq=0, write_graph=True, write_images=True)
cpkt3 = EarlyStopping(monitor='val_loss', min_delta=0, patience=4, verbose=0, mode='auto')
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy'])
model.fit_generator(generator=training_generator,
validation_data=validation_generator,
steps_per_epoch=int(np.ceil(len(partition_image['train']) / BATCH_SIZE) ),
validation_steps=int(np.ceil(len(partition_image['validation']) / BATCH_SIZE) ),
epochs= EPOCHS,
shuffle = True,
verbose=1, callbacks=[cpkt1,cpkt2,cpkt3])
ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 3 array(s), but instead got the following list of 1 arrays: [array([[[[180., 189., 194.],
[...
There might be other solutions, but what I do is to name my input layers and then use as inputs an dictionary with the same names.
So in your model you should name your inputs:
input_a = Input(shape=(224,224,3), name = "input_a")
input_b = Input(shape=(224,224,3), name = "input_b")
input_c = Input(shape=(224,224,3), name = "input_b")
Then, in the generator must return something like this:
inputs ={"input_a":X,
"input_b":Z,
"input_c":y}
outputs ={"output":o}
return inputs,outputs
You can find and example with a generator with multiple inputs in this keras example

Categories

Resources