I use TensorFlow's Dataset such that y is a dictionary of 6 tensors, all of which I use in a single loss function that looks like this:
def CustomLoss():
    """Build the combined loss used for all three model outputs.

    Returns:
        A ``custom_loss(y_true, y_pred)`` callable. ``y_true`` is indexed by
        the keys ``'a_0'``, ``'a_1'``, ``'b_0'``, ``'b_1'``, ``'c_0'`` and
        ``'c_1'``; ``y_pred`` is indexed positionally (0, 1, 2).
    """

    def custom_loss(y_true, y_pred):
        # Each raw term is scaled by its companion ``*_1`` tensor.
        bce_term = tf.keras.losses.binary_crossentropy(y_true['a_0'], y_pred[0])
        squared_term = tf.square(y_true['b_0'] - y_pred[1])
        absolute_term = tf.abs(y_true['c_0'] - y_pred[2])
        return (
            bce_term * y_true['a_1']
            + squared_term * y_true['b_1']
            + absolute_term * y_true['c_1']
        )

    return custom_loss
And I have a model with 3 outputs of different shapes. When I compile the model and call the fit method I get a ValueError:
# A single callable is handed to compile() as the loss for the whole model.
combined_loss = CustomLoss()
model.compile(loss=combined_loss, optimizer=optimizer)
model.fit(dataset, epochs=10)
ValueError: Found unexpected keys that do not correspond to any
Model output: dict_keys(['a_0', 'a_1', 'b_0', 'b_1', 'c_0', 'c_1']).
Expected: ['output_0', 'output_1', 'output_2']
where 'output_0', 'output_1', 'output_2' are the names of the output layers.
I figured that naming the output layers by the keys in the dataset should solve the issue but the problem is I have 6 tensors in the dataset and only 3 outputs. I'm aware I can assign a loss function to every output with a single dataset ground truth tensor, but again I need to pass at least two tensors as GT.
So far I've used a custom training loop but I'd rather use the fit method. I'm using tensorflow 2.3.1
EDIT:
Example model:
# One ResNet50 backbone (no top, no pretrained weights) feeding three heads.
inputs = tf.keras.layers.Input((256, 256, 3))
x = tf.keras.applications.ResNet50(include_top=False, weights=None)(inputs)

# Head 1: flattened features -> 2 units.
x1 = tf.keras.layers.Dense(2, name='output_1')(tf.keras.layers.Flatten()(x))
# Head 2: 1x1 convolution with 256 filters on the backbone feature map.
x2 = tf.keras.layers.Conv2D(256, 1, name='output_2')(x)
# Head 3: flattened features -> 64 units.
x3 = tf.keras.layers.Dense(64, name='output_3')(tf.keras.layers.Flatten()(x))

model = tf.keras.Model(inputs=inputs, outputs=[x1, x2, x3])
Custom training loop:
# Running mean of the per-batch total loss; reset at the end of every epoch.
avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)

for epoch in range(1, epochs + 1):
    for images, labels in train_dataset:
        with tf.GradientTape() as tape:
            # training=True: this is the optimisation pass, so layers that
            # behave differently between training and inference must run in
            # training mode.
            outputs = model(images, training=True)
            # Losses registered on the model (e.g. by regularizers).
            reg_loss = tf.reduce_sum(model.losses)
            pred_loss = loss(labels, outputs)
            total_loss = tf.reduce_sum(pred_loss) + reg_loss
        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        avg_loss.update_state(total_loss)
    print(f'Epoch {epoch}/{epochs} - Loss: {avg_loss.result().numpy()}')
    avg_loss.reset_states()
Minimal reproducible code:
import tensorflow as tf
def CustomLoss():
    """Build the combined loss used by the reproducible example.

    Returns:
        A ``custom_loss(y_true, y_pred)`` callable that reads the keys
        ``'a_0'``/``'a_1'``, ``'b_0'``/``'b_1'`` and ``'c_0'``/``'c_1'`` from
        ``y_true`` and the entries 0, 1 and 2 from ``y_pred``.
    """

    def custom_loss(y_true, y_pred):
        # Term a: binary cross-entropy, scaled by 'a_1'.
        bce_term = tf.keras.losses.binary_crossentropy(y_true['a_0'], y_pred[0])
        bce_term = bce_term * y_true['a_1']

        # Term b: squared error scaled by 'b_1', summed over axes 1-3.
        squared_term = tf.square(y_true['b_0'] - y_pred[1]) * y_true['b_1']
        squared_term = tf.reduce_sum(squared_term, axis=(1, 2, 3))

        # Term c: absolute error scaled by 'c_1', summed over axis 1.
        absolute_term = tf.abs(y_true['c_0'] - y_pred[2]) * y_true['c_1']
        absolute_term = tf.reduce_sum(absolute_term, axis=1)

        return bce_term + squared_term + absolute_term

    return custom_loss
# A single (image, label-dict) example, batched to size 1.
image = tf.random.uniform((256, 256, 3))
labels = {
    'a_0': [0., 1.],
    'a_1': [1.],
    'b_0': tf.random.uniform((8, 8, 256)),
    'b_1': [1.],
    'c_0': tf.random.uniform((64,)),
    'c_1': [1.],
}
dataset = tf.data.Dataset.from_tensors((image, labels)).batch(1)

# Shared ResNet50 backbone with three differently shaped heads.
inputs = tf.keras.layers.Input((256, 256, 3))
x = tf.keras.applications.ResNet50(include_top=False, weights=None)(inputs)
x1 = tf.keras.layers.Dense(2, name='output_1')(tf.keras.layers.Flatten()(x))
x2 = tf.keras.layers.Conv2D(256, 1, name='output_2')(x)
x3 = tf.keras.layers.Dense(64, name='output_3')(tf.keras.layers.Flatten()(x))
model = tf.keras.Model(inputs=inputs, outputs=[x1, x2, x3])

# Compile with the single combined loss and run one epoch.
optimizer = tf.keras.optimizers.Adam(1e-3)
model.compile(loss=CustomLoss(), optimizer=optimizer)
model.fit(dataset, epochs=1)
Here is one approach for your case. We will still use a custom training loop, but we can also take advantage of the convenient .fit method by customizing what it does. Please check the documentation for more details: Customizing what happens in fit()
Here is one simple demonstration, extending your reproducible code.
import tensorflow as tf
# Data set: one (image, label-dict) example, batched to size 1.
example_image = tf.random.uniform((256, 256, 3))
example_labels = {
    'a_0': [0., 1.],
    'a_1': [1.],
    'b_0': tf.random.uniform((8, 8, 256)),
    'b_1': [1.],
    'c_0': tf.random.uniform((64,)),
    'c_1': [1.],
}
dataset = tf.data.Dataset.from_tensors((example_image, example_labels))
dataset = dataset.batch(1)
# Custom loss combining the three model outputs.
def loss(y_true, y_pred):
    """Return the sum of the three per-output loss terms.

    Args:
        y_true: mapping read with the keys ``'a_0'``, ``'a_1'``, ``'b_0'``,
            ``'b_1'``, ``'c_0'`` and ``'c_1'``.
        y_pred: sequence of the three model outputs, read at 0, 1 and 2.
    """
    # Binary cross-entropy on output 0, scaled by 'a_1'.
    term_a = tf.keras.losses.binary_crossentropy(y_true['a_0'], y_pred[0])
    term_a = term_a * y_true['a_1']

    # Squared error on output 1, scaled by 'b_1' and summed over axes 1-3.
    term_b = tf.reduce_sum(
        tf.square(y_true['b_0'] - y_pred[1]) * y_true['b_1'],
        axis=(1, 2, 3),
    )

    # Absolute error on output 2, scaled by 'c_1' and summed over axis 1.
    term_c = tf.reduce_sum(
        tf.abs(y_true['c_0'] - y_pred[2]) * y_true['c_1'],
        axis=1,
    )

    return term_a + term_b + term_c
Custom Model
This is basically overriding the train_step that will run repeatedly over each batch of data.
# Running mean of the total loss, reported by train_step().
avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)


class custom_fit(tf.keras.Model):
    """Keras model whose ``fit()`` runs the hand-written training step below."""

    def train_step(self, data):
        """Run one optimisation step on a batch and report the running loss.

        Args:
            data: an ``(images, labels)`` pair as yielded by the dataset.

        Returns:
            ``{"loss": <running mean of the total loss>}``.
        """
        images, labels = data
        with tf.GradientTape() as tape:
            outputs = self(images, training=True)  # forward pass
            # Losses registered on the model (e.g. by regularizers).
            reg_loss = tf.reduce_sum(self.losses)
            pred_loss = loss(labels, outputs)
            total_loss = tf.reduce_sum(pred_loss) + reg_loss
        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        avg_loss.update_state(total_loss)
        return {"loss": avg_loss.result()}

    # `metrics` must be a property (not a plain method): it overrides the
    # attribute of the same name on tf.keras.Model.
    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [avg_loss]
Build Model
# Model: shared ResNet50 backbone with three heads.
inputs = tf.keras.layers.Input((256, 256, 3))
x = tf.keras.applications.ResNet50(include_top=False, weights=None)(inputs)
x1 = tf.keras.layers.Dense(2, name='output_1')(tf.keras.layers.Flatten()(x))
x2 = tf.keras.layers.Conv2D(256, 1, name='output_2')(x)
x3 = tf.keras.layers.Dense(64, name='output_3')(tf.keras.layers.Flatten()(x))

# Simply pass the inputs and outputs to the custom model.
custom_model = custom_fit(inputs=[inputs], outputs=[x1, x2, x3])
Compile and Fit
# No loss is passed to compile(); only the optimizer is configured here.
custom_model.compile(optimizer='adam')
custom_model.fit(
    dataset,
    epochs=5,
    verbose=2,
)
Epoch 1/5
1/1 - 6s - loss: 73784.0078
Epoch 2/5
1/1 - 1s - loss: 64882.8984
Epoch 3/5
1/1 - 1s - loss: 54760.2500
Epoch 4/5
1/1 - 1s - loss: 47696.7031
Epoch 5/5
1/1 - 1s - loss: 40574.6328
Related
I would like to see what is happening in my loss function during model fitting.
However, I cannot figure out how to do that.
This is what I am trying but it does not work:
def custom_loss(label : tf.Tensor, pred : tf.Tensor) -> tf.Tensor:
    """Masked binary cross-entropy that prints its intermediate tensors.

    Positions where ``label`` equals 0 are masked out; the summed loss is
    divided by the number of unmasked positions.

    Args:
        label: target tensor.
        pred: model output, treated as logits (``from_logits=True``).

    Returns:
        Scalar loss tensor.
    """
    mask = label != 0
    loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction='none')
    loss = loss_object(label, pred)
    mask = tf.cast(mask, dtype=loss.dtype)
    tf.print("\n---------------------------")
    # Pass the tensors themselves: str(tensor) would be evaluated once while
    # the function is traced and only shows the symbolic tensor description.
    tf.print("custom_loss - str(loss):", loss)
    tf.print("custom_loss - str(mask):", mask)
    try:
        tf.print("tf.keras.backend.eval(loss):", tf.keras.backend.eval(loss))
    except Exception:
        # Narrowed from a bare `except:` so that KeyboardInterrupt/SystemExit
        # are not swallowed.
        tf.print("tf.keras.backend.eval(loss) does not work - exception!")
    # `batch_size` is read from the enclosing module scope.
    loss = tf.reshape(loss, shape=(batch_size, loss.shape[1], 1))
    loss *= mask
    loss = tf.reduce_sum(loss)/tf.reduce_sum(mask)
    tf.print("\n============================")
    return loss
After starting training by calling the fit() function I only get the following output:
2/277 [..............................] - ETA: 44s - loss: 0.6931 - masked_accuracy: 0.0000e+00
---------------------------
custom_loss - str(loss): Tensor("custom_loss/binary_crossentropy/weighted_loss/Mul:0", shape=(None, 20), dtype=float32)
custom_loss - str(mask): Tensor("custom_loss/Cast:0", shape=(None, 20, 1), dtype=float32)
tf.keras.backend.eval(loss) does not work - exception!
How do I display the actual value of label, pred, mask and loss?
In TF 2 Keras, this can be done by training the model in eager mode, i.e. by setting run_eagerly=True, which is an argument available in the model.compile method. From the docs:
run_eagerly: Bool. Defaults to False. If True, this Model's logic will not be wrapped in a tf.function.
Now, the end-to-end solution can be achieved in many ways, e.g. with the straightforward model.fit method or by customizing the fit method. Here are some pointers.
# Un-reduced binary cross-entropy on logits, shared by custom_loss().
loss_object = keras.losses.BinaryCrossentropy(from_logits=True, reduction='none')


def custom_loss(label : tf.Tensor, pred : tf.Tensor) -> tf.Tensor:
    """Masked binary cross-entropy that prints its tensors in eager mode.

    Positions where ``label`` equals 1 are masked out; the summed loss is
    divided by the number of unmasked positions.
    """
    per_example = loss_object(label, pred)
    keep = tf.cast(label != 1, dtype=per_example.dtype)

    # Plain print() only shows concrete values when running eagerly.
    if tf.executing_eagerly():
        print("custom_loss - str(loss): \n", str(per_example))
        print("custom_loss - str(mask): \n", str(keep), '\n'*2)

    # Flatten everything after the batch axis before applying the mask.
    batch = tf.shape(per_example)[0]
    per_example = tf.reshape(per_example, shape=(batch, -1))
    per_example *= keep
    return tf.reduce_sum(per_example) / tf.reduce_sum(keep)
With vanilla model.fit:
# Construct a plain functional model: 32 input features -> 1 logit.
inputs = keras.Input(shape=(32,))
outputs = keras.layers.Dense(1, activation=None)(inputs)
model = keras.Model(inputs, outputs)

# The custom loss is passed to compile(); run_eagerly=True keeps the model's
# logic out of a tf.function.
model.compile(optimizer="adam", loss=custom_loss, run_eagerly=True)

# Just use `fit` as usual -- you can use callbacks, etc.
x = tf.random.normal(shape=[10, 32], mean=0, stddev=1, dtype=tf.float32)
y = np.random.randint(0, 2, size=(10, 1))
model.fit(x, y, epochs=5)
custom_loss - str(loss):
tf.Tensor(
[0.3215071 0.6470841 3.401876 1.6478868 0.4492059 0.67835623
0.1574089 1.3314284 1.9282155 0.5588544 ], shape=(10,), dtype=float32)
custom_loss - str(mask):
tf.Tensor(
[[0.]
[0.]
[1.]
[1.]
[1.]
[0.]
[0.]
[0.]
[0.]
[0.]], shape=(10, 1), dtype=float32)
1/1 [==============================] - 0s 20ms/step - loss: 1.8330
<keras.callbacks.History at 0x7f4332ef4d10>
Or, with a customized model.fit, the output would be the same as above.
class CustomModel(keras.Model):
    """Keras model that computes ``custom_loss`` in its own training step."""

    def train_step(self, data):
        """Run one optimisation step on ``data`` and return ``{"loss": loss}``."""
        features, targets = data

        # Instead of printing values inside the custom loss, the same
        # inspection could be done right here.
        with tf.GradientTape() as tape:
            # Forward pass, then our own loss.
            loss = custom_loss(targets, self(features, training=True))

        # Compute gradients and update the weights.
        weights = self.trainable_variables
        self.optimizer.apply_gradients(zip(tape.gradient(loss, weights), weights))
        return {"loss": loss}
# Construct an instance of CustomModel: 32 input features -> 1 logit.
inputs = keras.Input(shape=(32,))
outputs = keras.layers.Dense(1, activation=None)(inputs)
model = CustomModel(inputs, outputs)

# No loss is passed here; CustomModel.train_step computes it.
model.compile(run_eagerly=True, optimizer="adam")
model.fit(x, y)
And lastly, if you want to go with lower-level operations, you can use a custom training loop. Check the mentioned blogs; they're pretty resourceful.
I applied PCA on MNIST with a reduced dimensionality of 32. Then, to test it, I created a simple classification network. The train accuracy is good: 96%, but on the other hand, the test accuracy is 2%.
So what's wrong?
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import tensorflow.keras.layers as layers
from tensorflow.keras.models import Sequential
import numpy as np
# Load MNIST and one-hot encode both label vectors.
(x, y), (x2, y2) = mnist.load_data()
y, y2 = (tf.keras.utils.to_categorical(labels) for labels in (y, y2))
def pca(x):
    """Return a 32-column SVD-based reduction of the images in ``x``.

    The images are flattened to 784 values and scaled by 1/255, each row has
    its own mean subtracted, and the first 32 columns of ``u`` are multiplied
    by the matching block of the diagonal singular-value matrix.
    """
    flat = np.reshape(x, (x.shape[0], 784)).astype("float32") / 255.
    row_mean = flat.mean(axis=1)
    flat -= row_mean[:, None]

    singular_values, left_vectors, _ = tf.linalg.svd(flat)
    sigma = tf.linalg.diag(singular_values)

    k = 32  # DIM_REDUCED
    return tf.matmul(left_vectors[:, 0:k], sigma[0:k, 0:k])
# Each set is passed through pca() separately.
x, x2 = pca(x), pca(x2)

## BUILD A SUPER SIMPLE CLASSIFIC. NET
model = Sequential([
    layers.Dense(32, activation="relu", input_shape=(32,)),
    layers.Dense(16, activation="relu"),
    layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["acc"])
model.fit(
    x,
    y,
    batch_size=64,
    epochs=5,
    verbose=1,
    validation_data=(x2, y2),
)
OUTPUT:
Epoch 5/5
60000/60000 [==============================] - 1s 23us/sample - loss: 0.1278 - acc: 0.9626 - val_loss: 11.0141 - val_acc: 0.0202
First, you are applying two different "pca" transforms, one for each set. The eigenvectors and eigenvalues of the train set can be different from those of the test set.
Second, you are using SVD to obtain the principal components, but these components are not the result that you want. Use the principal axes as a projection matrix to obtain a better/compressed representation of the data.
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import tensorflow.keras.layers as layers
from tensorflow.keras.models import Sequential
import numpy as np
# Load MNIST; labels are converted to one-hot vectors.
(x, y), (x2, y2) = mnist.load_data()
one_hot = tf.keras.utils.to_categorical
y, y2 = one_hot(y), one_hot(y2)
def pca(x, k=32):
    """Fit a PCA projection on the images in ``x``.

    The images are flattened to 784 values, scaled by 1/255 and centred with
    the per-feature mean before the SVD is taken.

    Args:
        x: image array whose first axis indexes the samples.
        k: number of columns of ``v`` to keep (DIM_REDUCED); defaults to the
            previously hard-coded 32.

    Returns:
        ``(mean, projM)``: the per-feature mean that was subtracted and the
        first ``k`` columns of ``v``, for use with ``apply_pca``.
    """
    x = np.reshape(x, (x.shape[0], 784)).astype("float32") / 255.
    mean = x.mean(axis=0)
    x -= mean[None, :]
    # Only `v` is needed for the projection; the singular values and `u`
    # were computed into unused locals before and are now discarded.
    _, _, v = tf.linalg.svd(x)
    projM = v[:, 0:k]
    return mean, projM
def apply_pca(mean, projM, x):
    """Project the images in ``x`` with an already fitted mean and matrix.

    Args:
        mean: per-feature mean to subtract.
        projM: projection matrix applied on the right.
        x: image array whose first axis indexes the samples.

    Returns:
        ``(x_flat - mean) @ projM`` as a tensor.
    """
    flat = np.reshape(x, (x.shape[0], 784)).astype("float32") / 255.
    flat -= mean[None, :]
    return tf.matmul(flat, projM)
# Fit the projection on the training images only, then apply it to both sets.
mean, projM = pca(x)
x, x2 = (apply_pca(mean, projM, images) for images in (x, x2))

## BUILD A SUPER SIMPLE CLASSIFIC. NET
model = Sequential([
    layers.Dense(32, activation="relu", input_shape=(32,)),
    layers.Dense(16, activation="relu"),
    layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["acc"])
model.fit(
    x,
    y,
    batch_size=64,
    epochs=5,
    verbose=1,
    validation_data=(x2, y2),
)
I train a Siamese network with contrastive loss on two classes of the MNIST dataset to identify whether two images are similar or not. Although the loss decreases in the beginning, it later freezes with accuracy around 0.5.
The model is trained on pairs of images and a label (0.0 for different, 1.0 for identical). I used only two classes for simplicity (zeros and ones) and prepared the dataset, so that it contains every pair of images. I've checked that the dataset is consistent (image pairs from dataset). I've also experimented with data normalization, different batch sizes, learning rates, initializations and regularization constants with no luck.
This is the model:
class Encoder(Model):
    """
    A network that finds a 50-dimensional representation of the input images
    so that the distances between them minimize the contrastive loss
    """

    def __init__(self):
        super(Encoder, self).__init__(name='encoder')
        self.cv = Conv2D(32, (3, 3), activation='relu', padding='Same',
                         input_shape=(28, 28, 1),
                         kernel_regularizer=tf.keras.regularizers.l2(0.01))
        self.pool = MaxPooling2D((2, 2))
        self.flatten = Flatten()
        self.dense = Dense(50, activation=None,
                           kernel_regularizer=tf.keras.regularizers.l2(0.01))

    def call(self, inputs, training=None, mask=None):
        """ Forward pass for one image """
        x = self.cv(inputs)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.dense(x)
        return x

    # Declared static: the function takes no `self` and is called as
    # `Encoder.distance(...)`.
    @staticmethod
    def distance(difference):
        """ The D function from the paper which is used in loss """
        # Euclidean norm along axis 0 of `difference`.
        distance = tf.sqrt(tf.reduce_sum(tf.pow(difference, 2), 0))
        return distance
The loss and accuracy:
def simnet_loss(target, x1, x2):
    """Contrastive loss between two batches of embeddings (margin 1.0).

    Each pair contributes ``target * d**2 + (1 - target) * max(0, 1 - d)**2``,
    where ``d`` is the Euclidean distance between its two embeddings.

    Args:
        target: one label per pair; flattened to 1-D so that it lines up
            element-wise with the distances.
        x1: embeddings of the first images, one row per pair.
        x2: embeddings of the second images, same shape as ``x1``.

    Returns:
        Scalar mean loss over the batch.
    """
    difference = x1 - x2
    squared_distance = tf.reduce_sum(tf.square(difference), axis=-1)
    distance = tf.sqrt(squared_distance)
    # Pair label i with distance i. Mapping over the distances while
    # multiplying by the whole `target` vector combined every distance with
    # every label instead of only its own.
    target = tf.reshape(tf.cast(target, distance.dtype), [-1])
    loss = (target * squared_distance
            + (1.0 - target) * tf.square(tf.maximum(0.0, 1.0 - distance)))
    average_loss = tf.reduce_mean(loss)
    return average_loss
def accuracy(y_true, y_pred):
    """Binary accuracy of "distance below 0.5 means label 1".

    Args:
        y_true: labels; assumes 1.0 marks an identical pair, as simnet_loss
            treats `target` -- TODO confirm against the caller.
        y_pred: per-pair vectors whose Euclidean norm is the distance
            (presumably the embedding differences; verify in test_step).

    Returns:
        The result of ``tf.keras.metrics.binary_accuracy``.
    """
    distance_vector = tf.map_fn(lambda x: Encoder.distance(x), y_pred)
    # binary_accuracy treats scores above 0.5 as class 1. Feeding the raw
    # distance scored far-apart pairs as label 1, so score with 1 - distance:
    # the 0.5 threshold is unchanged, only its direction is flipped.
    accuracy = tf.keras.metrics.binary_accuracy(y_true, 1.0 - distance_vector)
    return accuracy
Training:
def train_step(images, labels):
    """Run one optimisation step on a batch of image pairs.

    Args:
        images: batch indexed as ``images[:, 0]`` / ``images[:, 1]`` for the
            two members of each pair.
        labels: one label per pair, forwarded to ``simnet_loss``.

    Returns:
        The loss value for this batch.
    """
    first, second = images[:, 0, :, :, :], images[:, 1, :, :, :]
    with tf.GradientTape() as tape:
        # Both images go through the same encoder.
        loss = simnet_loss(labels, model(first), model(second))
    variables = model.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(loss, variables), variables))
    return loss
model = Encoder()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

for epoch in range(n_epoch):
    epoch_loss = 0
    # At least one batch, so array_split is never asked for zero sections
    # when the data set is smaller than batch_size.
    n_batches = max(1, x_train.shape[0] // batch_size)
    for indices in np.array_split(np.arange(x_train.shape[0]), indices_or_sections=n_batches):
        x = np.take(x_train, indices, axis=0)
        y = np.take(y_train, indices, axis=0)
        epoch_loss += train_step(x, y)
    epoch_loss = epoch_loss / n_batches
    # Named train_accuracy so the module-level `accuracy` function is not
    # overwritten by a result value.
    train_accuracy = test_step(x_train, y_train)
    val_accuracy = test_step(x_test, y_test)
    tf.print("epoch:", epoch, "loss:", epoch_loss, "accuracy:", train_accuracy,
             "val_accuracy:", val_accuracy, output_stream=sys.stdout)
The code above produces:
epoch: 0 loss: 0.755419433 accuracy: 0.318898171 val_accuracy:
0.310316473
epoch: 1 loss: 0.270610392 accuracy: 0.369466901 val_accuracy:
0.360871345
epoch: 2 loss: 0.262594223 accuracy: 0.430587918 val_accuracy:
0.418002456
epoch: 3 loss: 0.258690506 accuracy: 0.428258181 val_accuracy:
0.427044809
epoch: 4 loss: 0.25654456 accuracy: 0.43497327 val_accuracy:
0.44800657
epoch: 5 loss: 0.255373538 accuracy: 0.444840342 val_accuracy:
0.454993844
epoch: 6 loss: 0.254594624 accuracy: 0.453885168 val_accuracy:
0.454171807
I have created a custom layer as follows in Keras where I define the forward propagation function by calling conv_forward()
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from tensorflow.keras.models import Model
from tensorflow.keras import metrics
import tensorflow as tf
tf.keras.backend.clear_session() # For easy reset of notebook state.
# Show which TensorFlow version is in use.
print(tf.__version__)
def conv_forward(A_prev, W, b, parameters):  # forward prop having a for loop
    """
    Gradually overwrite channel slices of A_prev with convolution outputs.

    For each of ``channels // expand`` steps, the current tensor is convolved
    with W (stride 1, "SAME" padding), b is added, and the result replaces
    channels ``[i*expand, (i+1)*expand)``. Later steps therefore see the
    slices written by earlier ones.

    Arguments:
    A_prev -- input tensor, indexed as 4-D with channels on the last axis
    W -- convolution kernel passed to tf.nn.conv2d; Gunn2D.build creates it
         with shape (3, 3, input_channels, expansion_rate)
    b -- bias added to the convolution output; Gunn2D.build creates it with
         shape (1, 1, 1, expansion_rate)
    parameters -- python dictionary containing "expand" and "channels"

    Returns:
    A_prev -- the tensor after all slices have been updated
    """
    # The argument is named `parameters`; the body previously read an
    # undefined name `hparameters`.
    expand = parameters["expand"]
    channels = parameters["channels"]
    depth_batch = channels // expand
    # Conv2D for 1 step of gradual update
    for i in range(depth_batch):
        # if you dont add b or not use a registered parameter, tensorflow will give error as follows:
        # Gradients do not exist for variables ['layer/Variable:0'] when minimizing the loss
        Z = tf.nn.conv2d(A_prev, W, [1, 1, 1, 1], "SAME") + b
        A_prev = tf.concat([A_prev[:, :, :, :i*expand], Z, A_prev[:, :, :, i*expand + expand:]], 3)
    return A_prev
class Gunn2D(layers.Layer):  # custom layer definition
    """Custom layer that owns one kernel and one bias for ``conv_forward``."""

    def __init__(self, input_channels, expansion_rate=32):
        super().__init__()
        self.input_channels = input_channels
        self.expansion_rate = expansion_rate
        # Settings handed to conv_forward on every call.
        self.hparameters = {"expand": expansion_rate, "channels": input_channels}

    def build(self, input_shape):
        # 3x3 kernel mapping all input channels to `expansion_rate` channels,
        # plus a broadcastable bias of matching depth.
        kernel_shape = (3, 3, self.input_channels, self.expansion_rate)
        bias_shape = (1, 1, 1, self.expansion_rate)
        self.w = self.add_weight(shape=kernel_shape, initializer='random_normal', trainable=True)
        self.b = self.add_weight(shape=bias_shape, initializer='random_normal', trainable=True)

    def call(self, inputs):
        return conv_forward(inputs, self.w, self.b, self.hparameters)
def GunnModel(input_shape):
    """
    Build the Keras model: Gunn2D -> Flatten -> 3-way softmax.

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    model -- a Model() instance in Keras
    """
    X_input = Input(input_shape)

    # At instantiation the layer does not yet know what inputs it will see.
    X = Gunn2D(6, 2)(X_input)  # using the Custom Keras layer
    print('After gunnlayer : {}'.format(X.get_shape()))

    X = Flatten()(X)
    X = Dense(3, activation='softmax', name='fc1')(X)
    return Model(inputs=X_input, outputs=X, name='GunnModel')
I create the model and fit it
# Dummy all-ones data: 50 training and 20 test samples.
X_train, X_test = tf.ones((50, 5, 5, 6)), tf.ones((20, 5, 5, 6))
Y_train, Y_test = tf.ones((50, 3)), tf.ones((20, 3))

gunnModel = GunnModel(X_train.shape[1:])
gunnModel.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=[metrics.categorical_accuracy],
)

steps = X_train.shape[0] // 10
gunnModel.fit(x=X_train, y=Y_train, epochs=5, steps_per_epoch=steps)

# evaluate() result is read as [loss, accuracy].
preds = gunnModel.evaluate(x=X_test, y=Y_test)
print()
print(f"Loss = {preds[0]}")
print(f"Test Accuracy = {preds[1]}")
and it gives me successful training output as follows:
Epoch 1/5
5/5 [==============================] - 0s 3ms/step - loss: 3.3864 - categorical_accuracy: 1.0000
Epoch 2/5
5/5 [==============================] - 0s 2ms/step - loss: 3.3766 - categorical_accuracy: 0.0000e+00
Epoch 3/5
5/5 [==============================] - 0s 3ms/step - loss: 3.3967 - categorical_accuracy: 0.0000e+00
Epoch 4/5
5/5 [==============================] - 0s 2ms/step - loss: 3.4462 - categorical_accuracy: 0.8000
Epoch 5/5
5/5 [==============================] - 0s 2ms/step - loss: 3.5673 - categorical_accuracy: 1.0000
1/1 [==============================] - 0s 2ms/step - loss: 3.6945 - categorical_accuracy: 1.0000
Loss = 3.69450306892395
Test Accuracy = 1.0
This is a dummy program to get the layer working, so don't care about the accuracy.
Let's print the summary:
# Print the layer-by-layer summary of the model built above.
gunnModel.summary()
Output:
Model: "GunnModel"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 5, 5, 6)] 0
_________________________________________________________________
gunn2d (Gunn2D) (None, 5, 5, 6) 110
_________________________________________________________________
flatten (Flatten) (None, 150) 0
_________________________________________________________________
fc1 (Dense) (None, 3) 453
=================================================================
Total params: 563
Trainable params: 563
Non-trainable params: 0
_________________________________________________________________
Now, when I add more features in the custom layer, specifically in conv_forward() layer like BatchNormalization and Activation, it gives me error.
New definition of conv_forward():
def _gradual_update(A, W, b, steps, width, padding):
    """Overwrite `steps` consecutive `width`-channel slices of A with conv2d(A, W) + b."""
    for i in range(steps):
        Z = tf.nn.conv2d(A, W, [1, 1, 1, 1], padding) + b
        start = i * width
        A = tf.concat([A[:, :, :, :start], Z, A[:, :, :, start + width:]], 3)
    return A


def conv_forward(A_shortcut, W1, b1, W2, b2, W3, b3, hparameters):
    """
    Three gradual-update stages with batch normalization and a shortcut add.

    Arguments:
    A_shortcut -- input tensor, indexed as 4-D with channels on the last
                  axis; also added back to the result at the end
    W1, b1 -- kernel and bias of stage 1 ("VALID" padding, `expand`-wide slices)
    W2, b2 -- kernel and bias of stage 2 ("SAME" padding, `expand`-wide slices)
    W3, b3 -- kernel and bias of stage 3 ("VALID" padding, one channel per step)
    hparameters -- python dictionary containing "expand" and "channels"

    Returns:
    A -- output of stage 3 after batch normalization, plus A_shortcut
    """
    expand = hparameters["expand"]
    channels = hparameters["channels"]
    depth_batch = channels // expand

    # Note: if you dont add b or not use a registered parameter, tensorflow will give error as follows:
    # Gradients do not exist for variables ['layer/Variable:0'] when minimizing the loss
    A = tf.identity(A_shortcut)

    # Stage 1
    A = _gradual_update(A, W1, b1, depth_batch, expand, "VALID")
    # NOTE(review): new layer objects are instantiated each time this
    # function runs; the traceback quoted after this function points at this
    # line.
    A = BatchNormalization(axis=3, name='Gunn_BN_1')(A)
    A = Activation('relu')(A)

    # Stage 2
    A = _gradual_update(A, W2, b2, depth_batch, expand, "SAME")
    A = BatchNormalization(axis=3, name='Gunn_BN_2')(A)
    A = Activation('relu')(A)

    # Stage 3: one channel at a time.
    A = _gradual_update(A, W3, b3, channels, 1, "VALID")
    A = BatchNormalization(axis=3, name='Gunn_BN_3')(A)

    # Add shortcut value to main path. This implements the identity block in Residual Network.
    # NOTE(review): `Add` is not in the layer imports visible in this file -- confirm it is imported.
    A = Add()([A, A_shortcut])
    print('Resnet : {}'.format(A.shape))
    return A
Error:
Resnet : (None, 5, 5, 6)
After gunnlayer : (None, 5, 5, 6)
Epoch 1/5
Resnet : (10, 5, 5, 6)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-28-d6d9fedc335a> in <module>()
6 gunnModel = GunnModel(X_train.shape[1:])
7 gunnModel.compile(optimizer = "adam", loss='categorical_crossentropy', metrics=[metrics.categorical_accuracy])
----> 8 gunnModel.fit(x = X_train , y = Y_train, epochs = 5, steps_per_epoch = (X_train.shape[0]//10))
9 preds = gunnModel.evaluate(x=X_test, y=Y_test)
10 print()
9 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:505 train_function *
outputs = self.distribute_strategy.run(
<ipython-input-1-3e0d6e941353>:75 call *
output = conv_forward(inputs, self.w1, self.b1, self.w2, self.b2, self.w3, self.b3, self.hparameters)
<ipython-input-27-56c3c46e1785>:37 conv_forward *
A = BatchNormalization(axis = 3 , name = 'Gunn_BN_1')(A)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:897 __call__ **
self._maybe_build(inputs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:2416 _maybe_build
self.build(input_shapes) # pylint:disable=not-callable
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/normalization.py:400 build
experimental_autocast=False)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:577 add_weight
caching_device=caching_device)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/tracking/base.py:743 _add_variable_with_custom_getter
**kwargs_for_getter)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer_utils.py:141 make_variable
shape=variable_shape if variable_shape else None)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:259 __call__
return cls._variable_v1_call(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:220 _variable_v1_call
shape=shape)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
return captured_getter(captured_previous, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
return next_creator(**kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
return captured_getter(captured_previous, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
return next_creator(**kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
return captured_getter(captured_previous, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
return next_creator(**kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
return captured_getter(captured_previous, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py:511 invalid_creator_scope
"tf.function-decorated function tried to create "
ValueError: tf.function-decorated function tried to create variables on non-first call.
I even tried to use @tf.function for the for loops, but the issue remains. I suspect it started after adding Batch Normalization: the print statement 'Resnet' is printed, but the immediately following print statement 'After gunnlayer' is not printed during training. Both are printed during model creation.
I'm trying to add a second hidden layer to my neural net, training on the MNIST dataset. With only a simple hidden layer the training works fine, and the accuracy increases steadily.
When I try to add the second layer, the accuracy gets stuck at about 0.11 each time I start training. I just can't figure out what I'm doing wrong here.
I've tried adding sigmoid to my y with no luck.
# Use the first 10000 training samples.
XTrain = XTrain[0:10000, :]
YTrain = YTrain[0:10000]

K = len(set(YTrain))  # number of distinct labels
N = len(YTrain)
M = 12  # Hidden layer units
D = XTrain.shape[1]

tfX = tf.placeholder(tf.float32, [None, D])
tfY = tf.placeholder(tf.float32, [None, K])

# HIDDEN LAYER 1
W1 = tf.Variable(tf.random_normal([D, M], stddev=0.01))
b1 = tf.Variable(tf.random_normal([M], stddev=0.01))

# HIDDEN LAYER 2
W2 = tf.Variable(tf.random_normal([M, M], stddev=0.01))
b2 = tf.Variable(tf.random_normal([M], stddev=0.01))

# OUTPUT LAYER
W3 = tf.Variable(tf.random_normal([M, K], stddev=0.01))
b3 = tf.Variable(tf.random_normal([K], stddev=0.01))

# MODEL
h1 = tf.nn.sigmoid(tf.matmul(tfX, W1) + b1)
h2 = tf.nn.sigmoid(tf.matmul(h1, W2) + b2)
y = tf.matmul(h2, W3) + b3

# Softmax and cross-entropy
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=tfY,
        logits=y)
)

# Targets One-Hot encoded: row i gets a 1 in column YTrain[i].
T = np.zeros((N, K))
T[np.arange(N), YTrain] = 1

# Gradient descent
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
predict_op = tf.argmax(y, 1)

# Start session and initialize variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# TRAIN
feed = {tfX: XTrain, tfY: T}
for i in range(10000):
    sess.run(train_op, feed_dict=feed)
    # Predictions are only needed on the iterations that report accuracy.
    if i % 20 == 0:
        pred = sess.run(predict_op, feed_dict=feed)
        print("Accuracy:", np.mean(YTrain == pred))
When I start training the output looks like this:
Accuracy: 0.0991
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
I figured out a solution to the problem myself.
Apparently the initialization of the weights wasn't right. It works if I change the initialization to:
# Weights are unit-stddev draws divided by the square root of the layer's
# input width; biases are unit-stddev draws.

# HIDDEN LAYER 1
input_scale = np.sqrt(D)
W1 = tf.Variable(tf.random_normal([D, M], stddev=1) / input_scale)
b1 = tf.Variable(tf.random_normal([M], stddev=1))

# HIDDEN LAYER 2
hidden_scale = np.sqrt(M)
W2 = tf.Variable(tf.random_normal([M, M], stddev=1) / hidden_scale)
b2 = tf.Variable(tf.random_normal([M], stddev=1))

# OUTPUT LAYER
W3 = tf.Variable(tf.random_normal([M, K], stddev=1) / hidden_scale)
b3 = tf.Variable(tf.random_normal([K], stddev=1))
I'm still not quite sure why this works; I would appreciate any answers and feedback.