How to use SVD inside keras layers? - python

My aim is to use SVD to PCA whiten the latent layer before passing it to the decoder module of an autoencoder. I have used tf.linalg.svd but it does not work since it does not contain necessary Keras parameters. So as a workaround I was trying to wrap it inside Lambda but got this error
AttributeError: 'tuple' object has no attribute 'shape'.
I tried SO (E.g. Using SVD in a custom layer in Keras/tensorflow) and did Google search for SVD in Keras but could not find any answers. I have attached a stripped but functional code here:
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from keras.layers import Lambda, Input, Dense, Multiply, Subtract
from keras.models import Model
from keras import backend as K
from keras.losses import mse
from keras import optimizers
from keras.callbacks import EarlyStopping
x = np.random.randn(100, 5)
train_data = preprocessing.scale(x)
input_shape = (5, )
original_dim = train_data.shape[1]
intermediate_dim_1 = 64
intermediate_dim_2 = 16
latent_dim = 2
batch_size = 10
epochs = 15
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
layer_1 = Dense(intermediate_dim_1, activation='tanh') (inputs)
layer_2 = Dense(intermediate_dim_2, activation='tanh') (layer_1)
encoded_layer = Dense(latent_dim, name='latent_layer') (layer_2)
encoder = Model(inputs, encoded_layer, name='encoder')
encoder.summary()
# build decoder model
latent_inputs = Input(shape=(latent_dim,))
layer_1 = Dense(intermediate_dim_1, activation='tanh') (latent_inputs)
layer_2 = Dense(intermediate_dim_2, activation='tanh') (layer_1)
outputs = Dense(original_dim,activation='sigmoid') (layer_2)
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()
# mean removal and pca whitening
meanX = Lambda(lambda x: tf.reduce_mean(x, axis=0, keepdims=True))(encoded_layer)
standardized = Subtract()([encoded_layer, meanX])
sigma2 = K.dot(K.transpose(standardized), standardized)
sigma2 = Lambda(lambda x: x / batch_size)(sigma2)
s, u ,v = tf.linalg.svd(sigma2,compute_uv=True)
# s ,u ,v = Lambda(lambda x: tf.linalg.svd(x,compute_uv=True))(sigma2)
epsilon = 1e-6
# sqrt of number close to 0 leads to problem hence replace it with epsilon
si = tf.where(tf.less(s, epsilon), tf.sqrt(1 / epsilon) * tf.ones_like(s),
tf.math.truediv(1.0, tf.sqrt(s)))
whitening_layer = u # tf.linalg.diag(si) # tf.transpose(v)
whitened_encoding = K.dot(standardized, whitening_layer)
# Connect models
z_decoded = decoder(standardized)
# z_decoded = decoder(whitened_encoding)
# Define losses
reconstruction_loss = mse(inputs,z_decoded)
# Instantiate autoencoder
ae = Model(inputs, z_decoded, name='autoencoder')
ae.add_loss(reconstruction_loss)
# callback = EarlyStopping(monitor='val_loss', patience=5)
adam = optimizers.adam(learning_rate=0.002)
ae.compile(optimizer=adam)
ae.summary()
ae.fit(train_data, epochs=epochs, batch_size=batch_size,
validation_split=0.2, shuffle=True)
To reproduce the error uncomment these lines and comment the one preceding it:
z_decoded = decoder(whitened_encoding)
s ,u ,v = Lambda(lambda x: tf.linalg.svd(x,compute_uv=True))(sigma2)
I would appreciate it if someone could tell me how to wrap the SVD inside Keras layers or an alternate implementation.
Please note that I have not included the reparameterization trick to calculate the loss to keep the code simple.
Thank you !

I solved the problem. To use SVD inside Keras, we need to wrap it in a Lambda layer. However, a Lambda layer is expected to return a tensor (which carries some additional Keras attributes) rather than the tuple that tf.linalg.svd produces, so it is best to do all of the additional work inside the wrapped function and return a single tensor. Another problem with my code was the combination of the encoder and decoder models, which I fixed by connecting the output of the encoder to the input of the decoder. The working code is as follows:
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from keras.layers import Lambda, Input, Dense, Multiply, Subtract
from keras.models import Model
from keras import backend as K
from keras.losses import mse
from keras import optimizers
from keras.callbacks import EarlyStopping
def SVD(sigma2):
    """Build a whitening matrix from a covariance matrix via SVD.

    Intended to be wrapped in a Keras ``Lambda`` layer: all the work on the
    ``(s, u, v)`` tuple returned by ``tf.linalg.svd`` happens here, so the
    layer outputs a single tensor.

    Args:
        sigma2: square covariance matrix of the mean-centred latent codes.

    Returns:
        ``u @ diag(1 / sqrt(s)) @ transpose(v)``, with the same shape as
        ``sigma2``. Singular values below ``epsilon`` are clamped so the
        scale never exceeds ``1 / sqrt(epsilon)``.
    """
    s, u, v = tf.linalg.svd(sigma2, compute_uv=True)
    epsilon = 1e-6
    # 1/sqrt(s) blows up for singular values close to 0. Clamping s from
    # below gives 1/sqrt(epsilon) there and 1/sqrt(s) everywhere else,
    # without ever evaluating the square root at 0.
    si = tf.math.rsqrt(tf.maximum(s, epsilon))
    # tf.linalg.svd returns v (not its transpose), hence transpose_b=True.
    scaled_u = tf.matmul(u, tf.linalg.diag(si))
    whitening_layer = tf.matmul(scaled_u, v, transpose_b=True)
    return whitening_layer
# Script: build a small dense autoencoder on random data, compute a
# whitening matrix for the latent codes, and train the autoencoder.
x = np.random.randn(100, 5)
train_data = preprocessing.scale(x)
input_shape = (5, )
original_dim = train_data.shape[1]
intermediate_dim_1 = 64
intermediate_dim_2 = 16
latent_dim = 2
batch_size = 10
epochs = 15
# build encoder model: 5 -> 64 -> 16 -> 2 (latent layer has no activation)
inputs = Input(shape=input_shape, name='encoder_input')
layer_1 = Dense(intermediate_dim_1, activation='tanh') (inputs)
layer_2 = Dense(intermediate_dim_2, activation='tanh') (layer_1)
encoded_layer = Dense(latent_dim, name='latent_layer') (layer_2)
encoder = Model(inputs, encoded_layer, name='encoder')
encoder.summary()
# build decoder model: 2 -> 64 -> 16 -> 5
# NOTE(review): the output activation is a sigmoid while the training data
# comes from preprocessing.scale; presumably the targets are not confined
# to (0, 1) -- confirm this is intended.
latent_inputs = Input(shape=(latent_dim,))
layer_1 = Dense(intermediate_dim_1, activation='tanh') (latent_inputs)
layer_2 = Dense(intermediate_dim_2, activation='tanh') (layer_1)
outputs = Dense(original_dim,activation='sigmoid') (layer_2)
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()
# mean removal and pca whitening
# Mean over axis 0 (the batch axis), kept as one row so Subtract can use it.
meanX = Lambda(lambda x: tf.reduce_mean(x, axis=0, keepdims=True))(encoded_layer)
standardized = Subtract()([encoded_layer, meanX])
# Covariance estimate of the centred codes: (standardized^T . standardized) / batch_size.
# NOTE(review): the divisor is the fixed batch_size constant, which assumes
# every batch holds exactly batch_size rows -- confirm for the last batch
# and for validation.
sigma2 = K.dot(K.transpose(standardized), standardized)
sigma2 = Lambda(lambda x: x / batch_size)(sigma2)
# s, u ,v = tf.linalg.svd(sigma2,compute_uv=True)
# The whole SVD computation lives inside SVD() so the Lambda layer returns a
# single tensor instead of the (s, u, v) tuple.
whitening_layer = Lambda(SVD)(sigma2)
# The bare string below is the earlier, failing attempt, kept disabled.
'''
s ,u ,v = Lambda(lambda x: tf.linalg.svd(x,compute_uv=True))(sigma2)
epsilon = 1e-6
# sqrt of number close to 0 leads to problem hence replace it with epsilon
si = tf.where(tf.less(s, epsilon),
tf.sqrt(1 / epsilon) * tf.ones_like(s),
tf.math.truediv(1.0, tf.sqrt(s)))
whitening_layer = u # tf.linalg.diag(si) # tf.transpose(v)
'''
print('whitening_layer shape=', np.shape(whitening_layer))
print('standardized shape=', np.shape(standardized))
# NOTE(review): whitened_encoding is computed here but never used below; the
# decoder is fed encoder(inputs), so the whitening step is not part of the
# trained autoencoder graph.
whitened_encoding = K.dot(standardized, whitening_layer)
# Connect models
# z_decoded = decoder(standardized)
z_decoded = decoder(encoder(inputs))
# Define losses
reconstruction_loss = mse(inputs,z_decoded)
# Instantiate autoencoder
ae = Model(inputs, z_decoded, name='autoencoder')
# The loss is attached with add_loss, so compile() below gets no loss
# argument and fit() is called without targets.
ae.add_loss(reconstruction_loss)
# callback = EarlyStopping(monitor='val_loss', patience=5)
# NOTE(review): lowercase optimizers.adam presumably depends on the installed
# Keras version -- verify when upgrading.
adam = optimizers.adam(learning_rate=0.002)
ae.compile(optimizer=adam)
ae.summary()
ae.fit(train_data, epochs=epochs, batch_size=batch_size,
validation_split=0.2, shuffle=True)
The output of running the code is as follows:
Model: "encoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
encoder_input (InputLayer) (None, 5) 0
_________________________________________________________________
dense_1 (Dense) (None, 64) 384
_________________________________________________________________
dense_2 (Dense) (None, 16) 1040
_________________________________________________________________
latent_layer (Dense) (None, 2) 34
=================================================================
Total params: 1,458
Trainable params: 1,458
Non-trainable params: 0
_________________________________________________________________
Model: "decoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 2) 0
_________________________________________________________________
dense_3 (Dense) (None, 64) 192
_________________________________________________________________
dense_4 (Dense) (None, 16) 1040
_________________________________________________________________
dense_5 (Dense) (None, 5) 85
=================================================================
Total params: 1,317
Trainable params: 1,317
Non-trainable params: 0
_________________________________________________________________
whitening_layer shape= (2, 2)
standardized shape= (None, 2)
/home/manish/anaconda3/envs/ica_gpu/lib/python3.7/site-packages/keras/engine/training_utils.py:819: UserWarning: Output decoder missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to decoder.
'be expecting any data to be passed to {0}.'.format(name))
Model: "autoencoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
encoder_input (InputLayer) (None, 5) 0
_________________________________________________________________
encoder (Model) (None, 2) 1458
_________________________________________________________________
decoder (Model) (None, 5) 1317
=================================================================
Total params: 2,775
Trainable params: 2,775
Non-trainable params: 0
_________________________________________________________________
Train on 80 samples, validate on 20 samples
Epoch 1/15
2020-05-16 16:01:55.443061: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
80/80 [==============================] - 0s 3ms/step - loss: 1.1739 - val_loss: 1.2238
Epoch 2/15
80/80 [==============================] - 0s 228us/step - loss: 1.0601 - val_loss: 1.0921
Epoch 3/15
80/80 [==============================] - 0s 261us/step - loss: 0.9772 - val_loss: 1.0291
Epoch 4/15
80/80 [==============================] - 0s 223us/step - loss: 0.9385 - val_loss: 0.9875
Epoch 5/15
80/80 [==============================] - 0s 262us/step - loss: 0.9105 - val_loss: 0.9560
Epoch 6/15
80/80 [==============================] - 0s 240us/step - loss: 0.8873 - val_loss: 0.9335
Epoch 7/15
80/80 [==============================] - 0s 217us/step - loss: 0.8731 - val_loss: 0.9156
Epoch 8/15
80/80 [==============================] - 0s 253us/step - loss: 0.8564 - val_loss: 0.9061
Epoch 9/15
80/80 [==============================] - 0s 273us/step - loss: 0.8445 - val_loss: 0.8993
Epoch 10/15
80/80 [==============================] - 0s 235us/step - loss: 0.8363 - val_loss: 0.8937
Epoch 11/15
80/80 [==============================] - 0s 283us/step - loss: 0.8299 - val_loss: 0.8874
Epoch 12/15
80/80 [==============================] - 0s 254us/step - loss: 0.8227 - val_loss: 0.8832
Epoch 13/15
80/80 [==============================] - 0s 227us/step - loss: 0.8177 - val_loss: 0.8789
Epoch 14/15
80/80 [==============================] - 0s 241us/step - loss: 0.8142 - val_loss: 0.8725
Epoch 15/15
80/80 [==============================] - 0s 212us/step - loss: 0.8089 - val_loss: 0.8679
I hope this helps.

Related

Convert TensorFlow model to PyTorch model - model isn't learning

I'm trying to port a tensorflow neural network to pytorch, as an exercise to familiarize myself with both / their nuances. This is the tensorflow network I'm porting to pytorch:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D
from tensorflow.keras.datasets import imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=5000)
x_train = sequence.pad_sequences(x_train, maxlen=400, padding="post")
x_test = sequence.pad_sequences(x_test, maxlen=400, padding="post")
model = Sequential()
model.add(Embedding(5000, 50, input_length=400))
model.add(Dropout(0.2))
model.add(Conv1D(250, 3, padding='valid',activation='relu',strides=1))
model.add(GlobalMaxPooling1D())
model.add(Dense(250))
model.add(Dropout(0.2))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
h2 = model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test, y_test))
The shapes of the layers are shown below:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 400, 50) 250000
dropout (Dropout) (None, 400, 50) 0
conv1d (Conv1D) (None, 398, 250) 37750
global_max_pooling1d (Globa (None, 250) 0
lMaxPooling1D)
dense (Dense) (None, 250) 62750
dropout_1 (Dropout) (None, 250) 0
activation (Activation) (None, 250) 0
dense_1 (Dense) (None, 1) 251
activation_1 (Activation) (None, 1) 0
=================================================================
Total params: 350,751
Trainable params: 350,751
Non-trainable params: 0
And the output of the tensorflow model is:
Epoch 1/10
loss: 0.4043 - accuracy: 0.8021 - val_loss: 0.2764 - val_accuracy: 0.8854
Epoch 2/10
loss: 0.2332 - accuracy: 0.9052 - val_loss: 0.2690 - val_accuracy: 0.8888
Epoch 3/10
loss: 0.1598 - accuracy: 0.9389 - val_loss: 0.2948 - val_accuracy: 0.8832
Epoch 4/10
loss: 0.1112 - accuracy: 0.9600 - val_loss: 0.3015 - val_accuracy: 0.8906
Epoch 5/10
loss: 0.0810 - accuracy: 0.9700 - val_loss: 0.3057 - val_accuracy: 0.8868
Epoch 6/10
loss: 0.0537 - accuracy: 0.9811 - val_loss: 0.4055 - val_accuracy: 0.8868
Epoch 7/10
loss: 0.0408 - accuracy: 0.9860 - val_loss: 0.4083 - val_accuracy: 0.8852
Epoch 8/10
loss: 0.0411 - accuracy: 0.9845 - val_loss: 0.4789 - val_accuracy: 0.8789
Epoch 9/10
loss: 0.0380 - accuracy: 0.9862 - val_loss: 0.4828 - val_accuracy: 0.8827
Epoch 10/10
loss: 0.0329 - accuracy: 0.9879 - val_loss: 0.4999 - val_accuracy: 0.8825
Here's what I have in my PyTorch port over:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
class CustomDataset(Dataset):
    """Map-style dataset that pairs each entry of ``x`` with the entry of ``y`` at the same index."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        # The label container defines how many samples there are.
        return len(self.y)

    def __getitem__(self, idx):
        features = self.x[idx]
        label = self.y[idx]
        return features, label
train_dataloader = DataLoader(CustomDataset(torch.Tensor(x_train), torch.Tensor(y_train)), batch_size=32, shuffle=True)
test_dataloader = DataLoader(CustomDataset(torch.Tensor(x_test), torch.Tensor(y_test)), batch_size=32, shuffle=True)
class MyModel(torch.nn.Module):
    """PyTorch port of the Keras text classifier shown earlier in this file.

    Pipeline: embedding -> dropout -> 1-D convolution -> global max pool ->
    linear -> dropout -> ReLU -> linear -> sigmoid.
    """

    def __init__(self, vocab_size=5000, input_len=400, embedding_dims=50, kernel_size=3, filters=250, hidden_dims=250):
        super(MyModel, self).__init__()
        # Kept for the reshape in forward().
        self.embedding_dims = embedding_dims
        self.input_len = input_len
        self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dims)
        self.dropout1 = torch.nn.Dropout(p=0.2)
        self.conv1d = torch.nn.Conv1d(in_channels=embedding_dims, out_channels=filters, kernel_size=kernel_size, padding=(0,), stride=1)
        # Output size 1 makes this a max over the whole sequence axis.
        self.pool = torch.nn.AdaptiveMaxPool1d(1)
        # NOTE(review): linear1 consumes the pooled conv output (``filters``
        # features) but is sized with ``hidden_dims``; this only lines up
        # because both default to 250.
        self.linear1 = torch.nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
        self.dropout2 = torch.nn.Dropout(p=0.2)
        self.activation = torch.nn.ReLU()
        self.output = torch.nn.Linear(in_features=hidden_dims, out_features=1)
        self.activation2 = torch.nn.Sigmoid()

    def forward(self, x):
        # Token ids arrive as floats and are cast to integer indices for the lookup.
        x = self.dropout1(self.embedding(x.type(torch.LongTensor)))
        # NOTE(review): the embedding output is (batch, input_len, embedding_dims).
        # ``.view`` only reinterprets the same memory as
        # (batch, embedding_dims, input_len); it does not swap the two axes,
        # so the convolution sees scrambled channels. A transpose/moveaxis of
        # the last two axes appears to be what is intended.
        # NOTE(review): the Keras Conv1D uses activation='relu'; no activation
        # is applied after the convolution here.
        x = self.conv1d(x.view(-1, self.embedding_dims, self.input_len))
        x = self.pool(x)
        # Flatten (batch, filters, 1) to (batch, filters) before the dense layers.
        x = self.activation(self.dropout2(self.linear1(x.view(-1,x.size()[1]))))
        x = self.activation2(self.output(x))
        return x
class FitTorchModel():
    """Minimal Keras-style training loop for a binary classifier that outputs probabilities.

    Each epoch consumes at most ``steps_per_epoch`` batches from the training
    loader, then evaluates on at most ``steps_per_epoch`` batches from the
    test loader. A loader with fewer batches is simply consumed to its end
    instead of raising ``StopIteration``.
    """

    _HISTORY_COLUMNS = ["Loss", "Accuracy", "Val_Loss", "Val_Acc"]

    def __init__(self, model, num_epochs=10, steps_per_epoch=782):
        self.model = model
        self.epochs = num_epochs
        self.steps_per_epoch = steps_per_epoch

    def fit(self, train_dataloader, test_dataloader):
        """Train the model and return per-epoch metrics.

        Args:
            train_dataloader: iterable of ``(x, y)`` training batches.
            test_dataloader: iterable of ``(x, y)`` validation batches.

        Returns:
            A DataFrame with one row per epoch and the columns
            ``Loss``, ``Accuracy``, ``Val_Loss`` and ``Val_Acc``.
        """
        opt = torch.optim.Adam(self.model.parameters(), lr=0.001)
        crit = torch.nn.BCELoss(reduction="mean")
        history = []
        for epoch in range(self.epochs):
            # predict_proba() switches to eval mode, so re-enable train mode
            # (dropout active) at the start of every epoch.
            self.model.train()
            print(f"Epoch {epoch}")
            epoch_loss = 0.0
            epoch_acc = 0.0
            steps_done = 0
            for x, y in self._batches(train_dataloader):
                opt.zero_grad()
                y_pred = self.model(x).view(-1)
                loss = crit(y_pred, y)
                epoch_loss += loss.item()
                epoch_acc += accuracy_score(y == 1, y_pred > 0.5)
                loss.backward()
                opt.step()
                steps_done += 1
            val_loss, val_acc = self.predict_proba(test_dataloader, crit)
            history.append({
                "Loss": self._average(epoch_loss, steps_done),
                "Accuracy": self._average(epoch_acc, steps_done),
                "Val_Loss": val_loss,
                "Val_Acc": val_acc,
            })
        # Build the frame once; concatenating onto an empty frame every epoch
        # is slower and leaves the column dtypes up to pandas.
        return pd.DataFrame(history, columns=self._HISTORY_COLUMNS)

    def predict_proba(self, test_dataloader, crit):
        """Evaluate the model without gradient tracking.

        Args:
            test_dataloader: iterable of ``(x, y)`` batches.
            crit: loss callable applied as ``crit(y_pred, y)``.

        Returns:
            ``(mean_loss, mean_accuracy)`` over the batches processed;
            both are ``nan`` when no batch was available.
        """
        self.model.eval()
        val_loss = 0.0
        val_acc = 0.0
        steps_done = 0
        with torch.no_grad():
            for x, y in self._batches(test_dataloader):
                y_pred = self.model(x).view(-1)
                val_loss += crit(y_pred, y).item()
                val_acc += accuracy_score(y == 1, y_pred > 0.5)
                steps_done += 1
        return self._average(val_loss, steps_done), self._average(val_acc, steps_done)

    def _batches(self, dataloader):
        """Yield at most ``steps_per_epoch`` batches from ``dataloader`` behind a progress bar."""
        # zip stops at whichever runs out first: the step budget or the loader.
        capped = zip(range(self.steps_per_epoch), dataloader)
        for _, batch in tqdm(capped, total=self.steps_per_epoch):
            yield batch

    @staticmethod
    def _average(total, count):
        """Return ``total / count``, or ``nan`` when nothing was accumulated."""
        return total / count if count else float("nan")
ftm = FitTorchModel(model=MyModel(), num_epochs=10, steps_per_epoch=782)
history_df = ftm.fit(train_dataloader, test_dataloader)
The shape of each layer is:
After embedding layer: torch.Size([32, 400, 50])
After dropout1 layer: torch.Size([32, 400, 50])
After convolution1d layer: torch.Size([32, 250, 398])
After maxpooling layer: torch.Size([32, 250, 1])
After linear1 layer: torch.Size([32, 250])
After dropout2 layer: torch.Size([32, 250])
After activation layer: torch.Size([32, 250])
After output layer: torch.Size([32, 1])
After activation2 layer: torch.Size([32, 1])
The output of the pytorch model training is:
Loss Accuracy Val_Loss Val_Acc
0 0.697899 0.505874 0.692495 0.511629
1 0.693063 0.503477 0.693186 0.503637
2 0.693190 0.496044 0.693149 0.499201
3 0.693181 0.501359 0.693082 0.502038
4 0.693169 0.503237 0.693234 0.495964
5 0.693177 0.500240 0.693154 0.500679
6 0.693069 0.507473 0.693258 0.498881
7 0.693948 0.500320 0.693145 0.501598
8 0.693196 0.499640 0.693164 0.496324
9 0.693170 0.500759 0.693140 0.501918
Couple things: the accuracy hovers around guessing (this is a binary classification task), no matter how many epochs have passed. Secondly, the training loss barely improves. I set the learning rate to the default learning rate described by tensorflow's Adam Optimizer docs. What else am I missing here? I had some trouble with the input / output dimensions for the various layers - did I mess those up at all?
Some observations:
Use BCEWithLogitsLoss as loss on the output of the last linear layer, before the sigmoid. This includes the sigmoid activation in a more numerically stable fashion.
The TensorFlow model has a ReLU after the convolution; the PyTorch implementation does not.
In general, for debugging, one might want to look at weight.grad of some of your weights after loss.backward() to see whether gradients are being calculated. Printing out the value of one of the weights in each iteration, to see whether your optimizer actually changes the weights, can also help...
Also, it can depend on the input data:
(Are you sure that x_test is scaled correctly?)
If you are transforming your inputs to Long before embedding them and all x_test, for example, are floats between 0 and 1, they will all be converted to 0! And the network will have a hard time predicting the labels from all zeros as constant input!
But now to the actual issue in this particular case:
Be careful with .view! It might not do what you expect. It just reshapes the tensor but does not move the data around.
What you really want is .moveaxis(-1, -2) instead!!
Loss Accuracy Val_Loss Val_Acc
0 0.573489 0.671715 0.402601 0.819413
1 0.376908 0.830163 0.33786 0.850783
2 0.308343 0.868646 0.296171 0.872323
3 0.258806 0.893342 0.319121 0.865849
4 0.227044 0.907649 0.3172 0.868326
5 0.202789 0.918478 0.281184 0.886549
6 0.179744 0.928549 0.291027 0.886589
7 0.161205 0.93702 0.329196 0.879156
8 0.145447 0.944094 0.294914 0.889746
9 0.133034 0.949568 0.291476 0.889826
After adding the relu after the convolution and, more importantly, fixing the view!
class MyModel(torch.nn.Module):
    """1-D CNN text classifier that returns one probability per sequence.

    Pipeline: embedding -> dropout -> Conv1d + ReLU -> global max pool ->
    linear -> dropout -> ReLU -> linear -> sigmoid.

    Args:
        vocab_size: number of rows in the embedding table.
        input_len: nominal sequence length (stored; forward() accepts any
            length that is at least ``kernel_size``).
        embedding_dims: size of each token embedding.
        kernel_size: width of the convolution kernel.
        filters: number of convolution output channels.
        hidden_dims: width of the hidden dense layer.
    """

    def __init__(self, vocab_size=5000, input_len=400, embedding_dims=50, kernel_size=3, filters=250, hidden_dims=250):
        super(MyModel, self).__init__()
        self.embedding_dims = embedding_dims
        self.input_len = input_len
        self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dims)
        self.dropout1 = torch.nn.Dropout(p=0.2)
        self.conv1d = torch.nn.Conv1d(in_channels=embedding_dims, out_channels=filters, kernel_size=kernel_size, padding=0, stride=1)
        # Output size 1 makes this a max over the whole sequence axis.
        self.pool = torch.nn.AdaptiveMaxPool1d(1)
        # The pooled convolution output has ``filters`` features, so that is
        # the input width here (not ``hidden_dims``; the two only coincide
        # with the default arguments).
        self.linear1 = torch.nn.Linear(in_features=filters, out_features=hidden_dims)
        self.dropout2 = torch.nn.Dropout(p=0.2)
        self.activation = torch.nn.ReLU()
        self.output = torch.nn.Linear(in_features=hidden_dims, out_features=1)
        self.activation2 = torch.nn.Sigmoid()

    def forward(self, x):
        """Map a ``(batch, seq_len)`` tensor of token ids to ``(batch, 1)`` probabilities."""
        # .long() casts to integer indices while keeping x on its device.
        x = self.dropout1(self.embedding(x.long()))
        # (batch, seq_len, emb) -> (batch, emb, seq_len): the axes must really
        # be swapped; a .view() would only reinterpret the memory.
        x = self.activation(self.conv1d(x.transpose(1, 2)))
        # (batch, filters, 1) -> (batch, filters)
        x = self.pool(x).squeeze(-1)
        x = self.activation(self.dropout2(self.linear1(x)))
        x = self.activation2(self.output(x))
        return x
What is tinymodel you init opt with in fit function:
opt = torch.optim.Adam(tinymodel.parameters(), lr=0.001)
It seems like your optimizer is not working on the right model (see this answer on the relation between the optimizer and the parameters of the model).
You need to replace this line in fit function:
def fit(self, train_dataloader, test_dataloader):
opt = torch.optim.Adam(self.model.parameters(), lr=0.001)
# ...
Additionally, you are using Dropout layer that has different behavior in train and test.
You should add self.model.train() and self.model.eval() at the beginning of your fit and predict_proba functions respectively.

Why is the TensorFlow Maxout layer not calculating the gradient, and where is my mistake?

I am trying to use the tensorflow maxout implementation (https://www.tensorflow.org/addons/api_docs/python/tfa/layers/Maxout) but struggle with it;
I try to illustrate my problem: If I have the following
d=3
x_in=Input(shape=d)
x_out=Dense(d, activation='relu')(x_in)
model = Model(inputs=x_in, outputs=x_out)
model.compile(optimizer='adam', loss='MeanAbsoluteError')
X=tf.random.normal([200,3])
Y=tf.random.normal([200,3])
model.fit(X, Y, epochs=5, batch_size=32)
Then it is working normally, i.e. the loss is continuously getting smaller and I can get the estimated weights:
model.layers[1].get_weights()
Out[141]:
[array([[-0.15133516, -0.14892222, -0.64674205],
[ 0.34437487, 0.7822309 , -0.08931279],
[-0.8330534 , -0.13827904, -0.23096593]], dtype=float32),
array([-0.03069788, -0.03311999, -0.02603031], dtype=float32)]
However, when I want to use a maxout activation instead, things do not work out
d=3
x_in=Input(shape=d)
x_out = tfa.layers.Maxout(3)(x_in)
model = Model(inputs=x_in, outputs=x_out)
model.compile(optimizer='adam', loss='MeanAbsoluteError')
X=tf.random.normal([200,3])
Y=tf.random.normal([200,3])
model.fit(X, Y, epochs=5, batch_size=32)
The loss stays constant for all Epochs and
model.layers[1].get_weights()
Out[141]: []
Where is my mistake?
It will only work in combination with another layer, for example a Dense layer. Also, the Maxout layer itself does not have any trainable weights as you can see in the model summary but it does have a hyperparameter num_units:
import tensorflow as tf
import tensorflow_addons as tfa
d=3
x_in=tf.keras.layers.Input(shape=d)
x = tf.keras.layers.Dense(3)(x_in)
x_out = tfa.layers.Maxout(3)(x)
model = tf.keras.Model(inputs=x_in, outputs=x_out)
model.compile(optimizer='adam', loss='MeanAbsoluteError')
X=tf.random.normal([200,3])
Y=tf.random.normal([200,3])
model.fit(X, Y, epochs=5, batch_size=32)
print(model.summary())
Epoch 1/5
7/7 [==============================] - 0s 2ms/step - loss: 1.0404
Epoch 2/5
7/7 [==============================] - 0s 3ms/step - loss: 1.0361
Epoch 3/5
7/7 [==============================] - 0s 2ms/step - loss: 1.0322
Epoch 4/5
7/7 [==============================] - 0s 2ms/step - loss: 1.0283
Epoch 5/5
7/7 [==============================] - 0s 3ms/step - loss: 1.0244
Model: "model_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_6 (InputLayer) [(None, 3)] 0
dense_5 (Dense) (None, 3) 12
maxout_4 (Maxout) (None, 3) 0
=================================================================
Total params: 12
Trainable params: 12
Non-trainable params: 0
_________________________________________________________________
None
Maybe also take a look at the paper regarding Maxout:
The maxout model is simply a feed-forward achitecture, such as a multilayer perceptron or deep convolutional neural network, that uses a new type of activation function: the maxout unit.

Tensorflow save and load_model not working but save and load_weights does

I am using tensorflow version 2.8.0:
I have seen this issue from multiple sources all over forums, githubs, and even some here for the past 5 years with no definitive answer that has worked for me... For some reason, in certain situations, a loaded model from a previous save yields very different results from the original model evaluation. I haven't seen any well documented and investigative questions about this so I thought I'd show my full code below (simple illustration of the issue).
This is an application of transfer learning from a pre-trained tensorflow model. The model is first trained through 5 epochs on train_data, then fine tuned (with more trainable params) for 5 more. Evaluating the model on test_data shows an accuracy of 0.5671. The model is then saved and loaded in .h5 format (I have also tried the tf SavedModel format and the result is the same). The resultant loaded_model yields an evaluation accuracy on the same, unaltered test_data of 0.4535.
The result should be the same (0.5671)... so to further investigate I decided to save the fine tuned model's weights independently, construct and compile the same model architecture in new_model, and load the saved model's weights into new_model. Evaluating new_model yields the correct result, an accuracy of 0.5671. ----- Okay, so it must be the weights not saving properly right? I pulled the weights from each of these three models (model, loaded_model, new_model) and compared their flattened results. They are all the same. I really have no idea what's going on here but I'm assuming it is not random initialization, because the loaded_model evaluation results really did not perform anywhere near the fine tuned model - I would assume they would converge much closer.
import tensorflow as tf
tf.random.set_seed(42)
import pandas as pd
import numpy as np
import os
import pathlib
data_dir = pathlib.Path("101_food_classes_10_percent/train")
class_names = np.array(sorted([item.name for item in data_dir.glob('*')]))
train_dir = './101_food_classes_10_percent/train/'
test_dir = './101_food_classes_10_percent/test/'
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen=ImageDataGenerator()
train_data = datagen.flow_from_directory(directory = train_dir,
target_size = (224,224),
batch_size = 32,
class_mode='categorical')
test_data = datagen.flow_from_directory(directory = test_dir,
target_size = (224,224),
batch_size = 32,
class_mode='categorical')
from tensorflow.keras.layers.experimental import preprocessing
data_augmentation = tf.keras.Sequential([
preprocessing.RandomFlip('horizontal'),
preprocessing.RandomRotation(0.2),
preprocessing.RandomZoom(0.2),
preprocessing.RandomHeight(0.2),
preprocessing.RandomWidth(0.2)
#preprocessing.Rescaling(1/255.) in EfficientNet it's already scaled but could use this for non-scaled
], name = 'data_augmentation')
Found 7575 images belonging to 101 classes.
Found 25250 images belonging to 101 classes.
# Build headless model - Feature Extraction
# Setup base with frozen layers
base_model = tf.keras.applications.EfficientNetB0(include_top=False)
base_model.trainable=False
inputs = tf.keras.layers.Input(shape = (224,224,3))
x = data_augmentation(inputs)
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x) # Pool base_model's outputs into a feature vector
outputs = tf.keras.layers.Dense(len(class_names), activation='softmax')(x)
model = tf.keras.Model(inputs,outputs)
model.compile('Adam', 'categorical_crossentropy', metrics=['accuracy'])
model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
data_augmentation (Sequentia (None, None, None, 3) 0
_________________________________________________________________
efficientnetb0 (Functional) (None, None, None, 1280) 4049571
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1280) 0
_________________________________________________________________
dense_1 (Dense) (None, 101) 129381
=================================================================
Total params: 4,178,952
Trainable params: 129,381
Non-trainable params: 4,049,571
_________________________________________________________________
history = model.fit(train_data, validation_data=test_data,
validation_steps=int(0.15*len(test_data)),
epochs=5, callbacks = [checkpoint_callback])
Epoch 1/5
237/237 [==============================] - 63s 230ms/step - loss: 3.4712 - accuracy: 0.2482 - val_loss: 2.4446 - val_accuracy: 0.4497
Epoch 2/5
237/237 [==============================] - 52s 221ms/step - loss: 2.3575 - accuracy: 0.4561 - val_loss: 2.0051 - val_accuracy: 0.5093
Epoch 3/5
237/237 [==============================] - 51s 216ms/step - loss: 1.9838 - accuracy: 0.5265 - val_loss: 1.8313 - val_accuracy: 0.5360
Epoch 4/5
237/237 [==============================] - 51s 212ms/step - loss: 1.7497 - accuracy: 0.5761 - val_loss: 1.7417 - val_accuracy: 0.5461
Epoch 5/5
237/237 [==============================] - 53s 221ms/step - loss: 1.6035 - accuracy: 0.6141 - val_loss: 1.7012 - val_accuracy: 0.5601
model.evaluate(test_data)
790/790 [==============================] - 87s 110ms/step - loss: 1.7294 - accuracy: 0.5481
[1.7294203042984009, 0.5480791926383972]
# Fine tuning: unfreeze some layers, lower learning rate by 10x
base_model.trainable=True
# Refreeze every layer except last 5, adjust finer tuned features down the model
for layer in base_model.layers[:-5]:
layer.trainable=False
# recompile and lower learning rate by 10x
model.compile(tf.keras.optimizers.Adam(learning_rate=0.0001), 'categorical_crossentropy', metrics=['accuracy'])
model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
data_augmentation (Sequentia (None, None, None, 3) 0
_________________________________________________________________
efficientnetb0 (Functional) (None, None, None, 1280) 4049571
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1280) 0
_________________________________________________________________
dense_1 (Dense) (None, 101) 129381
=================================================================
Total params: 4,178,952
Trainable params: 910,821
Non-trainable params: 3,268,131
_________________________________________________________________
# Fine Tune for 5 more epochs starting with last epoch left off at:
fine_tune_epochs=10 # Total number of epochs we're after: 5 feature extraction, 5 fine tuning
history_fine_tune = model.fit(train_data,
validation_data = test_data,
validation_steps=int(0.15*len(test_data)),
epochs = fine_tune_epochs,
initial_epoch = history.epoch[-1])
Epoch 5/10
237/237 [==============================] - 59s 220ms/step - loss: 1.3571 - accuracy: 0.6543 - val_loss: 1.6403 - val_accuracy: 0.5567
Epoch 6/10
237/237 [==============================] - 51s 213ms/step - loss: 1.2478 - accuracy: 0.6688 - val_loss: 1.6805 - val_accuracy: 0.5596
Epoch 7/10
237/237 [==============================] - 46s 193ms/step - loss: 1.1424 - accuracy: 0.6964 - val_loss: 1.6352 - val_accuracy: 0.5736
Epoch 8/10
237/237 [==============================] - 45s 191ms/step - loss: 1.0902 - accuracy: 0.7065 - val_loss: 1.6494 - val_accuracy: 0.5657
Epoch 9/10
237/237 [==============================] - 46s 193ms/step - loss: 1.0229 - accuracy: 0.7275 - val_loss: 1.6348 - val_accuracy: 0.5633
Epoch 10/10
237/237 [==============================] - 45s 191ms/step - loss: 0.9704 - accuracy: 0.7434 - val_loss: 1.6990 - val_accuracy: 0.5670
model.evaluate(test_data)
790/790 [==============================] - 83s 105ms/step - loss: 1.6578 - accuracy: 0.5671
[1.657836675643921, 0.5670890808105469]
model.save("./101_food_classes_10_percent/big_modelh5")
loaded_model = tf.keras.models.load_model("./101_food_classes_10_percent/big_modelh5.h5")
loaded_model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
data_augmentation (Sequentia (None, None, None, 3) 0
_________________________________________________________________
efficientnetb0 (Functional) (None, None, None, 1280) 4049571
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1280) 0
_________________________________________________________________
dense_1 (Dense) (None, 101) 129381
=================================================================
Total params: 4,178,952
Trainable params: 910,821
Non-trainable params: 3,268,131
_________________________________________________________________
loaded_model.evaluate(test_data)
790/790 [==============================] - 85s 104ms/step - loss: 2.1780 - accuracy: 0.4535 - loss: 2.1790 - accuracy
[2.1780412197113037, 0.4534653425216675]
# Try save_weights to another model
model.save_weights('my_model_weights.h5')
inputs = tf.keras.layers.Input(shape = (224,224,3))
x = data_augmentation(inputs)
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x) # Pool base_model's outputs into a feature vector
outputs = tf.keras.layers.Dense(len(class_names), activation='softmax')(x)
new_model = tf.keras.Model(inputs,outputs)
new_model.compile('Adam', 'categorical_crossentropy', metrics=['accuracy'])
new_model.summary()
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_5 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
data_augmentation (Sequentia (None, None, None, 3) 0
_________________________________________________________________
efficientnetb0 (Functional) (None, None, None, 1280) 4049571
_________________________________________________________________
global_average_pooling2d_2 ( (None, 1280) 0
_________________________________________________________________
dense_2 (Dense) (None, 101) 129381
=================================================================
Total params: 4,178,952
Trainable params: 910,821
Non-trainable params: 3,268,131
_________________________________________________________________
new_model.load_weights('my_model_weights.h5')
# Saving weights works... but not save and load_model
new_model.evaluate(test_data)
790/790 [==============================] - 88s 109ms/step - loss: 1.6578 - accuracy: 0.5671
[1.6578353643417358, 0.5670890808105469]
# Check if weights are the same?
m1 = model.get_weights()
m2 = new_model.get_weights()
m3 = loaded_model.get_weights()
len(m1)==len(m2)==len(m3)
True
from collections.abc import Iterable
def flatten(l):
    """Lazily yield the leaf elements of an arbitrarily nested iterable.

    Every element that is itself an ``Iterable`` is descended into
    recursively. ``str`` and ``bytes`` are treated as leaves and yielded
    whole rather than split into characters/bytes.

    The result is a generator, so it can be consumed only once; wrap it in
    ``list(...)`` if the flattened values are needed more than one time.
    """
    for el in l:
        if isinstance(el, Iterable) and not isinstance(el, (str, bytes)):
            yield from flatten(el)
        else:
            yield el
# flatten() returns a one-shot generator, so materialize each flattened weight
# sequence exactly once. Re-wrapping the same generator in list() for the
# second comparison would yield an empty list and compare nothing.
m1 = list(flatten(m1))
m2 = list(flatten(m2))
m3 = list(flatten(m3))
print(m1 == m2)
print(m1 == m3)
True
True
This is because you saved the entire model without the .h5 extension but then loaded it from a path ending in .h5; you only used the .h5 extension when saving the weights. Please check the code section below:
model.save("./101_food_classes_10_percent/big_modelh5") # add .h5
loaded_model = tf.keras.models.load_model("./101_food_classes_10_percent/big_modelh5.h5")
loaded_model.summary()
Use this code to save the entire model in the HDF5 file format, then try loading it again:
model.save("./101_food_classes_10_percent/big_modelh5.h5")
Check this for more details on saving model in .hdf5 format.

Keras Sequential to Functional API

I am new to deep learning and have been trying to convert the Keras sequential API to the functional API running on the CIFAR10 image dataset but have been having some difficulty. I've converted the model which looks the same except for the input layer yet the sequential has an average accuracy of around ~70% and my functional has an average accuracy of around ~10%. I would really appreciate some help with regards to figuring out what is going wrong. Here is my functional code:
import tensorflow as tf
from tensorflow import keras
from keras import datasets, layers, models
from keras.models import Model, Input, Sequential
import matplotlib.pyplot as plt
Download and prepare:
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0
input_shape = train_images[0,:,:,:].shape
Create model:
input = layers.Input(shape=input_shape)
x = layers.Conv2D(32, (3, 3), activation='relu',padding='valid')(input)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Conv2D(64, (3, 3), activation='relu')(x)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Conv2D(64, (3, 3), activation='relu')(x)
x = layers.Flatten()(x)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dense(10)(x)
model = Model(input, x, name='Functional')
Compile and train:
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
history = model.fit(train_images, train_labels, epochs=10,
validation_data=(test_images, test_labels))
Here is a link to the original sequential CNN, which is a Google Colaboratory notebook. I would really appreciate any help in trying to understand and fix what is going wrong. Thank you in advance.
There seem to be some issues with the SparseCategoricalCrossentropy loss.
Check this: https://github.com/tensorflow/tensorflow/issues/38632
The following model gives good accuracy:
import tensorflow as tf
from tensorflow import keras
from keras import datasets, layers, models
from keras.models import Model, Input, Sequential
import matplotlib.pyplot as plt
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0
train_labels, test_labels = tf.keras.utils.to_categorical(train_labels, 10) , tf.keras.utils.to_categorical(test_labels, 10)
input_shape = train_images[0,:,:,:].shape
input = layers.Input(shape=input_shape)
x = layers.Conv2D(32, (3, 3), activation='relu',padding='valid')(input)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Conv2D(64, (3, 3), activation='relu')(x)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Conv2D(64, (3, 3), activation='relu')(x)
x = layers.Flatten()(x)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dense(10, activation='softmax')(x)
model = Model(input, x, name='Functional')
model.summary()
# Labels are one-hot encoded above and the final layer applies softmax, so use
# the (non-sparse) categorical cross-entropy on probabilities.
model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])
history = model.fit(train_images, train_labels, epochs=10,
validation_data=(test_images, test_labels))
conv2d_16 (Conv2D) (None, 30, 30, 32) 896
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 15, 15, 32) 0
_________________________________________________________________
conv2d_17 (Conv2D) (None, 13, 13, 64) 18496
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 6, 6, 64) 0
_________________________________________________________________
conv2d_18 (Conv2D) (None, 4, 4, 64) 36928
_________________________________________________________________
flatten_6 (Flatten) (None, 1024) 0
_________________________________________________________________
dense_11 (Dense) (None, 64) 65600
_________________________________________________________________
dense_12 (Dense) (None, 10) 650
=================================================================
Total params: 122,570
Trainable params: 122,570
Non-trainable params: 0
_________________________________________________________________
Train on 50000 samples, validate on 10000 samples
Epoch 1/10
50000/50000 [==============================] - 15s 305us/step - loss: 1.4870 - accuracy: 0.4600 - val_loss: 1.2874 - val_accuracy: 0.5488
Epoch 2/10
50000/50000 [==============================] - 15s 301us/step - loss: 1.1365 - accuracy: 0.5989 - val_loss: 1.0789 - val_accuracy: 0.6191
Epoch 3/10
50000/50000 [==============================] - 15s 301us/step - loss: 0.9869 - accuracy: 0.6547 - val_loss: 0.9506 - val_accuracy: 0.6700
Epoch 4/10
50000/50000 [==============================] - 15s 301us/step - loss: 0.8896 - accuracy: 0.6907 - val_loss: 0.9509 - val_accuracy: 0.6695
Epoch 5/10
50000/50000 [==============================] - 16s 311us/step - loss: 0.8135 - accuracy: 0.7151 - val_loss: 0.8688 - val_accuracy: 0.7046
Epoch 6/10
50000/50000 [==============================] - 15s 303us/step - loss: 0.7566 - accuracy: 0.7351 - val_loss: 0.8411 - val_accuracy: 0.7141

How to add Gaussian noise with varying std during training?

I am training a CNN using Keras and TensorFlow. I would like to add Gaussian noise to my input data during training and reduce the percentage of the noise in later steps. Right now I use:
from tensorflow.python.keras.layers import Input, GaussianNoise, BatchNormalization
inputs = Input(shape=x_train_n.shape[1:])
bn0 = BatchNormalization(axis=1, scale=True)(inputs)
g0 = GaussianNoise(0.5)(bn0)
The argument that GaussianNoise takes is the standard deviation of the noise distribution, and I couldn't assign a dynamic value to it. How can I, for example, add noise and then decrease this value based on the epoch that I am in?
You can simply design a custom callback which changes the stddev before training for an epoch.
Reference:
https://www.tensorflow.org/api_docs/python/tf/keras/layers/GaussianNoise
https://www.tensorflow.org/guide/keras/custom_callback
from tensorflow.keras.layers import Input, Dense, Add, Activation
from tensorflow.keras.models import Model
import tensorflow as tf
import numpy as np
import random
from tensorflow.python.keras.layers import Input, GaussianNoise, BatchNormalization
inputs = Input(shape=100)
bn0 = BatchNormalization(axis=1, scale=True)(inputs)
g0 = GaussianNoise(0.5)(bn0)
d0 = Dense(10)(g0)
model = Model(inputs, d0)
model.compile('adam', 'mse')
model.summary()
class MyCustomCallback(tf.keras.callbacks.Callback):
    """Callback that assigns a new noise stddev at the start of every epoch."""

    def on_epoch_begin(self, epoch, logs=None):
        """Set a new ``stddev`` on the noise layer and print it.

        Args:
            epoch: Index of the epoch about to start (unused here).
            logs: Metrics dict passed by Keras (unused here).
        """
        # Index 2 is the GaussianNoise layer of the model built above
        # (0: input, 1: batch normalization, 2: Gaussian noise, 3: dense).
        noise_layer = self.model.layers[2]
        # A random value is used for demonstration; substitute a function of
        # `epoch` here to get a decaying noise schedule.
        # NOTE(review): stddev is a plain Python attribute; if the training
        # step is traced into a graph, the value may be captured at first
        # trace - confirm that later updates actually change the noise.
        noise_layer.stddev = random.uniform(0, 1)
        print('updating sttdev in training')
        print(noise_layer.stddev)
X_train = np.zeros((10,100))
y_train = np.zeros((10,10))
noise_change = MyCustomCallback()
model.fit(X_train,
y_train,
batch_size=32,
epochs=5,
callbacks = [noise_change])
Model: "model_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_6 (InputLayer) [(None, 100)] 0
_________________________________________________________________
batch_normalization_5 (Batch (None, 100) 400
_________________________________________________________________
gaussian_noise_5 (GaussianNo (None, 100) 0
_________________________________________________________________
dense_5 (Dense) (None, 10) 1010
=================================================================
Total params: 1,410
Trainable params: 1,210
Non-trainable params: 200
_________________________________________________________________
Epoch 1/5
updating sttdev in training
0.984045691131548
1/1 [==============================] - 0s 1ms/step - loss: 1.6031
Epoch 2/5
updating sttdev in training
0.02821459469022025
1/1 [==============================] - 0s 742us/step - loss: 1.5966
Epoch 3/5
updating sttdev in training
0.6102984511769268
1/1 [==============================] - 0s 1ms/step - loss: 1.8818
Epoch 4/5
updating sttdev in training
0.021155188690323512
1/1 [==============================] - 0s 1ms/step - loss: 1.2032
Epoch 5/5
updating sttdev in training
0.35950227285165115
1/1 [==============================] - 0s 2ms/step - loss: 1.8817
<tensorflow.python.keras.callbacks.History at 0x7fc67ce9e668>

Categories

Resources