I'm trying to get the input weight on each layer, including the lstm 1, lstm 2, and weight after the attention layer, and want to display them using a heatmap. But when I run the code, the following error appears. What happened? Because the layer exists.
Here is the code:
model.add(LSTM(32, input_shape=(n_timesteps,n_features), return_sequences=True))
#print weights
print(model.get_layer(LSTM).get_weights()[0])
model.add(LSTM(32, input_shape=(n_timesteps,n_features), return_sequences=True))
model.add(Dropout(0.1))
model.add(attention(return_sequences=False)) # receive 3D and output 2D
model.add(Dense(n_outputs, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
# evaluate model
_, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
Attention layer:
class attention(Layer):
def __init__(self, return_sequences=True):
self.return_sequences = return_sequences
super(attention,self).__init__()
def build(self, input_shape):
self.W=self.add_weight(name="att_weight", shape=(input_shape[-1],1),
initializer="normal")
self.b=self.add_weight(name="att_bias", shape=(input_shape[1],1),
initializer="zeros")
super(attention,self).build(input_shape)
def call(self, x):
e = K.tanh(K.dot(x,self.W)+self.b)
a = K.softmax(e, axis=1)
output = x*a
if self.return_sequences:
return output
return K.sum(output, axis=1)
And this is the error that appears:
ValueError: No such layer: <class 'keras.layers.recurrent_v2.LSTM'>. Existing layers are [<keras.layers.recurrent_v2.LSTM object at 0x7f7b5c215910>].
You can get certain layer weights using model.layers after defining your whole model:
import tensorflow as tf
import seaborn as sb
import matplotlib.pyplot as plt
class attention(tf.keras.layers.Layer):
def __init__(self, return_sequences=True):
self.return_sequences = return_sequences
super(attention,self).__init__()
def build(self, input_shape):
self.W=self.add_weight(name="att_weight", shape=(input_shape[-1],1),
initializer="normal")
self.b=self.add_weight(name="att_bias", shape=(input_shape[1],1),
initializer="zeros")
super(attention,self).build(input_shape)
def call(self, x):
e = tf.keras.backend.tanh(tf.keras.backend.dot(x,self.W)+self.b)
a = tf.keras.backend.softmax(e, axis=1)
output = x*a
if self.return_sequences:
return output
return tf.keras.backend.sum(output, axis=1)
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(32, input_shape=(5,10), return_sequences=True))
model.add(tf.keras.layers.LSTM(32, return_sequences=True))
model.add(tf.keras.layers.Dropout(0.1))
model.add(attention(return_sequences=False)) # receive 3D and output 2D
model.add(tf.keras.layers.Dense(3, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
trainx = tf.random.normal((25, 5, 10))
trainy = tf.random.uniform((25, 3), maxval=3)
model.fit(trainx, trainy, epochs=5, batch_size=4)
lstm1_weights = model.layers[0].get_weights()[0]
lstm2_weights = model.layers[1].get_weights()[0]
attention_weights = model.layers[3].get_weights()[0]
heat_map = sb.heatmap(lstm1_weights)
plt.show()
Model: "sequential_16"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_24 (LSTM) (None, 5, 32) 5504
lstm_25 (LSTM) (None, 5, 32) 8320
dropout_12 (Dropout) (None, 5, 32) 0
attention_12 (attention) (None, 32) 37
dense_12 (Dense) (None, 3) 99
=================================================================
Total params: 13,960
Trainable params: 13,960
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
7/7 [==============================] - 4s 10ms/step - loss: 5.5033 - accuracy: 0.4400
Epoch 2/5
7/7 [==============================] - 0s 8ms/step - loss: 5.4899 - accuracy: 0.5200
Epoch 3/5
7/7 [==============================] - 0s 9ms/step - loss: 5.4771 - accuracy: 0.4800
Epoch 4/5
7/7 [==============================] - 0s 9ms/step - loss: 5.4701 - accuracy: 0.5200
Epoch 5/5
7/7 [==============================] - 0s 8ms/step - loss: 5.4569 - accuracy: 0.5200
If you want to see how the weights of your layers change during training, you should define a callback as shown in this post.
Related
I'm trying to port a tensorflow neural network to pytorch, as an exercise to familiarize myself with both / their nuances. This is the tensorflow network I'm porting to pytorch:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D
from tensorflow.keras.datasets import imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=5000)
x_train = sequence.pad_sequences(x_train, maxlen=400, padding="post")
x_test = sequence.pad_sequences(x_test, maxlen=400, padding="post")
model = Sequential()
model.add(Embedding(5000, 50, input_length=400))
model.add(Dropout(0.2))
model.add(Conv1D(250, 3, padding='valid',activation='relu',strides=1))
model.add(GlobalMaxPooling1D())
model.add(Dense(250))
model.add(Dropout(0.2))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
h2 = model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test, y_test))
The shapes of each layer is shown below:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 400, 50) 250000
dropout (Dropout) (None, 400, 50) 0
conv1d (Conv1D) (None, 398, 250) 37750
global_max_pooling1d (Globa (None, 250) 0
lMaxPooling1D)
dense (Dense) (None, 250) 62750
dropout_1 (Dropout) (None, 250) 0
activation (Activation) (None, 250) 0
dense_1 (Dense) (None, 1) 251
activation_1 (Activation) (None, 1) 0
=================================================================
Total params: 350,751
Trainable params: 350,751
Non-trainable params: 0
And the output of the tensorflow model is:
Epoch 1/10
loss: 0.4043 - accuracy: 0.8021 - val_loss: 0.2764 - val_accuracy: 0.8854
Epoch 2/10
loss: 0.2332 - accuracy: 0.9052 - val_loss: 0.2690 - val_accuracy: 0.8888
Epoch 3/10
loss: 0.1598 - accuracy: 0.9389 - val_loss: 0.2948 - val_accuracy: 0.8832
Epoch 4/10
loss: 0.1112 - accuracy: 0.9600 - val_loss: 0.3015 - val_accuracy: 0.8906
Epoch 5/10
loss: 0.0810 - accuracy: 0.9700 - val_loss: 0.3057 - val_accuracy: 0.8868
Epoch 6/10
loss: 0.0537 - accuracy: 0.9811 - val_loss: 0.4055 - val_accuracy: 0.8868
Epoch 7/10
loss: 0.0408 - accuracy: 0.9860 - val_loss: 0.4083 - val_accuracy: 0.8852
Epoch 8/10
loss: 0.0411 - accuracy: 0.9845 - val_loss: 0.4789 - val_accuracy: 0.8789
Epoch 9/10
loss: 0.0380 - accuracy: 0.9862 - val_loss: 0.4828 - val_accuracy: 0.8827
Epoch 10/10
loss: 0.0329 - accuracy: 0.9879 - val_loss: 0.4999 - val_accuracy: 0.8825
Here's what I have in my PyTorch port over:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
class CustomDataset(Dataset):
def __init__(self, x, y):
self.x = x
self.y = y
def __len__(self):
return len(self.y)
def __getitem__(self, idx):
return self.x[idx], self.y[idx]
train_dataloader = DataLoader(CustomDataset(torch.Tensor(x_train), torch.Tensor(y_train)), batch_size=32, shuffle=True)
test_dataloader = DataLoader(CustomDataset(torch.Tensor(x_test), torch.Tensor(y_test)), batch_size=32, shuffle=True)
class MyModel(torch.nn.Module):
def __init__(self, vocab_size=5000, input_len=400, embedding_dims=50, kernel_size=3, filters=250, hidden_dims=250):
super(MyModel, self).__init__()
self.embedding_dims = embedding_dims
self.input_len = input_len
self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dims)
self.dropout1 = torch.nn.Dropout(p=0.2)
self.conv1d = torch.nn.Conv1d(in_channels=embedding_dims, out_channels=filters, kernel_size=kernel_size, padding=(0,), stride=1)
self.pool = torch.nn.AdaptiveMaxPool1d(1)
self.linear1 = torch.nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
self.dropout2 = torch.nn.Dropout(p=0.2)
self.activation = torch.nn.ReLU()
self.output = torch.nn.Linear(in_features=hidden_dims, out_features=1)
self.activation2 = torch.nn.Sigmoid()
def forward(self, x):
x = self.dropout1(self.embedding(x.type(torch.LongTensor)))
x = self.conv1d(x.view(-1, self.embedding_dims, self.input_len))
x = self.pool(x)
x = self.activation(self.dropout2(self.linear1(x.view(-1,x.size()[1]))))
x = self.activation2(self.output(x))
return x
class FitTorchModel():
def __init__(self, model, num_epochs=10, steps_per_epoch=782):
self.model = model
self.epochs = num_epochs
self.steps_per_epoch = steps_per_epoch
def fit(self, train_dataloader, test_dataloader):
opt = torch.optim.Adam(self.model.parameters(), lr=0.001)
crit = torch.nn.BCELoss(reduction = "mean")
history_df = pd.DataFrame(columns = ["Loss", "Accuracy", "Val_Loss", "Val_Acc"])
for epoch in range(self.epochs):
self.model.train()
print(f"Epoch {epoch}")
epoch_loss = 0
epoch_acc = 0
it = iter(train_dataloader)
for step in tqdm(range(self.steps_per_epoch)):
opt.zero_grad()
x, y = next(it)
y_pred = self.model(x).view(-1)
loss = crit(y_pred, y)
epoch_loss += loss.item()
epoch_acc += accuracy_score(y==1, y_pred > 0.5)
loss.backward()
opt.step()
val_loss, val_acc = self.predict_proba(test_dataloader, crit)
df = pd.DataFrame({"Loss": epoch_loss/(step+1),
"Accuracy": epoch_acc/(step+1),
"Val_Loss": val_loss, "Val_Acc": val_acc}, index=[0])
history_df = pd.concat((history_df, df), ignore_index=True)
return history_df
def predict_proba(self, test_dataloader, crit):
self.model.eval()
val_loss = 0
val_acc = 0
it = iter(test_dataloader)
with torch.no_grad():
for step in tqdm(range(self.steps_per_epoch)):
x,y = next(it)
y_pred = self.model(x).view(-1)
batch_loss = crit(y_pred, y)
val_loss += batch_loss.item()
val_acc += accuracy_score(y==1, y_pred > 0.5)
return val_loss/(step+1), val_acc/(step+1)
ftm = FitTorchModel(model=MyModel(), num_epochs=10, steps_per_epoch=782)
history_df = ftm.fit(train_dataloader, test_dataloader)
The shape of each layer is:
After embedding layer: torch.Size([32, 400, 50])
After dropout1 layer: torch.Size([32, 400, 50])
After convolution1d layer: torch.Size([32, 250, 398])
After maxpooling layer: torch.Size([32, 250, 1])
After linear1 layer: torch.Size([32, 250])
After dropout2 layer: torch.Size([32, 250])
After activation layer: torch.Size([32, 250])
After output layer: torch.Size([32, 1])
After activation2 layer: torch.Size([32, 1])
The output of the pytorch model training is:
Loss Accuracy Val_Loss Val_Acc
0 0.697899 0.505874 0.692495 0.511629
1 0.693063 0.503477 0.693186 0.503637
2 0.693190 0.496044 0.693149 0.499201
3 0.693181 0.501359 0.693082 0.502038
4 0.693169 0.503237 0.693234 0.495964
5 0.693177 0.500240 0.693154 0.500679
6 0.693069 0.507473 0.693258 0.498881
7 0.693948 0.500320 0.693145 0.501598
8 0.693196 0.499640 0.693164 0.496324
9 0.693170 0.500759 0.693140 0.501918
Couple things: the accuracy hovers around guessing (this is a binary classification task), no matter how many epochs have passed. Secondly, the training loss barely improves. I set the learning rate to the default learning rate described by tensorflow's Adam Optimizer docs. What else am I missing here? I had some trouble with the input / output dimensions for the various layers - did I mess those up at all?
Some observations:
Use BCEWithLogitsLoss as loss on the output of the last linear layer, before the sigmoid. This includes the sigmoid activation in a more numerically stable fashion.
The TensorFlow model has a ReLU after the Convolution, the pytorch implementations does not.
In general, for debugging, one might want to look at weight.grad of some of your weights after the loss.backward() and see if gradients calculated. Also printing out the value of one of the weights in each iteration to see if your optimizer actually changes the weights can help...
Also, it can depend on the input data:
(Are you sure that x_test is scaled correctly?)
If you are transforming your inputs to Long before embedding them and all x_test, for example, are floats between 0 and 1, they will all be converted to 0! And the network will have a hard time predicting the labels from all zeros as constant input!
But now to the actual issue in this particular case:
Be careful with .view! It might not do what you expect. It just reshapes the tensor but does not move the data around.
What you really want is .moveaxes(-1,2) instead!!
Loss Accuracy Val_Loss Val_Acc
0 0.573489 0.671715 0.402601 0.819413
1 0.376908 0.830163 0.33786 0.850783
2 0.308343 0.868646 0.296171 0.872323
3 0.258806 0.893342 0.319121 0.865849
4 0.227044 0.907649 0.3172 0.868326
5 0.202789 0.918478 0.281184 0.886549
6 0.179744 0.928549 0.291027 0.886589
7 0.161205 0.93702 0.329196 0.879156
8 0.145447 0.944094 0.294914 0.889746
9 0.133034 0.949568 0.291476 0.889826
After adding the relu after the convolution and, more importantly, fixing the view!
class MyModel(torch.nn.Module):
def __init__(self, vocab_size=5000, input_len=400, embedding_dims=50, kernel_size=3, filters=250, hidden_dims=250):
super(MyModel, self).__init__()
self.embedding_dims = embedding_dims
self.input_len = input_len
self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dims)
self.dropout1 = torch.nn.Dropout(p=0.2)
self.conv1d = torch.nn.Conv1d(in_channels=embedding_dims, out_channels=filters, kernel_size=kernel_size, padding=(0,), stride=1)
self.pool = torch.nn.AdaptiveMaxPool1d(1)
self.linear1 = torch.nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
self.dropout2 = torch.nn.Dropout(p=0.2)
self.activation = torch.nn.ReLU()
self.output = torch.nn.Linear(in_features=hidden_dims, out_features=1)
self.activation2 = torch.nn.Sigmoid()
def forward(self, x):
x = self.dropout1(self.embedding(x.type(torch.LongTensor)))
x = self.activation(self.conv1d(x.moveaxis(-1,-2)))
x = self.pool(x).squeeze(-1)
x = self.activation(self.dropout2(self.linear1(x)))
x = self.activation2(self.output(x))
return x
What is tinymodel you init opt with in fit function:
opt = torch.optim.Adam(tinymodel.parameters(), lr=0.001)
It seems like your optimizer is not working on the right model (see this answer on the relation between the optimizer and the parameters of the model).
You need to replace this line in fit function:
def fit(self, train_dataloader, test_dataloader):
opt = torch.optim.Adam(self.model.parameters(), lr=0.001)
# ...
Additionally, you are using Dropout layer that has different behavior in train and test.
You should add self.model.train() and self.model.eval() at the beginning of your fit and predict_proba functions respectively.
I am trying to use the tensorflow maxout implementation (https://www.tensorflow.org/addons/api_docs/python/tfa/layers/Maxout) but struggle with it;
I try to illustrate my problem: If I have the following
d=3
x_in=Input(shape=d)
x_out=Dense(d, activation='relu')(x_in)
model = Model(inputs=x_in, outputs=x_out)
model.compile(optimizer='adam', loss='MeanAbsoluteError')
X=tf.random.normal([200,3])
Y=tf.random.normal([200,3])
model.fit(X, Y, epochs=5, batch_size=32)
Then it is working normally, i.e. the loss is continuously getting smaller and I can get the estimated weights:
model.layers[1].get_weights()
Out[141]:
[array([[-0.15133516, -0.14892222, -0.64674205],
[ 0.34437487, 0.7822309 , -0.08931279],
[-0.8330534 , -0.13827904, -0.23096593]], dtype=float32),
array([-0.03069788, -0.03311999, -0.02603031], dtype=float32)]
However, when I want to use a maxout activation instead, things do not work out
d=3
x_in=Input(shape=d)
x_out = tfa.layers.Maxout(3)(x_in)
model = Model(inputs=x_in, outputs=x_out)
model.compile(optimizer='adam', loss='MeanAbsoluteError')
X=tf.random.normal([200,3])
Y=tf.random.normal([200,3])
model.fit(X, Y, epochs=5, batch_size=32)
The loss stays constant for all Epochs and
model.layers[1].get_weights()
Out[141]: []
Where is my mistake?
It will only work in combination with another layer, for example a Dense layer. Also, the Maxout layer itself does not have any trainable weights as you can see in the model summary but it does have a hyperparameter num_units:
import tensorflow as tf
import tensorflow_addons as tfa
d=3
x_in=tf.keras.layers.Input(shape=d)
x = tf.keras.layers.Dense(3)(x_in)
x_out = tfa.layers.Maxout(3)(x)
model = tf.keras.Model(inputs=x_in, outputs=x_out)
model.compile(optimizer='adam', loss='MeanAbsoluteError')
X=tf.random.normal([200,3])
Y=tf.random.normal([200,3])
model.fit(X, Y, epochs=5, batch_size=32)
print(model.summary())
Epoch 1/5
7/7 [==============================] - 0s 2ms/step - loss: 1.0404
Epoch 2/5
7/7 [==============================] - 0s 3ms/step - loss: 1.0361
Epoch 3/5
7/7 [==============================] - 0s 2ms/step - loss: 1.0322
Epoch 4/5
7/7 [==============================] - 0s 2ms/step - loss: 1.0283
Epoch 5/5
7/7 [==============================] - 0s 3ms/step - loss: 1.0244
Model: "model_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_6 (InputLayer) [(None, 3)] 0
dense_5 (Dense) (None, 3) 12
maxout_4 (Maxout) (None, 3) 0
=================================================================
Total params: 12
Trainable params: 12
Non-trainable params: 0
_________________________________________________________________
None
Maybe also take a look at the paper regarding Maxout:
The maxout model is simply a feed-forward achitecture, such as a multilayer perceptron or deep convolutional neural network, that uses a new type of activation function: the maxout unit.
When trying to get confusion matrix for a ConvNet constantly getting the same error.
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
import numpy as np
from keras.preprocessing import image
from sklearn.metrics import classification_report, confusion_matrix
img_width, img_height = 150, 150
train_data_dir = "train"
validation_data_dir = "test"
nb_train_samples = 2000
nb_validation_samples = 400
epochs = 50
batch_size = 40 #16
if K.image_data_format() == 'channels_first':
input_shape = (3, img_width, img_height)
else:
input_shape = (img_width, img_height, 3)
train_datagen = ImageDataGenerator(
rescale = 1. / 255,
shear_range = 0.2,
zoom_range= 0.2,
horizontal_flip= True)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = test_datagen.flow_from_directory(
train_data_dir,
target_size= (img_width, img_height),
batch_size= batch_size,
class_mode= 'binary')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size= (img_width, img_height),
batch_size= batch_size,
class_mode= 'binary')`
Applying CNN Layers ...
model.compile(loss= 'binary_crossentropy',
optimizer= 'rmsprop',
metrics= ['accuracy'] )
`model.fit_generator(
train_generator,
steps_per_epoch= nb_train_samples // batch_size,
epochs= epochs,
validation_data= validation_generator,
validation_steps= nb_validation_samples // batch_size)
Y_pred = model.predict_generator(validation_generator, nb_validation_samples // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))`
Getting error mentioned below but don't know how to resolve it
ValueError: Found input variables with inconsistent numbers of samples: [400, 440]
I am able to recreate your error using Dogs_Vs_Cats dataset. Where i have 2000 samples in train directory and 400 samples in validation directory.
Please change model.predict_generator from
Y_pred = model.predict_generator(validation_generator, nb_validation_samples // batch_size+1)
to
Y_pred = model.predict_generator(validation_generator, nb_validation_samples // batch_size)
will resolve this ValueError: Found input variables with inconsistent numbers of samples: [400, 440]
Please refer complete code as below
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
import numpy as np
from keras.preprocessing import image
from sklearn.metrics import classification_report, confusion_matrix
from google.colab import drive
drive.mount('/content/drive')
train_data_dir = '/content/drive/My Drive/Dogs_Vs_Cats/train'
validation_data_dir = '/content/drive/My Drive/Dogs_Vs_Cats/validation'
img_width, img_height = 150, 150
nb_train_samples = 2000
nb_validation_samples = 400
epochs = 10
batch_size = 40 #16
if K.image_data_format() == 'channels_first':
input_shape = (3, img_width, img_height)
else:
input_shape = (img_width, img_height, 3)
train_datagen = ImageDataGenerator(
rescale = 1. / 255,
shear_range = 0.2,
zoom_range= 0.2,
horizontal_flip= True)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = test_datagen.flow_from_directory(
train_data_dir,
target_size= (img_width, img_height),
batch_size= batch_size,
class_mode= 'binary')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size= (img_width, img_height),
batch_size= batch_size,
class_mode= 'binary')
model = Sequential()
model.add(Conv2D(32, (3, 3), strides = (1, 1), input_shape = input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), strides = (1, 1)))
model.add(Activation('relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss = 'binary_crossentropy',
optimizer = 'rmsprop',
metrics = ['accuracy'])
model.fit_generator(
train_generator,
steps_per_epoch= nb_train_samples // batch_size,
epochs= epochs,
validation_data= validation_generator,
validation_steps= nb_validation_samples // batch_size)
Y_pred = model.predict_generator(validation_generator, nb_validation_samples // batch_size)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
Output:
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 2000 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
Model: "sequential_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_9 (Conv2D) (None, 148, 148, 32) 896
_________________________________________________________________
activation_9 (Activation) (None, 148, 148, 32) 0
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 74, 74, 32) 0
_________________________________________________________________
conv2d_10 (Conv2D) (None, 72, 72, 64) 18496
_________________________________________________________________
activation_10 (Activation) (None, 72, 72, 64) 0
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 36, 36, 64) 0
_________________________________________________________________
flatten_5 (Flatten) (None, 82944) 0
_________________________________________________________________
dense_9 (Dense) (None, 64) 5308480
_________________________________________________________________
dropout_5 (Dropout) (None, 64) 0
_________________________________________________________________
dense_10 (Dense) (None, 1) 65
=================================================================
Total params: 5,327,937
Trainable params: 5,327,937
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
50/50 [==============================] - 12s 233ms/step - loss: 0.9345 - accuracy: 0.5375 - val_loss: 0.6303 - val_accuracy: 0.5225
Epoch 2/10
50/50 [==============================] - 11s 226ms/step - loss: 0.6745 - accuracy: 0.5965 - val_loss: 0.6094 - val_accuracy: 0.6725
Epoch 3/10
50/50 [==============================] - 11s 223ms/step - loss: 0.6196 - accuracy: 0.6605 - val_loss: 0.5694 - val_accuracy: 0.7150
Epoch 4/10
50/50 [==============================] - 11s 223ms/step - loss: 0.5501 - accuracy: 0.7285 - val_loss: 0.6216 - val_accuracy: 0.7225
Epoch 5/10
50/50 [==============================] - 11s 221ms/step - loss: 0.4794 - accuracy: 0.7790 - val_loss: 0.6268 - val_accuracy: 0.6025
Epoch 6/10
50/50 [==============================] - 11s 226ms/step - loss: 0.4038 - accuracy: 0.8195 - val_loss: 0.4842 - val_accuracy: 0.6975
Epoch 7/10
50/50 [==============================] - 11s 222ms/step - loss: 0.3207 - accuracy: 0.8595 - val_loss: 0.5600 - val_accuracy: 0.7325
Epoch 8/10
50/50 [==============================] - 13s 257ms/step - loss: 0.2574 - accuracy: 0.8920 - val_loss: 0.9705 - val_accuracy: 0.7525
Epoch 9/10
50/50 [==============================] - 13s 252ms/step - loss: 0.2049 - accuracy: 0.9235 - val_loss: 0.7311 - val_accuracy: 0.7475
Epoch 10/10
50/50 [==============================] - 13s 251ms/step - loss: 0.1448 - accuracy: 0.9515 - val_loss: 1.0541 - val_accuracy: 0.7150
Confusion Matrix
[[200 0]
[200 0]]
Hope this answers your question. If not please share complete traceback and code for debug, i am happy to help you.
I've been working on a data set (1000,3253) using a CNN. I'm running gradient calculations through gradient tape but it keeps running out of memory. Yet if I remove the line appending a gradient calculation to a list the script runs through all the epochs. I'm not entirely sure why this would happen but I am also new to tensorflow and the use of gradient tape. Any advice or input would be appreciated
#create a batch loop
for x, y_true in train_dataset:
#create a tape to record actions
with tf.GradientTape(watch_accessed_variables=False) as tape:
x_var = tf.Variable(x)
tape.watch([model.trainable_variables,x_var])
y_pred = model(x_var,training=True)
tape.stop_recording()
loss = los_func(y_true, y_pred)
epoch_loss_avg.update_state(loss)
epoch_accuracy.update_state(y_true, y_pred)
#pdb.set_trace()
gradients,something = tape.gradient(loss, (model.trainable_variables,x_var))
#sa_input.append(tape.gradient(loss, x_var))
del tape
#apply gradients
sa_input.append(something)
opti_func.apply_gradients(zip(gradients, model.trainable_variables))
train_loss_results.append(epoch_loss_avg.result())
train_accuracy_results.append(epoch_accuracy.result())
As you are new to TF2, would recommend to go through this guide. This guide covers training, evaluation, and prediction (inference) models in TensorFlow 2.0 in two broad situations:
When using built-in APIs for training & validation (such as model.fit(), model.evaluate(), model.predict()). This is covered in the section "Using built-in training & evaluation loops".
When writing custom loops from scratch using eager execution and the GradientTape object. This is covered in the section "Writing your own training & evaluation loops from scratch".
Below is a program where I am computing the gradients after every epoch and appending to a list. At end of the program I am converting the list to array for simplicity.
Code - This program throws OOM Error error if I use a deep network of many layers and bigger filter size
# Importing dependency
%tensorflow_version 2.x
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import datasets
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
import numpy as np
import tensorflow as tf
# Import Data
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
# Build Model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32,32, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(10))
# Model Summary
model.summary()
# Model Compile
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
# Define the Gradient Fucntion
epoch_gradient = []
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Define the Gradient Function
#tf.function
def get_gradient_func(model):
with tf.GradientTape() as tape:
logits = model(train_images, training=True)
loss = loss_fn(train_labels, logits)
grad = tape.gradient(loss, model.trainable_weights)
model.optimizer.apply_gradients(zip(grad, model.trainable_variables))
return grad
# Define the Required Callback Function
class GradientCalcCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs={}):
grad = get_gradient_func(model)
epoch_gradient.append(grad)
epoch = 4
print(train_images.shape, train_labels.shape)
model.fit(train_images, train_labels, epochs=epoch, validation_data=(test_images, test_labels), callbacks=[GradientCalcCallback()])
# (7) Convert to a 2 dimensiaonal array of (epoch, gradients) type
gradient = np.asarray(epoch_gradient)
print("Total number of epochs run:", epoch)
Output -
Model: "sequential_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_12 (Conv2D) (None, 30, 30, 32) 896
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 15, 15, 32) 0
_________________________________________________________________
conv2d_13 (Conv2D) (None, 13, 13, 64) 18496
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 6, 6, 64) 0
_________________________________________________________________
conv2d_14 (Conv2D) (None, 4, 4, 64) 36928
_________________________________________________________________
flatten_4 (Flatten) (None, 1024) 0
_________________________________________________________________
dense_11 (Dense) (None, 64) 65600
_________________________________________________________________
dense_12 (Dense) (None, 10) 650
=================================================================
Total params: 122,570
Trainable params: 122,570
Non-trainable params: 0
_________________________________________________________________
(50000, 32, 32, 3) (50000, 1)
Epoch 1/4
1563/1563 [==============================] - 109s 70ms/step - loss: 1.7026 - accuracy: 0.4081 - val_loss: 1.4490 - val_accuracy: 0.4861
Epoch 2/4
1563/1563 [==============================] - 145s 93ms/step - loss: 1.2657 - accuracy: 0.5506 - val_loss: 1.2076 - val_accuracy: 0.5752
Epoch 3/4
1563/1563 [==============================] - 151s 96ms/step - loss: 1.1103 - accuracy: 0.6097 - val_loss: 1.1122 - val_accuracy: 0.6127
Epoch 4/4
1563/1563 [==============================] - 152s 97ms/step - loss: 1.0075 - accuracy: 0.6475 - val_loss: 1.0508 - val_accuracy: 0.6371
Total number of epochs run: 4
Hope this answers your question. Happy Learning.
My aim is to use SVD to PCA whiten the latent layer before passing it to the decoder module of an autoencoder. I have used tf.linalg.svd but it does not work since it does not contain necessary Keras parameters. So as a workaround I was trying to wrap it inside Lambda but got this error
AttributeError: 'tuple' object has no attribute 'shape'.
I tried SO (E.g. Using SVD in a custom layer in Keras/tensorflow) and did Google search for SVD in Keras but could not find any answers. I have attached a stripped but functional code here:
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from keras.layers import Lambda, Input, Dense, Multiply, Subtract
from keras.models import Model
from keras import backend as K
from keras.losses import mse
from keras import optimizers
from keras.callbacks import EarlyStopping
x = np.random.randn(100, 5)
train_data = preprocessing.scale(x)
input_shape = (5, )
original_dim = train_data.shape[1]
intermediate_dim_1 = 64
intermediate_dim_2 = 16
latent_dim = 2
batch_size = 10
epochs = 15
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
layer_1 = Dense(intermediate_dim_1, activation='tanh') (inputs)
layer_2 = Dense(intermediate_dim_2, activation='tanh') (layer_1)
encoded_layer = Dense(latent_dim, name='latent_layer') (layer_2)
encoder = Model(inputs, encoded_layer, name='encoder')
encoder.summary()
# build decoder model
latent_inputs = Input(shape=(latent_dim,))
layer_1 = Dense(intermediate_dim_1, activation='tanh') (latent_inputs)
layer_2 = Dense(intermediate_dim_2, activation='tanh') (layer_1)
outputs = Dense(original_dim,activation='sigmoid') (layer_2)
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()
# mean removal and pca whitening
meanX = Lambda(lambda x: tf.reduce_mean(x, axis=0, keepdims=True))(encoded_layer)
standardized = Subtract()([encoded_layer, meanX])
sigma2 = K.dot(K.transpose(standardized), standardized)
sigma2 = Lambda(lambda x: x / batch_size)(sigma2)
s, u ,v = tf.linalg.svd(sigma2,compute_uv=True)
# s ,u ,v = Lambda(lambda x: tf.linalg.svd(x,compute_uv=True))(sigma2)
epsilon = 1e-6
# sqrt of number close to 0 leads to problem hence replace it with epsilon
si = tf.where(tf.less(s, epsilon), tf.sqrt(1 / epsilon) * tf.ones_like(s),
tf.math.truediv(1.0, tf.sqrt(s)))
whitening_layer = u # tf.linalg.diag(si) # tf.transpose(v)
whitened_encoding = K.dot(standardized, whitening_layer)
# Connect models
z_decoded = decoder(standardized)
# z_decoded = decoder(whitened_encoding)
# Define losses
reconstruction_loss = mse(inputs,z_decoded)
# Instantiate autoencoder
ae = Model(inputs, z_decoded, name='autoencoder')
ae.add_loss(reconstruction_loss)
# callback = EarlyStopping(monitor='val_loss', patience=5)
adam = optimizers.adam(learning_rate=0.002)
ae.compile(optimizer=adam)
ae.summary()
ae.fit(train_data, epochs=epochs, batch_size=batch_size,
validation_split=0.2, shuffle=True)
To reproduce the error uncomment these lines and comment the one preceding it:
z_decoded = decoder(whitened_encoding)
s ,u ,v = Lambda(lambda x: tf.linalg.svd(x,compute_uv=True))(sigma2)
I would appreciate it if someone could tell me how to wrap the SVD inside Keras layers or an alternate implementation.
Please note that I have not included the reparameterization trick to calculate the loss to keep the code simple.
Thank you !
I solved the problem. To use SVD inside Keras, we need to use the Lambda layer. However, as Lambda returns a tensor with some additional attributes, it is best to do additional work inside the lambda function and return a tensor. Another problem with my code was the combination of encoder and decoder model which I fixed by combining the output of encoder to the input of decoder model. The working code is as follows:
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from keras.layers import Lambda, Input, Dense, Multiply, Subtract
from keras.models import Model
from keras import backend as K
from keras.losses import mse
from keras import optimizers
from keras.callbacks import EarlyStopping
def SVD(sigma2):
s ,u ,v = tf.linalg.svd(sigma2,compute_uv=True)
epsilon = 1e-6
# sqrt of number close to 0 leads to problem hence replace it with epsilon
si = tf.where(tf.less(s, epsilon),
tf.sqrt(1 / epsilon) * tf.ones_like(s),
tf.math.truediv(1.0, tf.sqrt(s)))
whitening_layer = u # tf.linalg.diag(si) # tf.transpose(v)
return whitening_layer
x = np.random.randn(100, 5)
train_data = preprocessing.scale(x)
input_shape = (5, )
original_dim = train_data.shape[1]
intermediate_dim_1 = 64
intermediate_dim_2 = 16
latent_dim = 2
batch_size = 10
epochs = 15
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
layer_1 = Dense(intermediate_dim_1, activation='tanh') (inputs)
layer_2 = Dense(intermediate_dim_2, activation='tanh') (layer_1)
encoded_layer = Dense(latent_dim, name='latent_layer') (layer_2)
encoder = Model(inputs, encoded_layer, name='encoder')
encoder.summary()
# build decoder model
latent_inputs = Input(shape=(latent_dim,))
layer_1 = Dense(intermediate_dim_1, activation='tanh') (latent_inputs)
layer_2 = Dense(intermediate_dim_2, activation='tanh') (layer_1)
outputs = Dense(original_dim,activation='sigmoid') (layer_2)
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()
# mean removal and pca whitening
meanX = Lambda(lambda x: tf.reduce_mean(x, axis=0, keepdims=True))(encoded_layer)
standardized = Subtract()([encoded_layer, meanX])
sigma2 = K.dot(K.transpose(standardized), standardized)
sigma2 = Lambda(lambda x: x / batch_size)(sigma2)
# s, u ,v = tf.linalg.svd(sigma2,compute_uv=True)
whitening_layer = Lambda(SVD)(sigma2)
'''
s ,u ,v = Lambda(lambda x: tf.linalg.svd(x,compute_uv=True))(sigma2)
epsilon = 1e-6
# sqrt of number close to 0 leads to problem hence replace it with epsilon
si = tf.where(tf.less(s, epsilon),
tf.sqrt(1 / epsilon) * tf.ones_like(s),
tf.math.truediv(1.0, tf.sqrt(s)))
whitening_layer = u # tf.linalg.diag(si) # tf.transpose(v)
'''
print('whitening_layer shape=', np.shape(whitening_layer))
print('standardized shape=', np.shape(standardized))
whitened_encoding = K.dot(standardized, whitening_layer)
# Connect models
# z_decoded = decoder(standardized)
z_decoded = decoder(encoder(inputs))
# Define losses
reconstruction_loss = mse(inputs,z_decoded)
# Instantiate autoencoder
ae = Model(inputs, z_decoded, name='autoencoder')
ae.add_loss(reconstruction_loss)
# callback = EarlyStopping(monitor='val_loss', patience=5)
adam = optimizers.adam(learning_rate=0.002)
ae.compile(optimizer=adam)
ae.summary()
ae.fit(train_data, epochs=epochs, batch_size=batch_size,
validation_split=0.2, shuffle=True)
The output of running the code is as follows:
Model: "encoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
encoder_input (InputLayer) (None, 5) 0
_________________________________________________________________
dense_1 (Dense) (None, 64) 384
_________________________________________________________________
dense_2 (Dense) (None, 16) 1040
_________________________________________________________________
latent_layer (Dense) (None, 2) 34
=================================================================
Total params: 1,458
Trainable params: 1,458
Non-trainable params: 0
_________________________________________________________________
Model: "decoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 2) 0
_________________________________________________________________
dense_3 (Dense) (None, 64) 192
_________________________________________________________________
dense_4 (Dense) (None, 16) 1040
_________________________________________________________________
dense_5 (Dense) (None, 5) 85
=================================================================
Total params: 1,317
Trainable params: 1,317
Non-trainable params: 0
_________________________________________________________________
whitening_layer shape= (2, 2)
standardized shape= (None, 2)
/home/manish/anaconda3/envs/ica_gpu/lib/python3.7/site-packages/keras/engine/training_utils.py:819: UserWarning: Output decoder missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to decoder.
'be expecting any data to be passed to {0}.'.format(name))
Model: "autoencoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
encoder_input (InputLayer) (None, 5) 0
_________________________________________________________________
encoder (Model) (None, 2) 1458
_________________________________________________________________
decoder (Model) (None, 5) 1317
=================================================================
Total params: 2,775
Trainable params: 2,775
Non-trainable params: 0
_________________________________________________________________
Train on 80 samples, validate on 20 samples
Epoch 1/15
2020-05-16 16:01:55.443061: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
80/80 [==============================] - 0s 3ms/step - loss: 1.1739 - val_loss: 1.2238
Epoch 2/15
80/80 [==============================] - 0s 228us/step - loss: 1.0601 - val_loss: 1.0921
Epoch 3/15
80/80 [==============================] - 0s 261us/step - loss: 0.9772 - val_loss: 1.0291
Epoch 4/15
80/80 [==============================] - 0s 223us/step - loss: 0.9385 - val_loss: 0.9875
Epoch 5/15
80/80 [==============================] - 0s 262us/step - loss: 0.9105 - val_loss: 0.9560
Epoch 6/15
80/80 [==============================] - 0s 240us/step - loss: 0.8873 - val_loss: 0.9335
Epoch 7/15
80/80 [==============================] - 0s 217us/step - loss: 0.8731 - val_loss: 0.9156
Epoch 8/15
80/80 [==============================] - 0s 253us/step - loss: 0.8564 - val_loss: 0.9061
Epoch 9/15
80/80 [==============================] - 0s 273us/step - loss: 0.8445 - val_loss: 0.8993
Epoch 10/15
80/80 [==============================] - 0s 235us/step - loss: 0.8363 - val_loss: 0.8937
Epoch 11/15
80/80 [==============================] - 0s 283us/step - loss: 0.8299 - val_loss: 0.8874
Epoch 12/15
80/80 [==============================] - 0s 254us/step - loss: 0.8227 - val_loss: 0.8832
Epoch 13/15
80/80 [==============================] - 0s 227us/step - loss: 0.8177 - val_loss: 0.8789
Epoch 14/15
80/80 [==============================] - 0s 241us/step - loss: 0.8142 - val_loss: 0.8725
Epoch 15/15
80/80 [==============================] - 0s 212us/step - loss: 0.8089 - val_loss: 0.8679
I hope this helps.