I want to use an AutoEncoder model with the Keras functional API, and I also want to use tf.data.Dataset as the input pipeline for the model. However, there is a limitation: according to the docs, I can pass the dataset to keras.Model.fit only as a tuple (inputs, targets):
Input data. It could be: A tf.data dataset. Should return a tuple of either (inputs, targets) or (inputs, targets, sample_weights).
So here is the question: can I pass the tf.data.Dataset without repeating the inputs, i.e. as (inputs, None) rather than (inputs, inputs)? And if I can't, will the repeated inputs double the GPU memory used by my model?
You can use map() to return your input twice:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Conv2DTranspose, Reshape
from functools import partial

(xtrain, _), (xtest, _) = tf.keras.datasets.mnist.load_data()

ds = tf.data.Dataset.from_tensor_slices(
    tf.expand_dims(tf.concat([xtrain, xtest], axis=0), axis=-1))

ds = ds.take(int(1e4)).batch(4).map(lambda x: (x/255, x/255))

custom_convolution = partial(Conv2D, kernel_size=(3, 3),
                             strides=(1, 1),
                             activation='relu',
                             padding='same')
custom_pooling = partial(MaxPool2D, pool_size=(2, 2))

conv_encoder = Sequential([
    custom_convolution(filters=16, input_shape=(28, 28, 1)),
    custom_pooling(),
    custom_convolution(filters=32),
    custom_pooling(),
    custom_convolution(filters=64),
    custom_pooling()
])

# conv_encoder(next(iter(ds))[0].numpy().astype(float)).shape

custom_transpose = partial(Conv2DTranspose,
                           padding='same',
                           kernel_size=(3, 3),
                           activation='relu',
                           strides=(2, 2))

conv_decoder = Sequential([
    custom_transpose(filters=32, input_shape=(3, 3, 64), padding='valid'),
    custom_transpose(filters=16),
    custom_transpose(filters=1, activation='sigmoid'),
    Reshape(target_shape=[28, 28, 1])
])

conv_autoencoder = Sequential([
    conv_encoder,
    conv_decoder
])

conv_autoencoder.compile(loss='binary_crossentropy', optimizer='adam')
history = conv_autoencoder.fit(ds)
2436/2500 [============================>.] - ETA: 0s - loss: 0.1282
2446/2500 [============================>.] - ETA: 0s - loss: 0.1280
2456/2500 [============================>.] - ETA: 0s - loss: 0.1279
2466/2500 [============================>.] - ETA: 0s - loss: 0.1278
2476/2500 [============================>.] - ETA: 0s - loss: 0.1277
2487/2500 [============================>.] - ETA: 0s - loss: 0.1275
2497/2500 [============================>.] - ETA: 0s - loss: 0.1274
2500/2500 [==============================] - 14s 6ms/step - loss: 0.1273
"the repeated inputs double the GPU memory for my model?"; Generally , the dataset pipeline run on CPU, not on GPU.
For your AutoEncoder model, if you want to use a dataset that only contains the examples without labels, you can use custom training :
def loss(model, x):
    y_ = model(x, training=True)              # use x as input
    return loss_object(y_true=x, y_pred=y_)   # use x as label (y_true)

with tf.GradientTape() as tape:
    loss_value = loss(model, inputs)
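For completeness, here is a minimal sketch of the full loop this fragment belongs to; the loss object and optimizer are my assumptions, and conv_autoencoder and ds are reused from the example above:

import tensorflow as tf

loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

def loss(model, x):
    y_ = model(x, training=True)              # use x as input
    return loss_object(y_true=x, y_pred=y_)   # use x as label (y_true)

for epoch in range(3):
    for x, _ in ds:  # ds as built above yields (x, x); keep only the inputs
        with tf.GradientTape() as tape:
            loss_value = loss(conv_autoencoder, x)
        grads = tape.gradient(loss_value, conv_autoencoder.trainable_variables)
        optimizer.apply_gradients(zip(grads, conv_autoencoder.trainable_variables))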
If it is necessary to use the fit() method, you can subclass keras.Model and override the train_step() method (link). (I did not verify this code.)
from tensorflow import keras
import tensorflow as tf

class CustomModel(keras.Model):
    def train_step(self, data):
        x = data
        y = data  # the same data as label
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}
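A hypothetical usage of this CustomModel (also unverified), reusing the conv_encoder / conv_decoder stacks and the ds pipeline from the first answer:

# Wrap the autoencoder in CustomModel so fit() can consume inputs-only batches.
inputs = keras.Input(shape=(28, 28, 1))
outputs = conv_decoder(conv_encoder(inputs))
model = CustomModel(inputs, outputs)
model.compile(optimizer='adam', loss='binary_crossentropy')

ds_inputs = ds.map(lambda x, y: x)  # drop the duplicated target, keep inputs only
model.fit(ds_inputs, epochs=1)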
In TensorFlow 2.4, I've got a dataset that returns a tuple of one element, i.e. (inputs,), and it trains just fine. The only caveat is, of course, that you cannot pass a loss or metrics to model.compile; you must instead use the add_loss and add_metric APIs somewhere in the model.
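A minimal sketch of that pattern (my own example with a toy dense autoencoder, not the poster's model): the dataset yields one-element tuples, and the loss is attached with add_loss so that compile() takes no loss argument.

import tensorflow as tf
from tensorflow import keras

inputs = keras.Input(shape=(784,))
encoded = keras.layers.Dense(32, activation='relu')(inputs)
decoded = keras.layers.Dense(784, activation='sigmoid')(encoded)
model = keras.Model(inputs, decoded)

# Attach the reconstruction loss and a metric inside the model instead of compile().
model.add_loss(tf.reduce_mean(keras.losses.binary_crossentropy(inputs, decoded)))
model.add_metric(tf.reduce_mean(tf.abs(inputs - decoded)), name='mae')
model.compile(optimizer='adam')  # no loss or metrics here

(x_train, _), _ = keras.datasets.mnist.load_data()
x_train = x_train.reshape(-1, 784).astype('float32') / 255
ds = tf.data.Dataset.from_tensor_slices((x_train,)).batch(64)  # (inputs,) tuples
model.fit(ds, epochs=1)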
I am training a variational autoencoder on the USPS dataset, of shape (7291, 16, 16). Below is my code snippet. I also tried the same snippet on the MNIST dataset, of shape (60000, 28, 28), and everything works fine. Both are grayscale images. I can't figure out why I am getting negative values for the training loss and validation loss on the USPS dataset. The code is quite straightforward; the only change from the MNIST model is mnist.load_data() to usps.load_data().
I have also tried reducing the number of layers in both the encoder and decoder networks, but the result for the USPS model is the same. I can't figure out what exactly I am getting wrong; please help me understand the reason for the negative values.
!pip install extra_keras_datasets
#######################################
from extra_keras_datasets import usps
import keras
from keras.layers import Conv2D, Conv2DTranspose, Input, Flatten, Dense, Lambda, Reshape
#from keras.layers import BatchNormalization
from keras.models import Model
from keras.datasets import mnist
import tensorflow.compat.v1.keras.backend as K
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import numpy as np
import matplotlib.pyplot as plt
# Load MNIST
# (x_train, y_train), (x_test, y_test) = mnist.load_data()
(x_train, y_train), (x_test, y_test) = usps.load_data()
#Normalize and reshape ============
#Norm.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train = x_train / 255
x_test = x_test / 255
# Reshape
img_width = x_train.shape[1]
img_height = x_train.shape[2]
num_channels = 1 #MNIST --> grey scale so 1 channel
x_train = x_train.reshape(x_train.shape[0], img_height, img_width, num_channels)
x_test = x_test.reshape(x_test.shape[0], img_height, img_width, num_channels)
input_shape = (img_height, img_width, num_channels)
# ========================
# BUILD THE MODEL
# # ================= #############
# # Encoder
#Let us define 4 conv2D, flatten and then dense
# # ================= ############
latent_dim = 2 # Number of latent dim parameters
#Create the model
input_img = Input(shape=input_shape, name='encoder_input')
x = Conv2D(32, 3, padding='same', activation='relu')(input_img)
x = Conv2D(64, 3, padding='same', activation='relu',strides=(2, 2))(x)
x = Conv2D(64, 3, padding='same', activation='relu')(x)
x = Conv2D(64, 3, padding='same', activation='relu')(x)
conv_shape = K.int_shape(x) #Shape of conv to be provided to decoder
print(conv_shape)
#Flatten
x = Flatten()(x)
x = Dense(32, activation='relu')(x)
# Two outputs, for latent mean and log variance (std. dev.)
#Use these to sample random variables in latent space to which inputs are mapped.
z_mu = Dense(latent_dim, name='latent_mu')(x) #Mean values of encoded input
z_sigma = Dense(latent_dim, name='latent_sigma')(x) #Std dev. (variance) of encoded input
# REPARAMETERIZATION TRICK
# Define a sampling function to sample from the latent distribution.
# Reparameterize the sample (following the process defined by Gunderson and Huang)
# as mu + exp(sigma / 2) * eps, so that gradient descent can estimate
# gradients accurately.
def sample_z(args):
    z_mu, z_sigma = args
    eps = K.random_normal(shape=(K.shape(z_mu)[0], K.int_shape(z_mu)[1]))
    return z_mu + K.exp(z_sigma / 2) * eps
# sample vector from the latent distribution
# z is the lambda custom layer we are adding for gradient descent calculations
# using mu and variance (sigma)
z = Lambda(sample_z, output_shape=(latent_dim, ), name='z')([z_mu, z_sigma])
#Z (lambda layer) will be the last layer in the encoder.
# Define and summarize encoder model.
encoder = Model(input_img, [z_mu, z_sigma, z], name='encoder')
print(encoder.summary())
Decoder
# The decoder takes the latent vector as input (this line is missing from the
# snippet as posted; without it the code will not run)
decoder_input = Input(shape=(latent_dim, ), name='decoder_input')
x = Dense(conv_shape[1]*conv_shape[2]*conv_shape[3], activation='relu')(decoder_input)
# reshape to the shape of the last conv layer in the encoder
x = Reshape((conv_shape[1], conv_shape[2], conv_shape[3]))(x)
# upscale (conv2D transpose) back to original shape
# use Conv2DTranspose to reverse the conv layers defined in the encoder
x = Conv2DTranspose(32, 3, padding='same', activation='relu',strides=(2, 2))(x)
#Can add more conv2DTranspose layers, if desired.
#Using sigmoid activation
x = Conv2DTranspose(num_channels, 3, padding='same', activation='sigmoid', name='decoder_output')(x)
# Define and summarize decoder model
decoder = Model(decoder_input, x, name='decoder')
# apply the decoder to the latent sample
z_decoded = decoder(z)
decoder.summary()
Custom loss and model fitting
# The VAE is trained using two loss functions: reconstruction loss and KL divergence.
# Let us add a class to define a custom layer with loss
class CustomLayer(keras.layers.Layer):

    def vae_loss(self, x, z_decoded):
        x = K.flatten(x)
        z_decoded = K.flatten(z_decoded)
        # Reconstruction loss (as we used sigmoid activation we can use binary crossentropy)
        recon_loss = keras.metrics.binary_crossentropy(x, z_decoded)
        # KL divergence
        kl_loss = -5e-4 * K.mean(1 + z_sigma - K.square(z_mu) - K.exp(z_sigma), axis=-1)
        return K.mean(recon_loss + kl_loss)

    # add the custom loss to the layer
    def call(self, inputs):
        x = inputs[0]
        z_decoded = inputs[1]
        loss = self.vae_loss(x, z_decoded)
        self.add_loss(loss, inputs=inputs)
        return x
# apply the custom loss to the input images and the decoded latent distribution sample
y = CustomLayer()([input_img, z_decoded])
# y is basically the original image after encoding input img to mu, sigma, z
# and decoding sampled z values.
#This will be used as output for vae
# =================
# VAE
# =================
vae = Model(input_img, y, name='vae')
# Compile VAE
vae.compile(optimizer='adam', loss=None)
vae.summary()
# Train autoencoder
vae.fit(x_train, None, epochs = 10, batch_size = 32, validation_split = 0.2)
Here is my training history:
Epoch 1/10
5832/5832 [==============================] - 5s 928us/sample - loss: 0.0345 - val_loss: -0.0278
Epoch 2/10
5832/5832 [==============================] - 4s 740us/sample - loss: -0.0301 - val_loss: -0.0292
Epoch 3/10
5832/5832 [==============================] - 4s 746us/sample - loss: -0.0307 - val_loss: -0.0293
Epoch 4/10
5832/5832 [==============================] - 4s 751us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 5/10
5832/5832 [==============================] - 4s 753us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 6/10
5832/5832 [==============================] - 4s 746us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 7/10
5832/5832 [==============================] - 4s 750us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 8/10
5832/5832 [==============================] - 4s 742us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 9/10
5832/5832 [==============================] - 4s 751us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 10/10
5832/5832 [==============================] - 4s 748us/sample - loss: -0.0307 - val_loss: -0.0294
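One sanity check worth adding here (my suggestion, not part of the original post): binary crossentropy is only guaranteed to be non-negative when the targets lie in [0, 1]. If usps.load_data() returns pixel values that are not in the [0, 255] range that MNIST uses, dividing by 255 leaves the targets outside [0, 1], and the loss can then legitimately go negative. Printing the raw range before normalizing will confirm or rule this out:

from extra_keras_datasets import usps

(x_train, y_train), (x_test, y_test) = usps.load_data()
# If this does not print (0.0, 255.0), the /255 normalization above is wrong for USPS.
print(x_train.min(), x_train.max())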
I have a dataset with multiple fields, but only two are relevant for my machine learning implementation. The rest should not be considered for predictions, but might reveal interesting correlations.
Is there a way to return prediction results when calling model.evaluate?
For example:
[loss, accuracy, predicted_results] = model.evaluate(input, results)
AFAIK, we can't get predictions on x using model.evaluate; it simply returns the loss and accuracy (source). But for your need, you can write a custom class and define the necessary calls such as .evaluate and .predict. Let's define a simple model to demonstrate.
Train and Run
import tensorflow as tf
import numpy as np
img = tf.random.normal([20, 32], 0, 1, tf.float32)
tar = np.random.randint(2, size=(20, 1))

model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(10, input_dim=32,
                                kernel_initializer='normal', activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam', metrics=['accuracy'])
model.fit(img, tar, epochs=2, verbose=2)
Epoch 1/2
1/1 - 1s - loss: 0.7083 - accuracy: 0.5000
Epoch 2/2
1/1 - 0s - loss: 0.6983 - accuracy: 0.5000
Now, for your request, we can do something as follows:
class Custom_Evaluate:
    def __init__(self, model):
        self.model = model

    def eval_predict(self, x, y):
        loss, acc = self.model.evaluate(x, y)
        pred = self.model.predict(x)
        return loss, acc, pred

custom_evaluate = Custom_Evaluate(model)
loss, acc, pred = custom_evaluate.eval_predict(img, tar)
print(loss, acc)
print(pred)
0.6886215806007385 0.6499999761581421
[[0.5457604 ]
[0.6126752 ]
[0.53668976]
[0.40323135]
[0.37159938]
[0.5520069 ]
[0.4959099 ]
[0.5363802 ]
[0.5033434 ]
[0.65680957]
[0.6863682 ]
[0.44409862]
[0.4672098 ]
[0.49656072]
[0.620726 ]
[0.47991502]
[0.58834356]
[0.5245693 ]
[0.5359181 ]
[0.4575624 ]]
Recently I tried to use the BasicLSTMCell API from TensorFlow to generate video captions. I am working with code that builds BasicLSTMCell in the following way:
self.lstm1 = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(dim_hidden, state_is_tuple=False)
self.lstm2 = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(dim_hidden, state_is_tuple=False)
Then uses it later as follows:
with tf.compat.v1.variable_scope("Encoding") as scope:
    for i in range(0, self.n_video_lstm_step):
        if i > 0:
            scope.reuse_variables()

        with tf.compat.v1.variable_scope("LSTM1"):
            output1, state1 = self.lstm1(image_emb[:, i, :], state1)

        with tf.compat.v1.variable_scope("LSTM2"):
            output2, state2 = self.lstm2(tf.concat([padding, output1], 1), state2)

        out_list.append(tf.concat([output1, output2], 1))
I want these LSTM cells to be bidirectional for my requirement. I have tried using
keras.layers.Bidirectional(keras.layers.LSTM(dim_hidden, unit_forget_bias=True, unroll=True))
but it didn't work. Can anyone let me know how to make this work with a bidirectional LSTM?
Based on the question you ask (convert BasicLSTMCell to bidirectional LSTM), you can use the bidirectional RNN wrapper directly, as shown in the code below. Do clarify how you are modifying the LSTM layer class in the way that causes the error you are facing, and I'll update my answer accordingly.
import numpy as np
from tensorflow.keras import layers, Model, utils
X = np.random.random((100,10,3))
y = np.random.random((100,))
inp = layers.Input((10,3))
x = layers.Bidirectional(layers.LSTM(8, return_sequences=True))(inp)
x = layers.Bidirectional(layers.LSTM(8))(x)
out = layers.Dense(1, activation='sigmoid')(x)  # sigmoid, not softmax, for a single-unit binary output
model = Model(inp, out)
utils.plot_model(model, show_layer_names=False, show_shapes=True)
model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(X, y, epochs=3)
Epoch 1/3
4/4 [==============================] - 5s 10ms/step - loss: 0.6963
Epoch 2/3
4/4 [==============================] - 0s 22ms/step - loss: 0.6965
Epoch 3/3
4/4 [==============================] - 0s 11ms/step - loss: 0.6976
<tensorflow.python.keras.callbacks.History at 0x7f91066bf4c0>
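To connect this back to the encoder loop in the question, here is a hedged sketch (the sizes and tensor names are hypothetical) that replaces the manual per-step BasicLSTMCell calls with whole-sequence Bidirectional layers:

import tensorflow as tf
from tensorflow.keras import layers

n_video_lstm_step, dim_hidden, emb_dim = 80, 256, 512   # hypothetical sizes
image_emb = tf.random.normal([2, n_video_lstm_step, emb_dim])  # dummy batch

lstm1 = layers.Bidirectional(layers.LSTM(dim_hidden, return_sequences=True))
lstm2 = layers.Bidirectional(layers.LSTM(dim_hidden, return_sequences=True))

output1 = lstm1(image_emb)                    # (batch, steps, 2*dim_hidden)
output2 = lstm2(output1)
out = tf.concat([output1, output2], axis=-1)  # analogue of out_list in the question
print(out.shape)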
I am trying the training and evaluation example on the TensorFlow website.
Specifically, this part:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')
def get_uncompiled_model():
    inputs = keras.Input(shape=(784,), name='digits')
    x = layers.Dense(64, activation='relu', name='dense_1')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(64, activation='relu', name='dense_2')(x)
    outputs = layers.Dense(10, activation='softmax', name='predictions')(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

def get_compiled_model():
    model = get_uncompiled_model()
    model.compile(optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])
    return model
sample_weight = np.ones(shape=(len(y_train),))
sample_weight[y_train == 5] = 2.
# Create a Dataset that includes sample weights
# (3rd element in the return tuple).
train_dataset = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train, sample_weight))
# Shuffle and slice the dataset.
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)
model = get_compiled_model()
model.fit(train_dataset, epochs=3)
It appears that if I add the batch normalization layer (this line: x = layers.BatchNormalization()(x)) I get the following error:
InvalidArgumentError: The second input must be a scalar, but it has shape [64]
[[{{node batch_normalization_2/cond/ReadVariableOp/Switch}}]]
Any ideas?
The same code works for me. The only lines I changed are:
model.compile(optimizer=keras.optimizers.RMSprop(learning_rate=1e-3), ...)
to
model.compile(optimizer=keras.optimizers.RMSprop(lr=1e-3), ...)
(which is version specific), and
model.fit(train_dataset, epochs=3)
to
model.fit(train_dataset, epochs=3, steps_per_epoch=30)
Reason: when using iterators as input to a model, you should specify the steps_per_epoch argument.
If you just want to use sample weights, you don't have to use tf.data.Dataset; you can simply run:
model.fit(x=x_train, y=y_train, sample_weight=sample_weight, batch_size=64, epochs=3)
and it works for me (when I change learning_rate to lr, as @ASHu2 mentioned).
It gets 97% accuracy after 3 epochs:
...
57408/60000 [===========================>..] - ETA: 0s - loss: 0.1010 - sparse_categorical_accuracy: 0.9709
58816/60000 [============================>.] - ETA: 0s - loss: 0.1011 - sparse_categorical_accuracy: 0.9708
60000/60000 [==============================] - 2s 37us/sample - loss: 0.1007 - sparse_categorical_accuracy: 0.9709
I used TF 1.14.0 on windows.
The problem was solved when I updated tensorflow from version 1.14.1 to 2.0.0-rc1.
I am trying to implement a custom loss function in Keras.
To start off, I wanted to be sure that the existing loss function could be called from my custom function, and this is where the weird stuff begins:
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
works as expected.
Now the implementation of "sparse_categorical_crossentropy" in keras.losses is as follows:
def sparse_categorical_crossentropy(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true, y_pred)
I concluded that passing K.sparse_categorical_crossentropy directly should also work. However, it throws: expected activation_6 to have shape (4,) but got array with shape (1,).
Also, defining a custom loss function like this:
def custom_loss(y_true, y_pred):
    return keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
does not work either. During training it reduces the loss (which seems correct), but the accuracy does not improve (whereas it does when using the non-custom loss function).
I am not sure what is happening, nor do I know how to debug it properly. Any help would be highly appreciated.
I tested what you are saying in my code, and yes, you are right. I was initially getting the same error as you, but once I changed the metrics parameter from accuracy to sparse_categorical_accuracy, I started getting the correct (higher) accuracy.
One important thing to note here is that when we tell Keras to use accuracy as a metric, it uses the default accuracy, which is categorical_accuracy. So if we want to implement our own custom loss function, we have to set the metrics parameter accordingly.
Read about the available metrics functions in Keras here.
Case 1:
def sparse_categorical_crossentropy(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true, y_pred)

model.compile(optimizer='adam',
              loss=sparse_categorical_crossentropy,
              metrics=['accuracy'])
output:
ValueError: Error when checking target: expected dense_71 to have shape (10,) but got array with shape (1,)
Case 2:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
output:
Epoch 1/2
60000/60000 [==============================] - 2s 38us/step - loss: 0.4714 - acc: 0.8668
Epoch 2/2
60000/60000 [==============================] - 1s 22us/step - loss: 0.2227 - acc: 0.9362
10000/10000 [==============================] - 1s 94us/step
Case 3:
def custom_sparse_categorical_crossentropy(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true, y_pred)

model.compile(optimizer='adam',
              loss=custom_sparse_categorical_crossentropy,
              metrics=['accuracy'])
output:
Epoch 1/2
60000/60000 [==============================] - 2s 41us/step - loss: 0.4558 - acc: 0.1042
Epoch 2/2
60000/60000 [==============================] - 1s 22us/step - loss: 0.2164 - acc: 0.0997
10000/10000 [==============================] - 1s 89us/step
Case 4:
def custom_sparse_categorical_crossentropy(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true, y_pred)

model.compile(optimizer='adam',
              loss=custom_sparse_categorical_crossentropy,
              metrics=['sparse_categorical_accuracy'])
output:
Epoch 1/2
60000/60000 [==============================] - 2s 40us/step - loss: 0.4736 - sparse_categorical_accuracy: 0.8673
Epoch 2/2
60000/60000 [==============================] - 1s 23us/step - loss: 0.2222 - sparse_categorical_accuracy: 0.9372
10000/10000 [==============================] - 1s 85us/step
Full Code:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
import keras.backend as K
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(100, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.10),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

def custom_sparse_categorical_crossentropy(y_true, y_pred):
    return K.sparse_categorical_crossentropy(y_true, y_pred)

# def sparse_categorical_accuracy(y_true, y_pred):
#     # reshape in case it's in shape (num_samples, 1) instead of (num_samples,)
#     if K.ndim(y_true) == K.ndim(y_pred):
#         y_true = K.squeeze(y_true, -1)
#     # convert dense predictions to labels
#     y_pred_labels = K.argmax(y_pred, axis=-1)
#     y_pred_labels = K.cast(y_pred_labels, K.floatx())
#     return K.cast(K.equal(y_true, y_pred_labels), K.floatx())

model.compile(optimizer='adam',
              loss=custom_sparse_categorical_crossentropy,
              metrics=['sparse_categorical_accuracy'])

history = model.fit(x_train, y_train, epochs=2, batch_size=200)
model.evaluate(x_test, y_test)
Check out the implementation of sparse_categorical_accuracy from here and sparse_categorical_crossentropy from here.
What happens is that when you use the accuracy metric, Keras actually selects a different accuracy implementation depending on the loss, since how the accuracy is computed depends on the labels and the predictions of the model:
for categorical_crossentropy it uses categorical_accuracy as accuracy metric.
for binary_crossentropy it uses binary_accuracy as accuracy metric.
for sparse_categorical_crossentropy it uses sparse_categorical_accuracy as accuracy metric.
Keras can only do this if you use the predefined losses, as it can't guess otherwise. For your custom loss, you can use one of the three accuracy implementations directly, e.g. metrics=['sparse_categorical_accuracy'].