Keras OOM on multiple models and del() doesn't work - python

I'm trying to run multiple Keras models from functions, but I get an OOM error. My code looks like this:
def mainModel():
    trainX, testX, trainY, testY = train_test_split()  # data loading elided in the question
    evScoreVGG, imHolderVGG = runKerasVGGModel(trainX, testX, trainY, testY, mode)
    evScoreResNet, imHolderResnet = runResnetKerasModel(trainX, testX, trainY, testY, mode)
def createKerasVGG19Model():
    base_model = VGG19(weights='imagenet',
                       include_top=False,
                       input_shape=(width, height, 3))
    base_model.trainable = False
    model = Sequential([
        base_model,
        Flatten(),
        Dense(164, activation="softmax"),
    ])
    return model
# The ResNet builder is the same, with a ResNet50V2 base model
def runKerasVGGModel(trainX, testX, trainY, testY, mode):
    modelKeras = createKerasVGG19Model()
    modelKeras.compile( foo )  # compile arguments elided
    modelKeras.fit( foo )      # fit arguments elided
    evScore = modelKeras.evaluate(testX, testY)
    predictedValue = modelKeras.predict(testX, mode)
    del modelKeras  # del is a statement; del(modelKeras) also works but is unidiomatic
    K.clear_session()
    return evScore, predictedValue
The code for creating and running the ResNet model is the same as for the VGG model, without any change; I didn't include it for readability.
I tried writing del(model) at the end of runKeras().
I tried writing K.clear_session() at the end of runKeras().
I tried writing K.clear_session() between runKeras1() and runKeras2() inside mainModel().
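A pattern that often helps in this situation (a sketch, not a guaranteed fix: clear_session() releases the old graph, but TensorFlow does not hand GPU memory back to the OS within a running process):
import gc
from tensorflow.keras import backend as K

def runKerasVGGModel(trainX, testX, trainY, testY, mode):
    # Clear the previous model's graph *before* building the next model;
    # clearing only at the end still leaves the old graph alive while the
    # new model is being constructed.
    K.clear_session()
    modelKeras = createKerasVGG19Model()
    # Placeholder compile/fit arguments -- substitute your real ones.
    modelKeras.compile(optimizer="adam", loss="categorical_crossentropy")
    modelKeras.fit(trainX, trainY, epochs=5, batch_size=32)
    evScore = modelKeras.evaluate(testX, testY)
    predictedValue = modelKeras.predict(testX)
    del modelKeras      # drop the Python reference
    gc.collect()        # collect lingering references before the next model
    return evScore, predictedValue
If memory still accumulates, the only fully reliable release is process isolation: run each run*Model() call in its own multiprocessing.Process, so that all GPU memory is returned when the child process exits.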

Related

tf.train.Checkpoint is restoring or not?

I am running TensorFlow 2.4 on Colab. I tried to save the model using tf.train.Checkpoint(), since my code uses model subclassing, but after restoration I saw that it hadn't restored any of my model's weights. Here are a few snippets:
### From tensorflow tutorial nmt_with_attention
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        ...
        self.gru = tf.keras.layers.GRU(self.enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')

...

class NMT_Train(tf.keras.Model):
    def __init__(self, inp_vocab_size, tar_vocab_size, max_length_inp, max_length_tar, emb_dims, units, batch_size, source_tokenizer, target_tokenizer):
        super(NMT_Train, self).__init__()
        self.encoder = Encoder(inp_vocab_size, emb_dims, units, batch_size)
        ...

...
model = NMT_Train(INP_VOCAB, TAR_VOCAB, MAXLEN, MAXLEN, EMB_DIMS, UNITS, BATCH_SIZE, english_tokenizer, hindi_tokenizer)
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss_object=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
model.fit(dataset, epochs=2)
checkpoint = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(checkpoint, './ckpts', max_to_keep=1)
manager.save()
model.encoder.gru.get_weights()  ### get the output
## [array([[-0.0627057 ,  0.05900152,  0.06614069, ...
model.optimizer.get_weights()  ### get the output
## [90, array([[ 6.6851695e-05, -4.6736805e-06, -2.3183979e-05, ...
When I later restored it, I didn't get any GRU weights:
model = NMT_Train(INP_VOCAB, TAR_VOCAB, MAXLEN, MAXLEN, EMB_DIMS, UNITS, BATCH_SIZE, english_tokenizer, hindi_tokenizer)
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss_object=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
checkpoint = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(checkpoint, './ckpts', max_to_keep=1)
manager.restore_or_initialize()
model.encoder.gru.get_weights()  ### empty list
## []
model.optimizer.get_weights()  ### empty list
## []
I also tried checkpoint.restore(manager.latest_checkpoint), but nothing changed. Is there anything wrong with what I am doing? Or can you suggest another way to save the model so that I can retrain it for further epochs?
You are defining a Keras model, so why not use Keras model checkpoints?
From the Keras documentation:
model.compile(loss=..., optimizer=..., metrics=['accuracy'])

EPOCHS = 10
checkpoint_filepath = '/tmp/checkpoint'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

# Model weights are saved at the end of every epoch, if it's the best seen
# so far.
model.fit(epochs=EPOCHS, callbacks=[model_checkpoint_callback])

# The model weights (that are considered the best) are loaded into the model.
model.load_weights(checkpoint_filepath)
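As a side note on the empty lists in the question: with subclassed models, tf.train.Checkpoint restores lazily. The variables of a freshly constructed NMT_Train do not exist until the model is first called, so get_weights() returns [] right after restore_or_initialize(); the checkpointed values are assigned as each variable is created. A sketch of forcing that build (assumption: calling the model on one batch matches your call() signature; optimizer slot variables likewise only appear after the first training step):
model = NMT_Train(INP_VOCAB, TAR_VOCAB, MAXLEN, MAXLEN, EMB_DIMS, UNITS, BATCH_SIZE, english_tokenizer, hindi_tokenizer)
checkpoint = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(checkpoint, './ckpts', max_to_keep=1)
checkpoint.restore(manager.latest_checkpoint)
# Build the variables by running one batch through the model; the queued
# checkpoint values are matched and assigned as the variables are created.
for batch in dataset.take(1):
    model(batch)  # hypothetical -- adapt to your model's actual inputs
model.encoder.gru.get_weights()  # should no longer be empty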

Tensorflow 2.0 Model subclassing

I made this function that incorporates a ResNet into a model. It works well, and I can save it.
My problem is that I can't load it, because loading needs a call function. I am not exactly sure how to turn this into a class; my attempt is at the bottom. Some pointers would be helpful.
def build_network():
    inp = Input(shape=(256, 256, 3))
    resnet = tf.keras.applications.ResNet152V2(
        include_top=False, weights='imagenet', input_tensor=None,
        input_shape=(256, 256, 3), pooling=None, classes=1000
    )
    # classifier_activation='softmax'
    x = resnet(inp)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(9, activation='softmax')(x)
    model = tf.keras.Model(inputs=inp, outputs=x)
    opt = tf.keras.optimizers.SGD(momentum=0.9)
    # optimizer = 'adam',
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    model.summary()
    return model
class Resnet(tf.keras.Model):
    def __init__(self, num_classes=9):
        super(Resnet, self).__init__()
        self.block_1 = tf.keras.applications.ResNet152V2(
            include_top=False, weights='imagenet', input_tensor=None,
            input_shape=(256, 256, 3), pooling=None, classes=1000)
        self.global_pool = layers.GlobalAveragePooling2D()
        self.dropout = Dropout(0.3)
        self.classifier = Dense(num_classes, activation='softmax')

    def call(self, inputs):
        x = self.block_1(inputs)
        x = self.global_pool(x)
        x = self.dropout(x)
        x = self.classifier(x)
        return tf.keras.Model(inputs=inputs, outputs=x)
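(One note on the attempt itself: call() should return the output tensor, not construct a new tf.keras.Model. A corrected sketch, assuming the same imports as the snippet above:)
class Resnet(tf.keras.Model):
    def __init__(self, num_classes=9):
        super(Resnet, self).__init__()
        self.block_1 = tf.keras.applications.ResNet152V2(
            include_top=False, weights='imagenet', input_tensor=None,
            input_shape=(256, 256, 3), pooling=None, classes=1000)
        self.global_pool = layers.GlobalAveragePooling2D()
        self.dropout = Dropout(0.3)
        self.classifier = Dense(num_classes, activation='softmax')

    def call(self, inputs):
        x = self.block_1(inputs)
        x = self.global_pool(x)
        x = self.dropout(x)
        return self.classifier(x)  # return the tensor, not a new Model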
Using the subclassing API will actually make your model unserializable (see the "Limitations" section in the "What are Symbolic and Imperative APIs in TensorFlow 2.0?" blog post):
Imperative models are also more difficult to inspect, copy, or clone.
For example, model.save(), model.get_config(), and clone_model do not work for subclassed models. Likewise, model.summary() only gives you a list of layers (and doesn't provide information on how they're connected, since that's not accessible).
Edit: From TensorFlow 2.4, it is possible to pass a save_traces argument to model.save to serialize models built using the subclassing API. See https://www.tensorflow.org/guide/keras/save_and_serialize#how_savedmodel_handles_custom_objects.
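A minimal sketch of that newer path (assumptions: TF 2.4+, the SavedModel directory format, and a subclassed model whose call() returns a tensor, like the corrected Resnet sketch above):
model = Resnet(num_classes=9)
model(tf.zeros((1, 256, 256, 3)))  # trace once so the SavedModel gets a call signature
model.save("my_resnet_savedmodel", save_traces=True)  # save_traces=True is the default in TF 2.4+
restored = tf.keras.models.load_model("my_resnet_savedmodel")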
Here's a simple example of the functional-API approach, which serializes cleanly:
import tensorflow as tf
from tensorflow.keras.layers import (Dense, Dropout, GlobalAveragePooling2D,
                                     Input)


def build_network():
    inp = Input(shape=(256, 256, 3))
    resnet = tf.keras.applications.ResNet152V2(include_top=False,
                                               weights="imagenet",
                                               input_tensor=None,
                                               input_shape=(256, 256, 3),
                                               pooling=None,
                                               classes=1000)
    # classifier_activation="softmax"
    x = resnet(inp)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(9, activation="softmax")(x)
    model = tf.keras.Model(inputs=inp, outputs=x)
    # optimizer = "adam",
    opt = tf.keras.optimizers.SGD(momentum=0.9)
    model.compile(loss="categorical_crossentropy",
                  optimizer=opt,
                  metrics=["accuracy"])
    model.summary()
    return model


if __name__ == "__main__":
    model = build_network()
    model.summary()
    # Save
    model.save("my_model.h5")
    # Load
    loaded_model = tf.keras.models.load_model("my_model.h5")
    loaded_model.summary()
To load the model saved by your build_network function, use tf.keras.models.load_model.

Tensorflow 2 eager execution disabled inside a custom layer

I'm using TF2, installed via pip, on an Ubuntu 18.04 box:
$ pip freeze | grep "tensorflow"
tensorflow==2.0.0
tensorflow-estimator==2.0.1
And I'm playing with a custom layer.
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import Input, Concatenate, Dense, Bidirectional, LSTM, Embedding
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import imdb


class Attention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(Attention, self).__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, features, hidden):
        hidden_with_time_axis = tf.expand_dims(hidden, 1)
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
        attention_weights = tf.nn.softmax(self.V(score), axis=1)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights


vocab_size = 10000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
max_len = 200
rnn_cell_size = 128
x_train = sequence.pad_sequences(x_train, maxlen=max_len, padding='post')
x_test = sequence.pad_sequences(x_test, maxlen=max_len, truncating='post', padding='post')

# Network
sequence_input = Input(shape=(max_len,), dtype='int32')
embedded_sequences = Embedding(vocab_size, 128, input_length=max_len)(sequence_input)
# lstm = Bidirectional(LSTM(rnn_cell_size, dropout=0.3, return_sequences=True, return_state=True), name="bi_lstm_0")(embedded_sequences)
lstm, forward_h, forward_c, backward_h, backward_c = Bidirectional(
    LSTM(rnn_cell_size, dropout=0.2, return_sequences=True, return_state=True))(embedded_sequences)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
attention = Attention(8)
context_vector, attention_weights = attention(lstm, state_h)
output = Dense(1, activation='sigmoid')(context_vector)
model = Model(inputs=sequence_input, outputs=output)

# summarize layers
print(model.summary())
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(x_train, y_train, epochs=10, batch_size=200, validation_split=.3, verbose=1)
result = model.evaluate(x_test, y_test)
print(result)
I would like to debug/inspect the Attention.call() function, but I'm not able to get the tensor values when I set a breakpoint inside the function.
Before I start .fit(), I can verify that eager execution is enabled:
print(tf.executing_eagerly())
True
But inside the Attention.call() function, eager execution is disabled:
print(tf.executing_eagerly())
False
Any reason for eager execution to be false during call()? How can I enable it?
By default, a tf.keras model is compiled to a static graph to deliver the best execution performance; think of @tf.function as being applied to the model by default.
https://www.tensorflow.org/api_docs/python/tf/keras/Model#run_eagerly
To enable eager mode explicitly for a tf.keras model, compile the model with run_eagerly=True:
model.compile(optimizer='adam', run_eagerly=True, loss='binary_crossentropy', metrics=['accuracy'])
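Alternatively, there is a global switch that makes every tf.function run eagerly, which is handy for dropping breakpoints into call() without touching the compile call (in TF 2.0 it lives under the experimental name; from TF 2.3 the stable alias is tf.config.run_functions_eagerly):
# TF 2.0 spelling:
tf.config.experimental_run_functions_eagerly(True)
# Remember to switch it back off for full-speed graph execution:
tf.config.experimental_run_functions_eagerly(False)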

`fit_generator` is not yet enabled for unbuilt Model subclasses

I am trying to utilize Model.fit_generator of Keras in TensorFlow 1.10.
Simplified reproducible code:
import tensorflow as tf
import numpy as np


class TestNet(tf.keras.Model):
    def __init__(self, class_count, name='TestNet', **kwargs):
        super(TestNet, self).__init__(name=name, **kwargs)
        self.convolution = tf.keras.layers.Conv1D(class_count, kernel_size=1, input_shape=(None, 3))

    def call(self, points):
        return self.convolution(points)


def segmentation_loss(labels, logits):
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return tf.reduce_mean(cross_entropy)


def generate():
    while True:
        yield (np.zeros(shape=(100, 3)), np.zeros(shape=(100)))


if __name__ == "__main__":
    test_net = TestNet(class_count=5)
    optimizer = tf.keras.optimizers.Adam()
    test_net.compile(optimizer, loss=segmentation_loss)
    history = test_net.fit_generator(generate, steps_per_epoch=1000, epochs=10)
While this works in TensorFlow 1.14, executing it in 1.10 yields the NotImplementedError in the title:
NotImplementedError: fit_generator is not yet enabled for unbuilt Model subclasses
Does anybody know how to work around this?
I see a mistake in your fit_generator call: you are passing the function itself instead of a generator. Update it to this:
history = test_net.fit_generator(generate(), steps_per_epoch=1000, epochs=10)
Now, I don't know how TensorFlow 1.10 behaves here, but this kind of modeling is rather new; usually a Keras model is built like this:
from tensorflow.keras.layers import Input, Conv1D
from tensorflow.keras.models import Model

# model's inputs
inputs = Input((None, 3))
# model's layers
convolution = Conv1D(class_count, kernel_size=1)
# model's call
outputs = convolution(inputs)
# model finish
test_net = Model(inputs, outputs)
A workaround for you:
inputs = Input((None, 3))  # the shape TestNet's Conv1D expects
test_net = TestNet(...)
outputs = test_net.call(inputs)
test_net_model = Model(inputs, outputs)
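The wrapped model can then be compiled and trained the usual way; a sketch reusing the pieces from the question (the constructor argument class_count=5 is taken from the snippet above):
inputs = tf.keras.layers.Input((None, 3))
test_net = TestNet(class_count=5)
outputs = test_net.call(inputs)
test_net_model = tf.keras.Model(inputs, outputs)
test_net_model.compile(tf.keras.optimizers.Adam(), loss=segmentation_loss)
history = test_net_model.fit_generator(generate(),  # pass the generator, not the function
                                       steps_per_epoch=1000, epochs=10)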

Transfer learning on keras model always gives same predictions

I'm trying to train an image classifier using the Keras applications module. When I run predictions on the validation set, all images are predicted as the same class. It is not always the same class; it varies during training. I'm using MobileNetV2 with weights from ImageNet, but I also tried other models with the same result.
I've tried using a model from TensorFlow Hub as described in this tutorial: https://www.tensorflow.org/beta/tutorials/images/hub_with_keras and it worked fine, so it is not a data set issue.
My code snippet:
image_size = 224
batch_size = 32

train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input)
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input)

train_generator = train_datagen.flow_from_directory(
    training_data_dir,
    target_size=(image_size, image_size),
    batch_size=batch_size)
validation_generator = validation_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(image_size, image_size),
    batch_size=batch_size)

IMG_SHAPE = (image_size, image_size, 3)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights="imagenet")
base_model.trainable = False

model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(train_generator.num_classes, activation='softmax')
])
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.001),
              loss="categorical_crossentropy",
              metrics=["accuracy"])
model.summary()

batch_stats = CollectBatchStats()
epoch_stats = CollectEpochStats(model, validation_generator)
checkpoint = tf.keras.callbacks.ModelCheckpoint(...)

epochs = 10
steps_per_epoch = train_generator.n // train_generator.batch_size
validation_steps = validation_generator.n // validation_generator.batch_size
history = model.fit_generator(train_generator,
                              epochs=epochs,
                              steps_per_epoch=steps_per_epoch,
                              callbacks=[batch_stats, epoch_stats, checkpoint],
                              workers=4,
                              validation_data=validation_generator,
                              validation_steps=validation_steps)
Issue resolved: in my code I had the following lines after model compilation:
sess = keras_backend.get_session()
init = tf.compat.v1.global_variables_initializer()
sess.run(init)
After removing them everything works fine.
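The reason those lines broke things: tf.compat.v1.global_variables_initializer() re-initializes every variable in the session, so running it after the model was built overwrote the pretrained ImageNet weights with fresh random values, which is why the classifier degenerated to predicting a single class. A hypothetical way to confirm this, assuming the same TF1-compatibility session setup the snippet above ran in (keras_backend here is tensorflow.keras.backend):
w_before = base_model.get_weights()[0].copy()  # a pretrained ImageNet kernel
sess = keras_backend.get_session()
sess.run(tf.compat.v1.global_variables_initializer())
w_after = base_model.get_weights()[0]
print((w_before == w_after).all())  # expected: False -- the weights were re-randomized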
