I need to copy a Keras model, and I know of no way to do this unless the model is not a subclass of tf.keras.models.Model().
Note: Using copy.deepcopy() runs without raising any error, but the resulting copy raises an error as soon as it is used.
import tensorflow as tf
class MyModel(tf.keras.Model):
    """Subclassed Keras model: Dense(4, relu) -> optional Dropout(0.5) -> Dense(5, softmax)."""

    def __init__(self):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax)
        self.dropout = tf.keras.layers.Dropout(0.5)

    def call(self, inputs, training=False):
        """Run the forward pass; the dropout layer is only invoked when `training` is truthy."""
        hidden = self.dense1(inputs)
        if training:
            hidden = self.dropout(hidden, training=training)
        return self.dense2(hidden)
if __name__ == '__main__':
model1 = MyModel()
model2 = tf.keras.models.clone_model(model1)
Results in:
Traceback (most recent call last):
File "/Users/emadboctor/Library/Application Support/JetBrains/PyCharm2020.3/scratches/scratch.py", line 600, in <module>
model2 = tf.keras.models.clone_model(model1)
File "/usr/local/lib/python3.8/site-packages/tensorflow/python/keras/models.py", line 430, in clone_model
return _clone_functional_model(
File "/usr/local/lib/python3.8/site-packages/tensorflow/python/keras/models.py", line 171, in _clone_functional_model
raise ValueError('Expected `model` argument '
ValueError: Expected `model` argument to be a functional `Model` instance, but got a subclass model instead.
Currently, tf.keras.models.clone_model cannot be used with models built with the subclassing API, whereas it works for the Sequential and functional APIs. From the documentation:
model: Instance of Model (could be a functional model or a Sequential model).
Here is a workaround for your need. Copying makes sense when we have a trained model whose optimized parameters we want to reuse. So the main task is to create a new model from an existing one. For now, the most convenient way to do this is to get the trained weights and set them on a newly created model instance. Let's first build a model, train it, and then get its weight matrices and set them on the new model.
import tensorflow as tf
import numpy as np
class ModelSubClassing(tf.keras.Model):
    """Conv2D -> global average pooling -> Dense classifier written with the subclassing API."""

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, strides=2, activation="relu")
        self.gap = tf.keras.layers.GlobalAveragePooling2D()
        self.dense = tf.keras.layers.Dense(num_classes)

    def call(self, input_tensor, training=False):
        """Forward pass; `training` is accepted but not forwarded to any layer."""
        # forward pass: block 1
        features = self.conv1(input_tensor)
        pooled = self.gap(features)
        return self.dense(pooled)

    def build_graph(self, raw_shape):
        """Return a functional Model that wraps `call` over an Input of shape `raw_shape`."""
        symbolic_input = tf.keras.layers.Input(shape=raw_shape)
        return tf.keras.Model(inputs=[symbolic_input], outputs=self.call(symbolic_input))
# compile
sub_classing_model = ModelSubClassing(10)
# The keyword arguments below belong to a compile() call; without the opening
# `sub_classing_model.compile(` line they are dangling assignments followed by
# an unmatched ")".
# NOTE(review): the model's last Dense layer has no activation, while
# CategoricalCrossentropy() defaults to from_logits=False — confirm whether
# from_logits=True was intended.
sub_classing_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=tf.keras.metrics.CategoricalAccuracy(),
    optimizer=tf.keras.optimizers.Adam())
# plot for debug
(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
# train set / data
x_train = np.expand_dims(x_train, axis=-1)
x_train = x_train.astype('float32') / 255
# train set / target
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
# fit
sub_classing_model.fit(x_train, y_train, batch_size=128, epochs=1)
# 469/469 [==============================] - 2s 2ms/step - loss: 8.2821
New Model / Copy
For a subclassed model, we have to instantiate the class ourselves.
sub_classing_model_copy = ModelSubClassing(10)
# A freshly constructed subclassed model owns no variables until it is built,
# so set_weights() would be handed more arrays than the model has weights.
# Build it with the training input shape (batch dimension left open) first.
sub_classing_model_copy.build((None, *x_train.shape[1:]))
sub_classing_model_copy.set_weights(sub_classing_model.get_weights())  # <- get and set weights
# plot for debug ; same as original plot
# but note, the layer names are no longer the same
# i.e. if, old: conv2d_40 , new/copy: conv2d_41
def clones(module, N):
    """
    Creation of N identical layers.

    :param module: module to clone
    :param N: number of copies
    :return: keras model of module copies
    """
    import copy  # function-scope import keeps this block self-contained

    seqm = tf.keras.Sequential()
    for i in range(N):
        m = copy.deepcopy(module)
        # Sequential rejects layers that share a name, so give every copy a
        # distinct one derived from the source layer's name.
        # NOTE(review): relies on the private `_name` attribute because
        # `Layer.name` is read-only — confirm against the TF version in use.
        m._name = f"{module.name}_{i}"
        seqm.add(m)
    return seqm
Notice that when I created my model, I defined the call function with the argument something=False.
When I use the model in the train_step function, I pass "something=True, training=True"; training is not defined in my call, but it is in the default tf.keras.Model call.
Why am I able to execute this with no error? The output is basically a series of 'my call' lines.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
# Add a channels dimension
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")
train_ds = tf.data.Dataset.from_tensor_slices(
(x_train, y_train)).shuffle(10000).batch(32)
class MyModel(Model):
    """Flatten -> Dense(10) model whose `call` accepts an extra `something` flag."""

    def __init__(self):
        super().__init__()
        self.fl = Flatten()
        self.d = Dense(10)

    ######My problem#######
    def call(self, x, something=False):
        """Forward pass; emits 'my call' through tf.print when `something` is truthy."""
        if something:
            tf.print('my call')
        flattened = self.fl(x)
        return self.d(flattened)
model = MyModel()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()
def train_step(X, Y):
    """Run one gradient step on the batch (X, Y) using the module-level model, loss and optimizer."""
    with tf.GradientTape() as tape:
        ######My problem#######
        predictions = model(X, something=True, training=True)
        batch_loss = loss_object(Y, predictions)
    # Differentiate outside the tape context, then pair each gradient with its variable.
    gradients = tape.gradient(batch_loss, model.trainable_variables)
    grads_and_vars = zip(gradients, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars)
# Three passes over the training dataset; the inner loop had lost its body,
# which is the call that actually performs the optimization step.
for epoch in range(3):
    for X, Y in train_ds:
        train_step(X, Y)
The documentation of the call method in the Model class says:
To call a model on an input, always use the `__call__()` method, i.e.
`model(inputs)`, which relies on the underlying `call()` method.
And indeed, the __call__ can take any input argument : def __call__(self, *args, **kwargs): (in Model class source code)
You can find a more detailed answer here
I am running TensorFlow 2.4 on Colab. I tried to save the model using tf.train.Checkpoint(), since the model uses subclassing, but after restoring it I saw that none of my model's weights had been restored.
Here are few snippets:
### From tensorflow tutorial nmt_with_attention
class Encoder(tf.keras.Model):
def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
self.gru = tf.keras.layers.GRU(self.enc_units,
class NMT_Train(tf.keras.Model):
def __init__(self, inp_vocab_size, tar_vocab_size, max_length_inp, max_length_tar, emb_dims, units, batch_size, source_tokenizer, target_tokenizer):
super(NMT_Train, self).__init__()
self.encoder = Encoder(inp_vocab_size, emb_dims, units, batch_size)
model = NMT_Train(INP_VOCAB, TAR_VOCAB, MAXLEN, MAXLEN, EMB_DIMS, UNITS, BATCH_SIZE, english_tokenizer, hindi_tokenizer)
model.compile(optimizer = tf.keras.optimizers.Adam(),
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits= True))
model.fit(dataset, epochs=2)
checkpoint = tf.train.Checkpoint(model = model)
manager = tf.train.CheckpointManager(checkpoint, './ckpts', max_to_keep=1)
model.encoder.gru.get_weights() ### get the output
##[array([[-0.0627057 , 0.05900152, 0.06614069, ...
model.optimizer.get_weights() ### get the output
##[90, array([[ 6.6851695e-05, -4.6736805e-06, -2.3183979e-05, ...
When I later restored it I didn't get any gru weights:
model = NMT_Train(INP_VOCAB, TAR_VOCAB, MAXLEN, MAXLEN, EMB_DIMS, UNITS, BATCH_SIZE, english_tokenizer, hindi_tokenizer)
model.compile(optimizer = tf.keras.optimizers.Adam(),
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits= True))
checkpoint = tf.train.Checkpoint(model = model)
manager = tf.train.CheckpointManager(checkpoint, './ckpts', max_to_keep=1)
model.encoder.gru.get_weights() ### empty list
## []
model.optimizer.get_weights() ### empty list
## []
I also tried checkpoint.restore(manager.latest_checkpoint) but nothing changed.
Is there anything wrong with what I am doing? Or can you suggest another way to save the model so that I can retrain it for further epochs?
You are defining a Keras model, so why not use Keras model checkpoints?
From Keras documentation:
# `...` placeholders are kept from the documentation example; substitute a
# real loss and optimizer before running.
model.compile(loss=..., optimizer=...,
              metrics=['accuracy'])

EPOCHS = 10
checkpoint_filepath = '/tmp/checkpoint'
# Keep only the best weights seen so far, judged by validation accuracy.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

# Model weights are saved at the end of every epoch, if it's the best seen
# so far.
model.fit(epochs=EPOCHS, callbacks=[model_checkpoint_callback])

# The model weights (that are considered the best) are loaded into the model.
model.load_weights(checkpoint_filepath)
I have a VAE architecture script as follows:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Conv2DTranspose, Lambda, Reshape, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
INPUT_DIM = (64,64,3)
CONV_FILTERS = [32,64,64, 128]
CONV_STRIDES = [2,2,2,2]
CONV_ACTIVATIONS = ['relu','relu','relu','relu']
CONV_T_FILTERS = [64,64,32,3]
CONV_T_STRIDES = [2,2,2,2]
CONV_T_ACTIVATIONS = ['relu','relu','relu','sigmoid']
Z_DIM = 32
class Sampling(Layer):
    """Reparameterization layer: returns mu + exp(log_var / 2) * epsilon."""

    def call(self, inputs):
        """Sample from the (mu, log_var) pair given in `inputs`."""
        mu, log_var = inputs
        # Standard-normal noise with the same dynamic shape as mu.
        noise = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
        sigma = K.exp(log_var / 2)
        return mu + sigma * noise
class VAEModel(Model):
    """Encoder/decoder pair trained with a reconstruction loss plus an MMD term.

    The encoder is expected to return the triple (z_mean, z_log_var, z); the
    decoder maps z back to the input space.
    """

    def __init__(self, encoder, decoder, r_loss_factor, **kwargs):
        super(VAEModel, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.r_loss_factor = r_loss_factor

    @staticmethod
    def _compute_kernel(x, y):
        """Pairwise kernel exp(-mean((x_i - y_j)^2) / dim) between rows of x and rows of y."""
        x_size = tf.shape(x)[0]
        y_size = tf.shape(y)[0]
        dim = tf.shape(x)[1]
        # Broadcast both inputs to [x_size, y_size, dim] so every pair is compared.
        tiled_x = tf.tile(tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1]))
        tiled_y = tf.tile(tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1]))
        return tf.exp(-tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32))

    @staticmethod
    def _compute_mmd(x, y):
        """Maximum mean discrepancy estimate between the sample sets x and y."""
        x_kernel = VAEModel._compute_kernel(x, x)
        y_kernel = VAEModel._compute_kernel(y, y)
        xy_kernel = VAEModel._compute_kernel(x, y)
        return tf.reduce_mean(x_kernel) + tf.reduce_mean(y_kernel) - 2 * tf.reduce_mean(xy_kernel)

    def train_step(self, data):
        """Run one optimization step and return the individual loss terms."""
        if isinstance(data, tuple):
            # fit(x, y) delivers a tuple; only the inputs are needed here.
            data = data[0]

        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.square(data - reconstruction), axis=[1, 2, 3]
            )
            reconstruction_loss *= self.r_loss_factor

            # The KL term is reported below but is not part of the optimized loss.
            kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            kl_loss = tf.reduce_sum(kl_loss, axis=1)
            kl_loss *= -0.5

            # Draw prior samples with the same shape as z instead of relying on
            # a global BATCH_SIZE, so a short final batch is handled too.
            true_samples = tf.random.normal(tf.shape(z))
            loss_mmd = self._compute_mmd(true_samples, z)
            total_loss = reconstruction_loss + loss_mmd

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return {
            "loss": total_loss,
            "reconstruction_loss": reconstruction_loss,
            "kl_loss": kl_loss,
            "mmd_loss": loss_mmd,
        }

    def call(self, inputs):
        """Encode `inputs` and decode the sampled latent vector."""
        # The encoder returns (z_mean, z_log_var, z); only z is a valid decoder input.
        _, _, z = self.encoder(inputs)
        return self.decoder(z)
class VAE():
def __init__(self):
self.models = self._build()
self.full_model = self.models[0]
self.encoder = self.models[1]
self.decoder = self.models[2]
self.input_dim = INPUT_DIM
self.z_dim = Z_DIM
self.learning_rate = LEARNING_RATE
self.kl_tolerance = KL_TOLERANCE
def _build(self):
vae_x = Input(shape=INPUT_DIM, name='observation_input')
vae_c1 = Conv2D(filters = CONV_FILTERS[0], kernel_size = CONV_KERNEL_SIZES[0], strides = CONV_STRIDES[0], activation=CONV_ACTIVATIONS[0], name='conv_layer_1')(vae_x)
vae_c2 = Conv2D(filters = CONV_FILTERS[1], kernel_size = CONV_KERNEL_SIZES[1], strides = CONV_STRIDES[1], activation=CONV_ACTIVATIONS[0], name='conv_layer_2')(vae_c1)
vae_c3= Conv2D(filters = CONV_FILTERS[2], kernel_size = CONV_KERNEL_SIZES[2], strides = CONV_STRIDES[2], activation=CONV_ACTIVATIONS[0], name='conv_layer_3')(vae_c2)
vae_c4= Conv2D(filters = CONV_FILTERS[3], kernel_size = CONV_KERNEL_SIZES[3], strides = CONV_STRIDES[3], activation=CONV_ACTIVATIONS[0], name='conv_layer_4')(vae_c3)
vae_z_in = Flatten()(vae_c4)
vae_z_mean = Dense(Z_DIM, name='mu')(vae_z_in)
vae_z_log_var = Dense(Z_DIM, name='log_var')(vae_z_in)
vae_z = Sampling(name='z')([vae_z_mean, vae_z_log_var])
vae_z_input = Input(shape=(Z_DIM,), name='z_input')
vae_dense = Dense(1024, name='dense_layer')(vae_z_input)
vae_unflatten = Reshape((1,1,DENSE_SIZE), name='unflatten')(vae_dense)
vae_d1 = Conv2DTranspose(filters = CONV_T_FILTERS[0], kernel_size = CONV_T_KERNEL_SIZES[0] , strides = CONV_T_STRIDES[0], activation=CONV_T_ACTIVATIONS[0], name='deconv_layer_1')(vae_unflatten)
vae_d2 = Conv2DTranspose(filters = CONV_T_FILTERS[1], kernel_size = CONV_T_KERNEL_SIZES[1] , strides = CONV_T_STRIDES[1], activation=CONV_T_ACTIVATIONS[1], name='deconv_layer_2')(vae_d1)
vae_d3 = Conv2DTranspose(filters = CONV_T_FILTERS[2], kernel_size = CONV_T_KERNEL_SIZES[2] , strides = CONV_T_STRIDES[2], activation=CONV_T_ACTIVATIONS[2], name='deconv_layer_3')(vae_d2)
vae_d4 = Conv2DTranspose(filters = CONV_T_FILTERS[3], kernel_size = CONV_T_KERNEL_SIZES[3] , strides = CONV_T_STRIDES[3], activation=CONV_T_ACTIVATIONS[3], name='deconv_layer_4')(vae_d3)
vae_encoder = Model(vae_x, [vae_z_mean, vae_z_log_var, vae_z], name = 'encoder')
vae_decoder = Model(vae_z_input, vae_d4, name = 'decoder')
vae_full = VAEModel(vae_encoder, vae_decoder, 10000)
opti = Adam(lr=LEARNING_RATE)
return (vae_full,vae_encoder, vae_decoder)
def set_weights(self, filepath):
def train(self, data):
self.full_model.fit(data, data,
def save_weights(self, filepath):
vae = VAE()
line 2200, in load_weights
'Unable to load weights saved in HDF5 format into a subclassed ' ValueError: Unable to load weights saved in HDF5 format into a
subclassed Model which has not created its variables yet. Call the
Model first, then load the weights.
I am not sure what this means since I am not that proficient in OOP. The surprising bit is that the above code was working until it stopped working. The model is training from scratch and it saves the weights in filepath. But when I am loading the same weights now it is throwing the above error!
If you set model.built = True prior to loading the model weights it works.
I was getting the same error while loading weights:
ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.
solved it by building model before loading weights
model.build(input_shape = <INPUT_SHAPE>)
ps, tensorflow Version: 2.5.0
What version of TF are you running? For a while the default saving format was hdf5, but this format cannot support subclassed models as easily, so you get this error. It may be solvable by first training it on a single batch and then loading the weights (to determine how the parts are connected, which is not saved in hdf5).
In the future I would recommend making sure that all saves are done with the TF file format though, it will save you from extra work.
As alwaysmvp45 pointed out "hdf5 does not store how the layers are connected". To make these layers be connected, another way is that you call the model to predict a zeros array with input shape ((1,w,h,c)) before loading weights:
Not sure if this has changed in more recent versions (I'm on 2.4). but I had to go this route:
# Do all the build and training
# ...
# Save the weights
# delete any reference to the model
del model
# Now do the load for testing
from tensorflow import keras
model = keras.models.load_model('path/to/location.h5')
When I tried the other suggestions, I got warnings about the layers not being present, and I had to rebuild the same model that I had trained. No big deal — stick it in a function somewhere — but this approach works better for me.
I am trying to utilize Model.fit_generator of Keras in tensorflow 1.10.
Simplified reproducible code:
import tensorflow as tf
import numpy as np
class TestNet(tf.keras.Model):
    """Model consisting of a single kernel-size-1 Conv1D with `class_count` filters."""

    def __init__(self, class_count, name='TestNet', **kwargs):
        super(TestNet, self).__init__(name=name, **kwargs)
        self.convolution = tf.keras.layers.Conv1D(
            class_count, kernel_size=1, input_shape=(None, 3))

    def call(self, points):
        """Apply the convolution to `points` and return its output."""
        logits = self.convolution(points)
        return logits
def segmentation_loss(labels, logits):
    """Return the mean sparse softmax cross-entropy of `logits` against `labels`."""
    per_element_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)
    return tf.reduce_mean(per_element_loss)
def generate():
    """Yield an endless stream of all-zero (points, labels) pairs.

    Each item is a tuple of a (100, 3) float array and a (100,) float array.
    """
    while True:
        # (100,) spelled as an explicit 1-tuple; `(100)` is just the int 100.
        yield (np.zeros(shape=(100, 3)), np.zeros(shape=(100,)))
if __name__ == "__main__":
test_net = TestNet(class_count=5)
optimizer = tf.keras.optimizers.Adam()
test_net.compile(optimizer, loss=segmentation_loss)
history = test_net.fit_generator(generate, steps_per_epoch=1000, epochs=10)
While this works in tensorflow 1.14, executing this in 1.10 yields the NotImplementedError in the title:
NotImplementedError: fit_generator is not yet enabled for unbuilt Model subclasses
Anybody knows how to work around this?
I see a mistake in fit_generator, update to this:
history = test_net.fit_generator(generate(), steps_per_epoch=1000, epochs=10)
Now, I don't know how Tensorflow 1.10 works, but this kind of modeling is rather new, usually a Keras model is built like this:
#model's inputs
inputs = Input((None,3))
#model's layers
convolution = Conv1D(class_count, kernel_size=1)
#model's call
outputs = convolution(inputs)
#model finish
test_net = Model(inputs, outputs)
A workaround for you:
inputs = Input(shape)
test_net = TestNet(...)
outputs = test_net.call(inputs)
test_net_model = Model(inputs, outputs)
I am quite new to machine learning and I am trying to implement my custom layer in keras. I found a couple of tutorials and it seems comparatively straight forward. What I do not understand, though, is how to implement my new custom layer in Sequential(). See for example this classification problem that I took from the tensorflow website(https://www.tensorflow.org/tutorials/keras/basic_text_classification), posted here for your convenience:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from tensorflow import keras
import numpy as np
imdb = keras.datasets.imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)
# A dictionary mapping words to an integer index
word_index = imdb.get_word_index()
# The first indices are reserved
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2 # unknown
word_index["<UNUSED>"] = 3
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
def decode_review(text):
    """Turn a sequence of word indices into a space-joined string, using '?' for unknown indices."""
    words = [reverse_word_index.get(index, '?') for index in text]
    return ' '.join(words)
train_data = keras.preprocessing.sequence.pad_sequences(train_data,
test_data = keras.preprocessing.sequence.pad_sequences(test_data,
# input shape is the vocabulary count used for the movie reviews (10,000 words)
vocab_size = 10000
model = keras.Sequential()
model.add(keras.layers.Embedding(vocab_size, 16))
model.add(keras.layers.Dense(16, activation=tf.nn.relu))
model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))
x_val = train_data[:10000]
partial_x_train = train_data[10000:]
y_val = train_labels[:10000]
partial_y_train = train_labels[10000:]
history = model.fit(partial_x_train,
validation_data=(x_val, y_val),
results = model.evaluate(test_data, test_labels)
Do I have to change the source code for keras.Sequential() or is there an easy way?
Furthermore, looking at the source code for the class Sequential() made me wonder: I can't figure out how functions like 'summary()','compile()', 'fit()' and 'evaluate()' can be called if those are not even provided in the source code in this class. Here is the source code for Sequential():
Sequential is a Model, and not a layer.
The functions you mentioned (summary, compile, fit, evaluate) are implemented in the Model class linked here, as Sequential is a subclass of Model.
If you're writing a custom layer, you should be subclassing Layer instead, and not Model or Sequential.
You would need to implement build, call, and compute_output_shape to create your own layer.
There's a few examples on the Keras documentation:
from keras import backend as K
from keras.layers import Layer
class MyLayer(Layer):
    """Custom layer that multiplies its input by a single trainable kernel matrix."""

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the kernel of shape (input_shape[1], output_dim)."""
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], self.output_dim),
                                      initializer='uniform',
                                      trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        """Return the matrix product of `x` and the kernel."""
        return K.dot(x, self.kernel)

    def compute_output_shape(self, input_shape):
        """Return (input_shape[0], output_dim)."""
        return (input_shape[0], self.output_dim)
To use it, import the MyLayer class from whichever file you put it in, and then add it like the default Keras layers:
from custom.layers import MyLayer
model = keras.Sequential()