I'm trying to train a model in TensorFlow and get the error:
AttributeError: 'method' object has no attribute '_from_serialized'
This is code that I have copied and seen work.
It seems to have something to do with the compatibility of my TensorFlow version and Python version. I'm able to run other models, but this error occurs when I try to track custom metrics.
What are the most recent compatible versions of TensorFlow-GPU and Python that can run models while tracking custom metrics?
I've checked the table that TensorFlow provides, and these versions should be compatible.
My current TensorFlow version is 2.10.0.
My Python version is 3.9.6.
Is there something else that might cause this error? I've created multiple environments with different versions and still receive it.
import os
import pickle
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization, \
Flatten, Dense, Reshape, Conv2DTranspose, Activation, Lambda
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
import numpy as np
import tensorflow as tf
class VAE:
"""
VAE represents a Deep Convolutional variational autoencoder architecture
with mirrored encoder and decoder components.
"""
def __init__(self,
input_shape,
conv_filters,
conv_kernels,
conv_strides,
latent_space_dim):
self.input_shape = input_shape # [28, 28, 1]
self.conv_filters = conv_filters # [2, 4, 8]
self.conv_kernels = conv_kernels # [3, 5, 3]
self.conv_strides = conv_strides # [1, 2, 2]
self.latent_space_dim = latent_space_dim # 2
self.reconstruction_loss_weight = 1000
self.encoder = None
self.decoder = None
self.model = None
self._num_conv_layers = len(conv_filters)
self._shape_before_bottleneck = None
self._model_input = None
self._build()
def summary(self):
self.encoder.summary()
self.decoder.summary()
self.model.summary()
def compile(self, learning_rate=0.0001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[self._calculate_reconstruction_loss,
self._calculate_kl_loss])
def train(self, x_train, batch_size, num_epochs):
self.model.fit(x_train,
x_train,
batch_size=batch_size,
epochs=num_epochs,
shuffle=True)
def save(self, save_folder="."):
self._create_folder_if_it_doesnt_exist(save_folder)
self._save_parameters(save_folder)
self._save_weights(save_folder)
def load_weights(self, weights_path):
self.model.load_weights(weights_path)
def reconstruct(self, images):
latent_representations = self.encoder.predict(images)
reconstructed_images = self.decoder.predict(latent_representations)
return reconstructed_images, latent_representations
@classmethod
def load(cls, save_folder="."):
parameters_path = os.path.join(save_folder, "parameters.pkl")
with open(parameters_path, "rb") as f:
parameters = pickle.load(f)
autoencoder = VAE(*parameters)
weights_path = os.path.join(save_folder, "weights.h5")
autoencoder.load_weights(weights_path)
return autoencoder
def _calculate_combined_loss(self, y_target, y_predicted):
reconstruction_loss = self._calculate_reconstruction_loss(y_target, y_predicted)
kl_loss = self._calculate_kl_loss(y_target, y_predicted)
combined_loss = self.reconstruction_loss_weight * reconstruction_loss\
+ kl_loss
return combined_loss
def _calculate_reconstruction_loss(self, y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
def _calculate_kl_loss(self, y_target, y_predicted):
kl_loss = -0.5 * K.sum(1 + self.log_variance - K.square(self.mu) -
K.exp(self.log_variance), axis=1)
return kl_loss
def _create_folder_if_it_doesnt_exist(self, folder):
if not os.path.exists(folder):
os.makedirs(folder)
def _save_parameters(self, save_folder):
parameters = [
self.input_shape,
self.conv_filters,
self.conv_kernels,
self.conv_strides,
self.latent_space_dim
]
save_path = os.path.join(save_folder, "parameters.pkl")
with open(save_path, "wb") as f:
pickle.dump(parameters, f)
def _save_weights(self, save_folder):
save_path = os.path.join(save_folder, "weights.h5")
self.model.save_weights(save_path)
def _build(self):
self._build_encoder()
self._build_decoder()
self._build_autoencoder()
def _build_autoencoder(self):
model_input = self._model_input
model_output = self.decoder(self.encoder(model_input))
self.model = Model(model_input, model_output, name="autoencoder")
def _build_decoder(self):
decoder_input = self._add_decoder_input()
dense_layer = self._add_dense_layer(decoder_input)
reshape_layer = self._add_reshape_layer(dense_layer)
conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
decoder_output = self._add_decoder_output(conv_transpose_layers)
self.decoder = Model(decoder_input, decoder_output, name="decoder")
def _add_decoder_input(self):
return Input(shape=self.latent_space_dim, name="decoder_input")
def _add_dense_layer(self, decoder_input):
num_neurons = np.prod(self._shape_before_bottleneck) # [1, 2, 4] -> 8
dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
return dense_layer
def _add_reshape_layer(self, dense_layer):
return Reshape(self._shape_before_bottleneck)(dense_layer)
def _add_conv_transpose_layers(self, x):
"""Add conv transpose blocks."""
# loop through all the conv layers in reverse order and stop at the
# first layer
for layer_index in reversed(range(1, self._num_conv_layers)):
x = self._add_conv_transpose_layer(layer_index, x)
return x
def _add_conv_transpose_layer(self, layer_index, x):
layer_num = self._num_conv_layers - layer_index
conv_transpose_layer = Conv2DTranspose(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"decoder_conv_transpose_layer_{layer_num}"
)
x = conv_transpose_layer(x)
x = ReLU(name=f"decoder_relu_{layer_num}")(x)
x = BatchNormalization(name=f"decoder_bn_{layer_num}")(x)
return x
def _add_decoder_output(self, x):
conv_transpose_layer = Conv2DTranspose(
filters=1,
kernel_size=self.conv_kernels[0],
strides=self.conv_strides[0],
padding="same",
name=f"decoder_conv_transpose_layer_{self._num_conv_layers}"
)
x = conv_transpose_layer(x)
output_layer = Activation("sigmoid", name="sigmoid_layer")(x)
return output_layer
def _build_encoder(self):
encoder_input = self._add_encoder_input()
conv_layers = self._add_conv_layers(encoder_input)
bottleneck = self._add_bottleneck(conv_layers)
self._model_input = encoder_input
self.encoder = Model(encoder_input, bottleneck, name="encoder")
def _add_encoder_input(self):
return Input(shape=self.input_shape, name="encoder_input")
def _add_conv_layers(self, encoder_input):
"""Create all convolutional blocks in encoder."""
x = encoder_input
for layer_index in range(self._num_conv_layers):
x = self._add_conv_layer(layer_index, x)
return x
def _add_conv_layer(self, layer_index, x):
"""Add a convolutional block to a graph of layers, consisting of
conv 2d + ReLU + batch normalization.
"""
layer_number = layer_index + 1
conv_layer = Conv2D(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"encoder_conv_layer_{layer_number}"
)
x = conv_layer(x)
x = ReLU(name=f"encoder_relu_{layer_number}")(x)
x = BatchNormalization(name=f"encoder_bn_{layer_number}")(x)
return x
def _add_bottleneck(self, x):
"""Flatten data and add bottleneck with Guassian sampling (Dense
layer).
"""
self._shape_before_bottleneck = K.int_shape(x)[1:]
x = Flatten()(x)
self.mu = Dense(self.latent_space_dim, name="mu")(x)
self.log_variance = Dense(self.latent_space_dim,
name="log_variance")(x)
def sample_point_from_normal_distribution(args):
mu, log_variance = args
epsilon = K.random_normal(shape=K.shape(self.mu), mean=0.,
stddev=1.)
sampled_point = mu + K.exp(log_variance / 2) * epsilon
return sampled_point
x = Lambda(sample_point_from_normal_distribution,
name="encoder_output")([self.mu, self.log_variance])
return x
if __name__ == "__main__":
autoencoder = VAE(
input_shape=(28, 28, 1),
conv_filters=(32, 64, 64, 64),
conv_kernels=(3, 3, 3, 3),
conv_strides=(1, 2, 2, 1),
latent_space_dim=2
)
autoencoder.summary()
LEARNING_RATE = 0.0005
BATCH_SIZE = 32
EPOCHS = 100
def load_mnist():
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255
x_train = x_train.reshape(x_train.shape + (1,))
x_test = x_test.astype("float32") / 255
x_test = x_test.reshape(x_test.shape + (1,))
return x_train, y_train, x_test, y_test
def train(x_train, learning_rate, batch_size, epochs):
autoencoder = VAE(
input_shape=(28, 28, 1),
conv_filters=(32, 64, 64, 64),
conv_kernels=(3, 3, 3, 3),
conv_strides=(1, 2, 2, 1),
latent_space_dim=2
)
autoencoder.summary()
autoencoder.compile(learning_rate)
autoencoder.train(x_train, batch_size, epochs)
return autoencoder
if __name__ == "__main__":
x_train, _, _, _ = load_mnist()
autoencoder = train(x_train[:10000], LEARNING_RATE, BATCH_SIZE, EPOCHS)
autoencoder.save("model")
The attribute error here is raised because you can't set any attribute on a method object, i.e.:
class Foo:
def bar(self):
print("bar")
if __name__ == "__main__":
Foo().bar.baz = 1
Output:
Traceback (most recent call last): line 7, in <module>
Foo().bar.baz = 1
AttributeError: 'method' object has no attribute 'baz'
When collecting the metric information in training_utils_v1, the metrics specified when the model is compiled (model.compile(..., metrics=[..])) are iterated over, and for each metric, the attribute _from_serialized is set:
for i, metrics in enumerate(nested_metrics):
metrics_dict = collections.OrderedDict()
for metric in metrics:
metric_name = get_metric_name(metric, is_weighted)
metric_fn = get_metric_function(
metric, output_shape=output_shapes[i], loss_fn=loss_fns[i]
)
metric_fn._from_serialized = from_serialized
In the example provided, two metrics are supplied to model.compile, and each is a method of the VAE class:
def compile(self, learning_rate=0.0001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[self._calculate_reconstruction_loss,
self._calculate_kl_loss])
To test this, observe that if metrics is omitted entirely, training starts successfully.
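A minimal check, mirroring the compile method above with the metrics argument dropped (training then starts without raising):
def compile(self, learning_rate=0.0001):
    optimizer = Adam(learning_rate=learning_rate)
    # no metrics: nothing for training_utils_v1 to set _from_serialized on
    self.model.compile(optimizer=optimizer,
                       loss=self._calculate_combined_loss)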
One of the metrics supplied, _calculate_reconstruction_loss, is a method that does not need to be one, as it never refers to self in its body:
def _calculate_reconstruction_loss(self, y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
So it can be moved outside of the class and made into a plain function (some IDEs will recommend this with a message to the effect of "Make function from method"):
def _calculate_reconstruction_loss(y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
The compile statement can then be revised:
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[_calculate_reconstruction_loss,
self._calculate_kl_loss])
The same exception will still appear, since we're still referring to a method in the call (self._calculate_kl_loss), but if self._calculate_kl_loss is omitted as well, training will start successfully:
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[_calculate_reconstruction_loss])
For completeness, reviewing what self._calculate_kl_loss is doing is a bit more tricky, but we can successfully use it as a metric by converting it into a function that takes a single argument, model, and returns another function taking an arbitrary number of arguments (*args), so that it can be used both as a metric (which always expects a function of two arguments) and in the loss function:
def calculate_kl_loss(model):
# wrap `_calculate_kl_loss` such that it takes the model as an argument,
# returns a function which can take arbitrary number of arguments
# (for compatibility with `metrics` and utility in the loss function)
# and returns the kl loss
def _calculate_kl_loss(*args):
kl_loss = -0.5 * K.sum(1 + model.log_variance - K.square(model.mu) -
K.exp(model.log_variance), axis=1)
return kl_loss
return _calculate_kl_loss
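In compile, the closure is then bound to the model instance, exactly as in the full revised snippet below:
self.model.compile(optimizer=optimizer,
                   loss=self._calculate_combined_loss,
                   metrics=[_calculate_reconstruction_loss,
                            calculate_kl_loss(self)])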
With these revisions, when training is started, the output is:
Epoch 1/100
10000/10000 [==============================] - 43s 4ms/sample - loss: 89.4243 - _calculate_reconstruction_loss: 0.0833 - _calculate_kl_loss: 6.1707
Epoch 2/100
10000/10000 [==============================] - 46s 5ms/sample - loss: 69.0131 - _calculate_reconstruction_loss: 0.0619 - _calculate_kl_loss: 7.1129
The entire snippet with revisions detailed above is:
import os
import pickle
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization, \
Flatten, Dense, Reshape, Conv2DTranspose, Activation, Lambda
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()
def _calculate_reconstruction_loss(y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
def calculate_kl_loss(model):
# wrap `_calculate_kl_loss` such that it takes the model as an argument,
# returns a function which can take arbitrary number of arguments
# (for compatibility with `metrics` and utility in the loss function)
# and returns the kl loss
def _calculate_kl_loss(*args):
kl_loss = -0.5 * K.sum(1 + model.log_variance - K.square(model.mu) -
K.exp(model.log_variance), axis=1)
return kl_loss
return _calculate_kl_loss
class VAE:
"""
VAE represents a Deep Convolutional variational autoencoder architecture
with mirrored encoder and decoder components.
"""
def __init__(self,
input_shape,
conv_filters,
conv_kernels,
conv_strides,
latent_space_dim):
self.input_shape = input_shape # [28, 28, 1]
self.conv_filters = conv_filters # [2, 4, 8]
self.conv_kernels = conv_kernels # [3, 5, 3]
self.conv_strides = conv_strides # [1, 2, 2]
self.latent_space_dim = latent_space_dim # 2
self.reconstruction_loss_weight = 1000
self.encoder = None
self.decoder = None
self.model = None
self._num_conv_layers = len(conv_filters)
self._shape_before_bottleneck = None
self._model_input = None
self._build()
def summary(self):
self.encoder.summary()
self.decoder.summary()
self.model.summary()
def compile(self, learning_rate=0.0001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[_calculate_reconstruction_loss,
calculate_kl_loss(self)])
def train(self, x_train, batch_size, num_epochs):
self.model.fit(x_train,
x_train,
batch_size=batch_size,
epochs=num_epochs,
shuffle=True)
def save(self, save_folder="."):
self._create_folder_if_it_doesnt_exist(save_folder)
self._save_parameters(save_folder)
self._save_weights(save_folder)
def load_weights(self, weights_path):
self.model.load_weights(weights_path)
def reconstruct(self, images):
latent_representations = self.encoder.predict(images)
reconstructed_images = self.decoder.predict(latent_representations)
return reconstructed_images, latent_representations
@classmethod
def load(cls, save_folder="."):
parameters_path = os.path.join(save_folder, "parameters.pkl")
with open(parameters_path, "rb") as f:
parameters = pickle.load(f)
autoencoder = VAE(*parameters)
weights_path = os.path.join(save_folder, "weights.h5")
autoencoder.load_weights(weights_path)
return autoencoder
def _calculate_combined_loss(self, y_target, y_predicted):
reconstruction_loss = _calculate_reconstruction_loss(y_target, y_predicted)
kl_loss = calculate_kl_loss(self)()
combined_loss = self.reconstruction_loss_weight * reconstruction_loss\
+ kl_loss
return combined_loss
def _create_folder_if_it_doesnt_exist(self, folder):
if not os.path.exists(folder):
os.makedirs(folder)
def _save_parameters(self, save_folder):
parameters = [
self.input_shape,
self.conv_filters,
self.conv_kernels,
self.conv_strides,
self.latent_space_dim
]
save_path = os.path.join(save_folder, "parameters.pkl")
with open(save_path, "wb") as f:
pickle.dump(parameters, f)
def _save_weights(self, save_folder):
save_path = os.path.join(save_folder, "weights.h5")
self.model.save_weights(save_path)
def _build(self):
self._build_encoder()
self._build_decoder()
self._build_autoencoder()
def _build_autoencoder(self):
model_input = self._model_input
model_output = self.decoder(self.encoder(model_input))
self.model = Model(model_input, model_output, name="autoencoder")
def _build_decoder(self):
decoder_input = self._add_decoder_input()
dense_layer = self._add_dense_layer(decoder_input)
reshape_layer = self._add_reshape_layer(dense_layer)
conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
decoder_output = self._add_decoder_output(conv_transpose_layers)
self.decoder = Model(decoder_input, decoder_output, name="decoder")
def _add_decoder_input(self):
return Input(shape=self.latent_space_dim, name="decoder_input")
def _add_dense_layer(self, decoder_input):
num_neurons = np.prod(self._shape_before_bottleneck) # [1, 2, 4] -> 8
dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
return dense_layer
def _add_reshape_layer(self, dense_layer):
return Reshape(self._shape_before_bottleneck)(dense_layer)
def _add_conv_transpose_layers(self, x):
"""Add conv transpose blocks."""
# loop through all the conv layers in reverse order and stop at the
# first layer
for layer_index in reversed(range(1, self._num_conv_layers)):
x = self._add_conv_transpose_layer(layer_index, x)
return x
def _add_conv_transpose_layer(self, layer_index, x):
layer_num = self._num_conv_layers - layer_index
conv_transpose_layer = Conv2DTranspose(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"decoder_conv_transpose_layer_{layer_num}"
)
x = conv_transpose_layer(x)
x = ReLU(name=f"decoder_relu_{layer_num}")(x)
x = BatchNormalization(name=f"decoder_bn_{layer_num}")(x)
return x
def _add_decoder_output(self, x):
conv_transpose_layer = Conv2DTranspose(
filters=1,
kernel_size=self.conv_kernels[0],
strides=self.conv_strides[0],
padding="same",
name=f"decoder_conv_transpose_layer_{self._num_conv_layers}"
)
x = conv_transpose_layer(x)
output_layer = Activation("sigmoid", name="sigmoid_layer")(x)
return output_layer
def _build_encoder(self):
encoder_input = self._add_encoder_input()
conv_layers = self._add_conv_layers(encoder_input)
bottleneck = self._add_bottleneck(conv_layers)
self._model_input = encoder_input
self.encoder = Model(encoder_input, bottleneck, name="encoder")
def _add_encoder_input(self):
return Input(shape=self.input_shape, name="encoder_input")
def _add_conv_layers(self, encoder_input):
"""Create all convolutional blocks in encoder."""
x = encoder_input
for layer_index in range(self._num_conv_layers):
x = self._add_conv_layer(layer_index, x)
return x
def _add_conv_layer(self, layer_index, x):
"""Add a convolutional block to a graph of layers, consisting of
conv 2d + ReLU + batch normalization.
"""
layer_number = layer_index + 1
conv_layer = Conv2D(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"encoder_conv_layer_{layer_number}"
)
x = conv_layer(x)
x = ReLU(name=f"encoder_relu_{layer_number}")(x)
x = BatchNormalization(name=f"encoder_bn_{layer_number}")(x)
return x
def _add_bottleneck(self, x):
"""Flatten data and add bottleneck with Guassian sampling (Dense
layer).
"""
self._shape_before_bottleneck = K.int_shape(x)[1:]
x = Flatten()(x)
self.mu = Dense(self.latent_space_dim, name="mu")(x)
self.log_variance = Dense(self.latent_space_dim,
name="log_variance")(x)
def sample_point_from_normal_distribution(args):
mu, log_variance = args
epsilon = K.random_normal(shape=K.shape(self.mu), mean=0.,
stddev=1.)
sampled_point = mu + K.exp(log_variance / 2) * epsilon
return sampled_point
x = Lambda(sample_point_from_normal_distribution,
name="encoder_output")([self.mu, self.log_variance])
return x
LEARNING_RATE = 0.0005
BATCH_SIZE = 32
EPOCHS = 100
def load_mnist():
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255
x_train = x_train.reshape(x_train.shape + (1,))
x_test = x_test.astype("float32") / 255
x_test = x_test.reshape(x_test.shape + (1,))
return x_train, y_train, x_test, y_test
def train(x_train, learning_rate, batch_size, epochs):
autoencoder = VAE(
input_shape=(28, 28, 1),
conv_filters=(32, 64, 64, 64),
conv_kernels=(3, 3, 3, 3),
conv_strides=(1, 2, 2, 1),
latent_space_dim=2
)
autoencoder.summary()
autoencoder.compile(learning_rate)
autoencoder.train(x_train, batch_size, epochs)
return autoencoder
if __name__ == "__main__":
x_train, _, _, _ = load_mnist()
autoencoder = train(x_train[:10000], LEARNING_RATE, BATCH_SIZE, EPOCHS)
autoencoder.save("model")
Maybe you forgot to call the method that returns the object on which _from_serialized() should be invoked.
You did not show your code, but what the error suggests is this:
# you probably passed this
my_method._from_serialized()
# instead of
my_method()._from_serialized()
Otherwise, if it's an error regarding loading a model, I found this issue on GitHub that may help you.
To quote it:
I have the same problem in Keras version: 2.3.0 and in my case, this behaviour can be fixed by using tf.keras.models.load_model instead of direct load_model. I also change every import statement from 'keras' to 'tensorflow.keras' to avoid crash between old keras and new tensorflow.keras. hope it helps, cheers.
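As a minimal sketch of the quoted suggestion (the "model.h5" path here is hypothetical):
import tensorflow as tf

# Load with the tf.keras loader rather than the standalone keras one, and keep
# every import on tensorflow.keras so the two packages are never mixed.
model = tf.keras.models.load_model("model.h5")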
I want to implement the hierarchical attention mechanism for document classification presented by Yang et al., but I want to replace the LSTM with a Transformer.
I used Apoorv Nandan's text classification with Transformer:
https://keras.io/examples/nlp/text_classification_with_transformer/
I have implemented the Transformer hierarchically for classification: one level for sentence representation and another for document representation. The code is as follows:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
class MultiHeadSelfAttention(layers.Layer):
def __init__(self, embed_dim, num_heads=8):
super(MultiHeadSelfAttention, self).__init__()
self.embed_dim = embed_dim
self.num_heads = num_heads
if embed_dim % num_heads != 0:
raise ValueError(
f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
)
self.projection_dim = embed_dim // num_heads
self.query_dense = layers.Dense(embed_dim)
self.key_dense = layers.Dense(embed_dim)
self.value_dense = layers.Dense(embed_dim)
self.combine_heads = layers.Dense(embed_dim)
def attention(self, query, key, value):
score = tf.matmul(query, key, transpose_b=True)
dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
scaled_score = score / tf.math.sqrt(dim_key)
weights = tf.nn.softmax(scaled_score, axis=-1)
output = tf.matmul(weights, value)
return output, weights
def separate_heads(self, x, batch_size):
x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
return tf.transpose(x, perm=[0, 2, 1, 3])
def call(self, inputs):
# x.shape = [batch_size, seq_len, embedding_dim]
batch_size = tf.shape(inputs)[0]
query = self.query_dense(inputs) # (batch_size, seq_len, embed_dim)
key = self.key_dense(inputs) # (batch_size, seq_len, embed_dim)
value = self.value_dense(inputs) # (batch_size, seq_len, embed_dim)
query = self.separate_heads(
query, batch_size
) # (batch_size, num_heads, seq_len, projection_dim)
key = self.separate_heads(
key, batch_size
) # (batch_size, num_heads, seq_len, projection_dim)
value = self.separate_heads(
value, batch_size
) # (batch_size, num_heads, seq_len, projection_dim)
attention, weights = self.attention(query, key, value)
attention = tf.transpose(
attention, perm=[0, 2, 1, 3]
) # (batch_size, seq_len, num_heads, projection_dim)
concat_attention = tf.reshape(
attention, (batch_size, -1, self.embed_dim)
) # (batch_size, seq_len, embed_dim)
output = self.combine_heads(
concat_attention
) # (batch_size, seq_len, embed_dim)
return output
def compute_output_shape(self, input_shape):
# it does not change the shape of its input
return input_shape
class TransformerBlock(layers.Layer):
def __init__(self, embed_dim, num_heads, ff_dim, dropout_rate, name=None):
super(TransformerBlock, self).__init__(name=name)
self.att = MultiHeadSelfAttention(embed_dim, num_heads)
self.ffn = keras.Sequential(
[layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim), ]
)
self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
self.dropout1 = layers.Dropout(dropout_rate)
self.dropout2 = layers.Dropout(dropout_rate)
def call(self, inputs, training):
attn_output = self.att(inputs)
attn_output = self.dropout1(attn_output, training=training)
out1 = self.layernorm1(inputs + attn_output)
ffn_output = self.ffn(out1)
ffn_output = self.dropout2(ffn_output, training=training)
return self.layernorm2(out1 + ffn_output)
def compute_output_shape(self, input_shape):
# it does not change the shape of its input
return input_shape
class TokenAndPositionEmbedding(layers.Layer):
def __init__(self, maxlen, vocab_size, embed_dim, name=None):
super(TokenAndPositionEmbedding, self).__init__(name=name)
self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
def call(self, x):
maxlen = tf.shape(x)[-1]
positions = tf.range(start=0, limit=maxlen, delta=1)
positions = self.pos_emb(positions)
x = self.token_emb(x)
return x + positions
def compute_output_shape(self, input_shape):
# it changes the shape from (batch_size, maxlen) to (batch_size, maxlen, embed_dim)
return input_shape + (self.pos_emb.output_dim,)
# Lower level (produce a representation of each sentence):
embed_dim = 100 # Embedding size for each token
num_heads = 2 # Number of attention heads
ff_dim = 64 # Hidden layer size in feed forward network inside transformer
L1_dense_units = 100 # Size of the sentence-level representations output by the word-level model
dropout_rate = 0.1
vocab_size = 1000
class_number = 5
max_docs = 10000
max_sentences = 15
max_words = 60
word_input = layers.Input(shape=(max_words,), name='word_input')
word_embedding = TokenAndPositionEmbedding(maxlen=max_words, vocab_size=vocab_size,
embed_dim=embed_dim, name='word_embedding')(word_input)
word_transformer = TransformerBlock(embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim,
dropout_rate=dropout_rate, name='word_transformer')(word_embedding)
word_pool = layers.GlobalAveragePooling1D(name='word_pooling')(word_transformer)
word_drop = layers.Dropout(dropout_rate, name='word_drop')(word_pool)
word_dense = layers.Dense(L1_dense_units, activation="relu", name='word_dense')(word_drop)
word_encoder = keras.Model(word_input, word_dense)
word_encoder.summary()
# =========================================================================
# Upper level (produce a representation of each document):
L2_dense_units = 100
sentence_input = layers.Input(shape=(max_sentences, max_words), name='sentence_input')
# This is the line producing "NotImplementedError":
sentence_encoder = tf.keras.layers.TimeDistributed(word_encoder, name='sentence_encoder')(sentence_input)
sentence_transformer = TransformerBlock(embed_dim=L1_dense_units, num_heads=num_heads, ff_dim=ff_dim,
dropout_rate=dropout_rate, name='sentence_transformer')(sentence_encoder)
sentence_dense = layers.TimeDistributed(layers.Dense(int(L2_dense_units)),name='sentence_dense')(sentence_transformer)
sentence_out = layers.Dropout(dropout_rate)(sentence_dense)
preds = layers.Dense(class_number , activation='softmax', name='sentence_output')(sentence_out)
model = keras.Model(sentence_input, preds)
model.summary()
#==========================================================================
Everything is OK (for testing, you can copy and paste it into Google Colab), but when I compile and fit the model with the following code, it throws an error:
X = tf.random.uniform(shape=(max_docs, max_sentences, max_words), minval=1, maxval=1000, dtype=tf.dtypes.int32, seed=1)
y = tf.random.uniform(shape=(max_docs, ), minval=0, maxval=class_number , dtype=tf.dtypes.int32, seed=1)
y = to_categorical(y)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
history = model.fit(
X, y, batch_size=32, epochs=25,
)
The error is:
ValueError: Shapes (None, 5) and (None, 15, 5) are incompatible
When I had a similar error, I found that a Flatten() layer helped; I had incompatible shapes of (None, x, y) and (None, y).
If you add a Flatten layer before the part that gives you the (None, 15, 5), it should output something like (None, 75).
The Flatten layer merely collapses dimensions: when I did this I got an output of (None, x*y), and TensorFlow could then match the shapes, since x*y is a multiple of y.
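As a minimal sketch applied to the model above (layer names reused from the question; note that flattening concatenates all 15 sentence vectors before the final classification layer):
# Flatten (None, 15, L2_dense_units) to (None, 15 * L2_dense_units) so the
# final Dense produces (None, class_number), matching the shape of y.
sentence_flat = layers.Flatten(name='sentence_flatten')(sentence_out)
preds = layers.Dense(class_number, activation='softmax',
                     name='sentence_output')(sentence_flat)
model = keras.Model(sentence_input, preds)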
This model is a CNN variant that uses causal dilated convolution layers.
I can train and predict without errors, but when I use model.save() to save the model, it throws an exception.
So I use save_weights and load_weights to save and load the model instead.
I wonder why this error appears:
model.save("path")
Output:
ValueError: Dimension size must be evenly divisible by 2 but is 745 for '{{node conv1d_5/SpaceToBatchND}} = SpaceToBatchND[T=DT_FLOAT, Tblock_shape=DT_INT32, Tpaddings=DT_INT32](conv1d_5/Pad, conv1d_5/SpaceToBatchND/block_shape, conv1d_5/SpaceToBatchND/paddings)' with input shapes: [?,745,32], [1], [1,2] and with computed input tensors: input[1] = <2>, input[2] = <[0 0]>.
Input shape is (None,743,27)
Output shape is (None,24,1)
import tensorflow as tf
from tensorflow.keras.layers import (Conv1D, Lambda, Activation, Dropout,
                                     Add, Concatenate)
from tensorflow.keras.optimizers import Adam

def slice(x, seq_length):
    # keep only the last `seq_length` time steps of the sequence
    return x[:, -seq_length:, :]
class ResidualBlock(tf.keras.layers.Layer):
def __init__(self, n_filters, filter_width, dilation_rate):
super(ResidualBlock, self).__init__()
self.n_filters = n_filters
self.filter_width = filter_width
self.dilation_rate = dilation_rate
# preprocessing - equivalent to time-distributed dense
self.x = Conv1D(32, 1, padding='same', activation='relu')
# filter convolution
self.x_f = Conv1D(filters=n_filters,
kernel_size=filter_width,
padding='causal',
dilation_rate=dilation_rate,
activation='tanh')
# gating convolution
self.x_g = Conv1D(filters=n_filters,
kernel_size=filter_width,
padding='causal',
dilation_rate=dilation_rate,
activation='sigmoid')
# postprocessing - equivalent to time-distributed dense
self.z_p = Conv1D(32, 1, padding='same', activation='relu')
def call(self, inputs):
x = self.x(inputs)
f = self.x_f(x)
g = self.x_g(x)
z = tf.multiply(f, g)
z = self.z_p(z)
return tf.add(x, z), z
def get_config(self):
config = super(ResidualBlock, self).get_config()
config.update({"n_filters": self.n_filters,
"filter_width": self.filter_width,
"dilation_rate": self.dilation_rate})
return config
class WaveNet(tf.keras.Model):
def __init__(self, n_filters=32, filter_width=2, dilation_rates=None, drop_out=0.2, pred_length=24):
super().__init__(name='WaveNet')
# Layer Parameter
self.n_filters = n_filters
self.filter_width = filter_width
self.drop_out = drop_out
self.pred_length = pred_length
if dilation_rates is None:
self.dilation_rates = [2 ** i for i in range(8)]
else:
self.dilation_rates = dilation_rates
# Layer
self.residual_stacks = []
for dilation_rate in self.dilation_rates:
self.residual_stacks.append(ResidualBlock(self.n_filters, self.filter_width, dilation_rate))
# self.add = Add()
self.cut = Lambda(slice, arguments={'seq_length': pred_length})
self.conv_1 = Conv1D(128, 1, padding='same')
self.relu = Activation('relu')
self.drop = Dropout(drop_out)
self.skip = Lambda(lambda x: x[:, -2 * pred_length + 1:-pred_length + 1, :1])
self.conv_2 = Conv1D(1, 1, padding='same')
def _unroll(self, inputs, **kwargs):
outputs = inputs
skips = []
for residual_block in self.residual_stacks:
outputs, z = residual_block(outputs)
skips.append(z)
outputs = self.relu(Add()(skips))
outputs = self.cut(outputs)
outputs = self.conv_1(outputs)
outputs = self.relu(outputs)
outputs = self.drop(outputs)
outputs = Concatenate()([outputs, self.skip(inputs)])
outputs = self.conv_2(outputs)
outputs = self.cut(outputs)
return outputs
def _get_output(self, input_tensor):
pass
def call(self, inputs, training=False, **kwargs):
if training:
return self._unroll(inputs)
else:
return self._get_output(inputs)
Training step:
model = WaveNet()
model.compile(Adam(), loss=loss)
# ok
history = model.fit(train_x, train_y,
batch_size=batch_size,
epochs=epochs,
callbacks=[cp_callback] if save else None)
# ok
result = model.predict(test_x)
# error
model.save("path")
So I wrote this generalised TensorFlow code and want to save and restore models, but apparently the error is that there are no variables to save. I did everything as given in this official example. Ignore the __init__ method except its last line, since it only takes the parameters the model is trained with; there are also no syntax errors. The error it produces is given below the code.
class Neural_Network(object):
def __init__(self, numberOfLayers, nodes, activations, learningRate,
optimiser = 'GradientDescent', regularizer = None,
dropout = 0.5, initializer = tf.contrib.layers.xavier_initializer()):
self.numberOfLayers = numberOfLayers
self.nodes = nodes
self.activations = activations
self.learningRate = learningRate
self.regularizer = regularizer
self.dropout = dropout
self.initializer = initializer
if(optimiser == 'GradientDescent'):
self.optimiser = tf.train.GradientDescentOptimizer(self.learningRate)
elif(optimiser == 'AdamOptimiser'):
self.optimiser = tf.train.AdamOptimizer(self.learningRate)
self.saver = tf.train.Saver()
def create_Neural_Net(self, numberOfFeatures):
self.numberOfFeatures = numberOfFeatures
self.X = tf.placeholder(dtype = tf.float32, shape = (None, self.numberOfFeatures), name = 'Input_Dataset')
#self.output = None
for i in range(0, self.numberOfLayers):
if(i == 0):
layer = tf.contrib.layers.fully_connected(self.X, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
elif(i == self.numberOfLayers-1):
self.output = tf.contrib.layers.fully_connected(layer, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
else:
layer = tf.contrib.layers.fully_connected(layer, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
def train_Neural_Net(self, dataset, labels, epochs):
entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = self.output, labels = labels, name = 'cross_entropy')
loss = tf.reduce_mean(entropy, name = 'loss')
hypothesis = tf.nn.softmax(self.output)
correct_preds = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
train_op = self.optimiser.minimize(loss)
self.loss=[]
self.accuracy = []
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(0, epochs):
_, l, acc = sess.run([train_op, loss, accuracy], feed_dict = {self.X:dataset})
print('Loss in epoch ' + str(i) + ' is: ' + str(l))
self.loss.append(l)
self.accuracy.append(acc)
self.saver.save(sess, './try.ckpt')
return self.loss, self.accuracy
I ran this code as:
nn = Neural_Network(2, [20,3], [tf.nn.relu, tf.nn.relu], 0.001, optimiser = 'AdamOptimiser')
nn.create_Neural_Net(4)
nn.train_Neural_Net(dataset, labels, 1000)
The error it gives is:
ValueError: No variables to save
So what is wrong in this code? And how can I fix it?
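One likely cause, offered as a hedged sketch: tf.train.Saver() only collects the variables that exist at the moment it is constructed, and it raises ValueError: No variables to save when there are none. Here the saver is created in __init__, before create_Neural_Net has built any layers, so one fix is to create it after the graph exists:
# Sketch: remove `self.saver = tf.train.Saver()` from __init__, then create the
# saver once the fully connected layers (and their variables) have been built.
nn = Neural_Network(2, [20, 3], [tf.nn.relu, tf.nn.relu], 0.001,
                    optimiser='AdamOptimiser')
nn.create_Neural_Net(4)
nn.saver = tf.train.Saver()  # the graph now contains variables to save
nn.train_Neural_Net(dataset, labels, 1000)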