I am trying to implement a custom modified ReLU in TensorFlow 1 that uses two learnable parameters. However, the parameters are not being learned even after running 1000 training steps, as shown by printing their values before and after training. I have observed that when I do not split x inside the function, i.e. when I execute the commented lines instead, the coefficients are learned. Could anyone suggest why splitting the input prevents the trainable coefficients from being learned, and how this can be resolved?
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
def weight_variable(shape,vari_name):
initial = tf.truncated_normal(shape, stddev=0.1,dtype=tf.float32)
return tf.Variable(initial,name = vari_name)
def init_Prelu_coefficient(var1, var2):
coeff = tf.truncated_normal(([1]), stddev=0.1,dtype=tf.float32)
coeff1 = tf.truncated_normal(([1]), stddev=0.1,dtype=tf.float32)
return tf.Variable(coeff, trainable=True, name=var1), tf.Variable(coeff1, trainable=True, name=var2)
def Prelu(x, coeff, coeff1):
s = int(x.shape[-1])
sop = x[:,:,:,:s//2]*coeff+x[:,:,:,s//2:]*coeff1
sop1 = x[:,:,:,:s//2]*coeff-x[:,:,:,s//2:]*coeff1
copied_variable = tf.concat([sop, sop1], axis=-1)
copied_variable = tf.math.maximum(copied_variable,0)/copied_variable
# copied_variable = tf.identity(x)
# copied_variable = tf.math.maximum(copied_variable*coeff+copied_variable*coeff1,0)/copied_variable
# copied_variable = tf.multiply(copied_variable,x)
return copied_variable
def conv2d_dilate(x, W, dilate_rate):
return tf.nn.convolution(x, W,padding='VALID',dilation_rate = [1,dilate_rate])
matr = np.random.rand(1, 60, 40, 8)
target = np.random.rand(1, 58, 36, 8)
def learning(sess):
# define placeholder for inputs to network
Input = tf.placeholder(tf.float32, [1, 60, 40, 8])
input_Target = tf.placeholder(tf.float32, [1, 58, 36, 8])
kernel = weight_variable([3, 3, 8, 8],'G1')
coeff, coeff1 = init_Prelu_coefficient('alpha', 'alpha1')
conv = Prelu(conv2d_dilate(Input, kernel , 2), coeff, coeff1)
error_norm = 1*tf.norm(input_Target - conv)
print("MOMENTUM LEARNING")
train_step = tf.train.MomentumOptimizer(learning_rate=0.001,momentum=0.9,use_nesterov=False).minimize(error_norm)
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
init = tf.initialize_all_variables()
else:
init = tf.global_variables_initializer()
sess.run(init)
print("INIT coefficient ", sess.run(coeff), sess.run(coeff1))
init_var = tf.trainable_variables()
error_prev = 1 # initial error, we set 1 and it began to decrease.
for i in range(1000):
sess.run(train_step, feed_dict={Input: matr, input_Target: target})
if i % 100 == 0:
error_now=sess.run(error_norm,feed_dict={Input : matr, input_Target: target})
print('The',i,'th iteration gives an error',error_now)
error = sess.run(error_norm,feed_dict={Input: matr, input_Target: target})
print(sess.run(kernel))
print("LEARNT coefficient ", sess.run(coeff), sess.run(coeff1))
sess = tf.Session()
learning(sess)
I'm trying to train a model in TensorFlow and get the error:
AttributeError: 'method' object has no attribute '_from_serialized'
This is code that I have copied and seen work.
It seems to have something to do with the compatibility between my TensorFlow version and my Python version. I'm able to run other models, but this error seems to occur when I'm trying to track custom metrics.
What are the most recent compatible versions of TensorFlow-GPU and Python that can run models while tracking custom metrics?
I've checked the table that TensorFlow provides and these versions should be compatible.
My current version of TensorFlow is 2.10.0.
My Python version is 3.9.6.
Is there something else that might cause this error? I've created multiple environments with different versions and still receive it.
import os
import pickle
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization, \
Flatten, Dense, Reshape, Conv2DTranspose, Activation, Lambda
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
import numpy as np
import tensorflow as tf
class VAE:
"""
VAE represents a Deep Convolutional variational autoencoder architecture
with mirrored encoder and decoder components.
"""
def __init__(self,
input_shape,
conv_filters,
conv_kernels,
conv_strides,
latent_space_dim):
self.input_shape = input_shape # [28, 28, 1]
self.conv_filters = conv_filters # [2, 4, 8]
self.conv_kernels = conv_kernels # [3, 5, 3]
self.conv_strides = conv_strides # [1, 2, 2]
self.latent_space_dim = latent_space_dim # 2
self.reconstruction_loss_weight = 1000
self.encoder = None
self.decoder = None
self.model = None
self._num_conv_layers = len(conv_filters)
self._shape_before_bottleneck = None
self._model_input = None
self._build()
def summary(self):
self.encoder.summary()
self.decoder.summary()
self.model.summary()
def compile(self, learning_rate=0.0001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[self._calculate_reconstruction_loss,
self._calculate_kl_loss])
def train(self, x_train, batch_size, num_epochs):
self.model.fit(x_train,
x_train,
batch_size=batch_size,
epochs=num_epochs,
shuffle=True)
def save(self, save_folder="."):
self._create_folder_if_it_doesnt_exist(save_folder)
self._save_parameters(save_folder)
self._save_weights(save_folder)
def load_weights(self, weights_path):
self.model.load_weights(weights_path)
def reconstruct(self, images):
latent_representations = self.encoder.predict(images)
reconstructed_images = self.decoder.predict(latent_representations)
return reconstructed_images, latent_representations
@classmethod
def load(cls, save_folder="."):
parameters_path = os.path.join(save_folder, "parameters.pkl")
with open(parameters_path, "rb") as f:
parameters = pickle.load(f)
autoencoder = VAE(*parameters)
weights_path = os.path.join(save_folder, "weights.h5")
autoencoder.load_weights(weights_path)
return autoencoder
def _calculate_combined_loss(self, y_target, y_predicted):
reconstruction_loss = self._calculate_reconstruction_loss(y_target, y_predicted)
kl_loss = self._calculate_kl_loss(y_target, y_predicted)
combined_loss = self.reconstruction_loss_weight * reconstruction_loss\
+ kl_loss
return combined_loss
def _calculate_reconstruction_loss(self, y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
def _calculate_kl_loss(self, y_target, y_predicted):
kl_loss = -0.5 * K.sum(1 + self.log_variance - K.square(self.mu) -
K.exp(self.log_variance), axis=1)
return kl_loss
def _create_folder_if_it_doesnt_exist(self, folder):
if not os.path.exists(folder):
os.makedirs(folder)
def _save_parameters(self, save_folder):
parameters = [
self.input_shape,
self.conv_filters,
self.conv_kernels,
self.conv_strides,
self.latent_space_dim
]
save_path = os.path.join(save_folder, "parameters.pkl")
with open(save_path, "wb") as f:
pickle.dump(parameters, f)
def _save_weights(self, save_folder):
save_path = os.path.join(save_folder, "weights.h5")
self.model.save_weights(save_path)
def _build(self):
self._build_encoder()
self._build_decoder()
self._build_autoencoder()
def _build_autoencoder(self):
model_input = self._model_input
model_output = self.decoder(self.encoder(model_input))
self.model = Model(model_input, model_output, name="autoencoder")
def _build_decoder(self):
decoder_input = self._add_decoder_input()
dense_layer = self._add_dense_layer(decoder_input)
reshape_layer = self._add_reshape_layer(dense_layer)
conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
decoder_output = self._add_decoder_output(conv_transpose_layers)
self.decoder = Model(decoder_input, decoder_output, name="decoder")
def _add_decoder_input(self):
return Input(shape=self.latent_space_dim, name="decoder_input")
def _add_dense_layer(self, decoder_input):
num_neurons = np.prod(self._shape_before_bottleneck) # [1, 2, 4] -> 8
dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
return dense_layer
def _add_reshape_layer(self, dense_layer):
return Reshape(self._shape_before_bottleneck)(dense_layer)
def _add_conv_transpose_layers(self, x):
"""Add conv transpose blocks."""
# loop through all the conv layers in reverse order and stop at the
# first layer
for layer_index in reversed(range(1, self._num_conv_layers)):
x = self._add_conv_transpose_layer(layer_index, x)
return x
def _add_conv_transpose_layer(self, layer_index, x):
layer_num = self._num_conv_layers - layer_index
conv_transpose_layer = Conv2DTranspose(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"decoder_conv_transpose_layer_{layer_num}"
)
x = conv_transpose_layer(x)
x = ReLU(name=f"decoder_relu_{layer_num}")(x)
x = BatchNormalization(name=f"decoder_bn_{layer_num}")(x)
return x
def _add_decoder_output(self, x):
conv_transpose_layer = Conv2DTranspose(
filters=1,
kernel_size=self.conv_kernels[0],
strides=self.conv_strides[0],
padding="same",
name=f"decoder_conv_transpose_layer_{self._num_conv_layers}"
)
x = conv_transpose_layer(x)
output_layer = Activation("sigmoid", name="sigmoid_layer")(x)
return output_layer
def _build_encoder(self):
encoder_input = self._add_encoder_input()
conv_layers = self._add_conv_layers(encoder_input)
bottleneck = self._add_bottleneck(conv_layers)
self._model_input = encoder_input
self.encoder = Model(encoder_input, bottleneck, name="encoder")
def _add_encoder_input(self):
return Input(shape=self.input_shape, name="encoder_input")
def _add_conv_layers(self, encoder_input):
"""Create all convolutional blocks in encoder."""
x = encoder_input
for layer_index in range(self._num_conv_layers):
x = self._add_conv_layer(layer_index, x)
return x
def _add_conv_layer(self, layer_index, x):
"""Add a convolutional block to a graph of layers, consisting of
conv 2d + ReLU + batch normalization.
"""
layer_number = layer_index + 1
conv_layer = Conv2D(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"encoder_conv_layer_{layer_number}"
)
x = conv_layer(x)
x = ReLU(name=f"encoder_relu_{layer_number}")(x)
x = BatchNormalization(name=f"encoder_bn_{layer_number}")(x)
return x
def _add_bottleneck(self, x):
"""Flatten data and add bottleneck with Guassian sampling (Dense
layer).
"""
self._shape_before_bottleneck = K.int_shape(x)[1:]
x = Flatten()(x)
self.mu = Dense(self.latent_space_dim, name="mu")(x)
self.log_variance = Dense(self.latent_space_dim,
name="log_variance")(x)
def sample_point_from_normal_distribution(args):
mu, log_variance = args
epsilon = K.random_normal(shape=K.shape(self.mu), mean=0.,
stddev=1.)
sampled_point = mu + K.exp(log_variance / 2) * epsilon
return sampled_point
x = Lambda(sample_point_from_normal_distribution,
name="encoder_output")([self.mu, self.log_variance])
return x
if __name__ == "__main__":
autoencoder = VAE(
input_shape=(28, 28, 1),
conv_filters=(32, 64, 64, 64),
conv_kernels=(3, 3, 3, 3),
conv_strides=(1, 2, 2, 1),
latent_space_dim=2
)
autoencoder.summary()
LEARNING_RATE = 0.0005
BATCH_SIZE = 32
EPOCHS = 100
def load_mnist():
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255
x_train = x_train.reshape(x_train.shape + (1,))
x_test = x_test.astype("float32") / 255
x_test = x_test.reshape(x_test.shape + (1,))
return x_train, y_train, x_test, y_test
def train(x_train, learning_rate, batch_size, epochs):
autoencoder = VAE(
input_shape=(28, 28, 1),
conv_filters=(32, 64, 64, 64),
conv_kernels=(3, 3, 3, 3),
conv_strides=(1, 2, 2, 1),
latent_space_dim=2
)
autoencoder.summary()
autoencoder.compile(learning_rate)
autoencoder.train(x_train, batch_size, epochs)
return autoencoder
if __name__ == "__main__":
x_train, _, _, _ = load_mnist()
autoencoder = train(x_train[:10000], LEARNING_RATE, BATCH_SIZE, EPOCHS)
autoencoder.save("model")
The AttributeError here is raised because you can't set any attribute on a method object, e.g.:
class Foo:
def bar(self):
print("bar")
if __name__ == "__main__":
Foo().bar.baz = 1
Output:
Traceback (most recent call last): line 7, in <module>
Foo().bar.baz = 1
AttributeError: 'method' object has no attribute 'baz'
When collecting the metric information in training_utils_v1, the metrics specified when the model is compiled (model.compile(..., metrics=[..])) are iterated over, and for each metric, the attribute _from_serialized is set:
for i, metrics in enumerate(nested_metrics):
metrics_dict = collections.OrderedDict()
for metric in metrics:
metric_name = get_metric_name(metric, is_weighted)
metric_fn = get_metric_function(
metric, output_shape=output_shapes[i], loss_fn=loss_fns[i]
)
metric_fn._from_serialized = from_serialized
In the example provided, two metrics are supplied to model.compile, and each is a method of the VAE class:
def compile(self, learning_rate=0.0001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[self._calculate_reconstruction_loss,
self._calculate_kl_loss])
To verify this, observe that if metrics is omitted entirely, training starts successfully.
One of the metrics supplied, _calculate_reconstruction_loss, is a method that does not need to be one, as it never refers to self in its body:
def _calculate_reconstruction_loss(self, y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
So it can be moved outside of the class and made into a plain function (some IDEs will recommend this to you with a message to the effect of "Make function from method"):
def _calculate_reconstruction_loss(y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
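The reason this helps is that plain functions accept arbitrary attributes, while bound methods do not. A minimal, self-contained illustration (the names here are hypothetical and separate from the VAE code):
def standalone_metric(y_target, y_predicted):
    return y_target - y_predicted

class Wrapper:
    def bound_metric(self, y_target, y_predicted):
        return y_target - y_predicted

# Setting an attribute on a plain function works, which is what
# training_utils_v1 relies on when it assigns metric_fn._from_serialized.
standalone_metric._from_serialized = False

# Doing the same on a bound method raises the error from the question.
try:
    Wrapper().bound_metric._from_serialized = False
except AttributeError as exc:
    print(exc)  # 'method' object has no attribute '_from_serialized'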
The compile statement can then be revised:
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[_calculate_reconstruction_loss,
self._calculate_kl_loss])
The same exception will still appear, since we're still referring to a method in the call (self._calculate_kl_loss), but if self._calculate_kl_loss is omitted as well, the training will start successfully:
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[_calculate_reconstruction_loss])
For completeness, reviewing what self._calculate_kl_loss is doing is a bit more tricky, but we can still use it as a metric by converting it into a function that takes a single argument, model, and returns another function that takes an arbitrary number of arguments (*args), so that it can be used both as a metric (which always expects a function of two arguments) and in the loss function:
def calculate_kl_loss(model):
# wrap `_calculate_kl_loss` such that it takes the model as an argument,
# returns a function which can take arbitrary number of arguments
# (for compatibility with `metrics` and utility in the loss function)
# and returns the kl loss
def _calculate_kl_loss(*args):
kl_loss = -0.5 * K.sum(1 + model.log_variance - K.square(model.mu) -
K.exp(model.log_variance), axis=1)
return kl_loss
return _calculate_kl_loss
With these revisions, when training is started, the output is:
Epoch 1/100
10000/10000 [==============================] - 43s 4ms/sample - loss: 89.4243 - _calculate_reconstruction_loss: 0.0833 - _calculate_kl_loss: 6.1707
Epoch 2/100
10000/10000 [==============================] - 46s 5ms/sample - loss: 69.0131 - _calculate_reconstruction_loss: 0.0619 - _calculate_kl_loss: 7.1129
The entire snippet with revisions detailed above is:
import os
import pickle
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization, \
Flatten, Dense, Reshape, Conv2DTranspose, Activation, Lambda
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()
def _calculate_reconstruction_loss(y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
def calculate_kl_loss(model):
# wrap `_calculate_kl_loss` such that it takes the model as an argument,
# returns a function which can take arbitrary number of arguments
# (for compatibility with `metrics` and utility in the loss function)
# and returns the kl loss
def _calculate_kl_loss(*args):
kl_loss = -0.5 * K.sum(1 + model.log_variance - K.square(model.mu) -
K.exp(model.log_variance), axis=1)
return kl_loss
return _calculate_kl_loss
class VAE:
"""
VAE represents a Deep Convolutional variational autoencoder architecture
with mirrored encoder and decoder components.
"""
def __init__(self,
input_shape,
conv_filters,
conv_kernels,
conv_strides,
latent_space_dim):
self.input_shape = input_shape # [28, 28, 1]
self.conv_filters = conv_filters # [2, 4, 8]
self.conv_kernels = conv_kernels # [3, 5, 3]
self.conv_strides = conv_strides # [1, 2, 2]
self.latent_space_dim = latent_space_dim # 2
self.reconstruction_loss_weight = 1000
self.encoder = None
self.decoder = None
self.model = None
self._num_conv_layers = len(conv_filters)
self._shape_before_bottleneck = None
self._model_input = None
self._build()
def summary(self):
self.encoder.summary()
self.decoder.summary()
self.model.summary()
def compile(self, learning_rate=0.0001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[_calculate_reconstruction_loss,
calculate_kl_loss(self)])
def train(self, x_train, batch_size, num_epochs):
self.model.fit(x_train,
x_train,
batch_size=batch_size,
epochs=num_epochs,
shuffle=True)
def save(self, save_folder="."):
self._create_folder_if_it_doesnt_exist(save_folder)
self._save_parameters(save_folder)
self._save_weights(save_folder)
def load_weights(self, weights_path):
self.model.load_weights(weights_path)
def reconstruct(self, images):
latent_representations = self.encoder.predict(images)
reconstructed_images = self.decoder.predict(latent_representations)
return reconstructed_images, latent_representations
@classmethod
def load(cls, save_folder="."):
parameters_path = os.path.join(save_folder, "parameters.pkl")
with open(parameters_path, "rb") as f:
parameters = pickle.load(f)
autoencoder = VAE(*parameters)
weights_path = os.path.join(save_folder, "weights.h5")
autoencoder.load_weights(weights_path)
return autoencoder
def _calculate_combined_loss(self, y_target, y_predicted):
reconstruction_loss = _calculate_reconstruction_loss(y_target, y_predicted)
kl_loss = calculate_kl_loss(self)()
combined_loss = self.reconstruction_loss_weight * reconstruction_loss\
+ kl_loss
return combined_loss
def _create_folder_if_it_doesnt_exist(self, folder):
if not os.path.exists(folder):
os.makedirs(folder)
def _save_parameters(self, save_folder):
parameters = [
self.input_shape,
self.conv_filters,
self.conv_kernels,
self.conv_strides,
self.latent_space_dim
]
save_path = os.path.join(save_folder, "parameters.pkl")
with open(save_path, "wb") as f:
pickle.dump(parameters, f)
def _save_weights(self, save_folder):
save_path = os.path.join(save_folder, "weights.h5")
self.model.save_weights(save_path)
def _build(self):
self._build_encoder()
self._build_decoder()
self._build_autoencoder()
def _build_autoencoder(self):
model_input = self._model_input
model_output = self.decoder(self.encoder(model_input))
self.model = Model(model_input, model_output, name="autoencoder")
def _build_decoder(self):
decoder_input = self._add_decoder_input()
dense_layer = self._add_dense_layer(decoder_input)
reshape_layer = self._add_reshape_layer(dense_layer)
conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
decoder_output = self._add_decoder_output(conv_transpose_layers)
self.decoder = Model(decoder_input, decoder_output, name="decoder")
def _add_decoder_input(self):
return Input(shape=self.latent_space_dim, name="decoder_input")
def _add_dense_layer(self, decoder_input):
num_neurons = np.prod(self._shape_before_bottleneck) # [1, 2, 4] -> 8
dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
return dense_layer
def _add_reshape_layer(self, dense_layer):
return Reshape(self._shape_before_bottleneck)(dense_layer)
def _add_conv_transpose_layers(self, x):
"""Add conv transpose blocks."""
# loop through all the conv layers in reverse order and stop at the
# first layer
for layer_index in reversed(range(1, self._num_conv_layers)):
x = self._add_conv_transpose_layer(layer_index, x)
return x
def _add_conv_transpose_layer(self, layer_index, x):
layer_num = self._num_conv_layers - layer_index
conv_transpose_layer = Conv2DTranspose(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"decoder_conv_transpose_layer_{layer_num}"
)
x = conv_transpose_layer(x)
x = ReLU(name=f"decoder_relu_{layer_num}")(x)
x = BatchNormalization(name=f"decoder_bn_{layer_num}")(x)
return x
def _add_decoder_output(self, x):
conv_transpose_layer = Conv2DTranspose(
filters=1,
kernel_size=self.conv_kernels[0],
strides=self.conv_strides[0],
padding="same",
name=f"decoder_conv_transpose_layer_{self._num_conv_layers}"
)
x = conv_transpose_layer(x)
output_layer = Activation("sigmoid", name="sigmoid_layer")(x)
return output_layer
def _build_encoder(self):
encoder_input = self._add_encoder_input()
conv_layers = self._add_conv_layers(encoder_input)
bottleneck = self._add_bottleneck(conv_layers)
self._model_input = encoder_input
self.encoder = Model(encoder_input, bottleneck, name="encoder")
def _add_encoder_input(self):
return Input(shape=self.input_shape, name="encoder_input")
def _add_conv_layers(self, encoder_input):
"""Create all convolutional blocks in encoder."""
x = encoder_input
for layer_index in range(self._num_conv_layers):
x = self._add_conv_layer(layer_index, x)
return x
def _add_conv_layer(self, layer_index, x):
"""Add a convolutional block to a graph of layers, consisting of
conv 2d + ReLU + batch normalization.
"""
layer_number = layer_index + 1
conv_layer = Conv2D(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"encoder_conv_layer_{layer_number}"
)
x = conv_layer(x)
x = ReLU(name=f"encoder_relu_{layer_number}")(x)
x = BatchNormalization(name=f"encoder_bn_{layer_number}")(x)
return x
def _add_bottleneck(self, x):
"""Flatten data and add bottleneck with Guassian sampling (Dense
layer).
"""
self._shape_before_bottleneck = K.int_shape(x)[1:]
x = Flatten()(x)
self.mu = Dense(self.latent_space_dim, name="mu")(x)
self.log_variance = Dense(self.latent_space_dim,
name="log_variance")(x)
def sample_point_from_normal_distribution(args):
mu, log_variance = args
epsilon = K.random_normal(shape=K.shape(self.mu), mean=0.,
stddev=1.)
sampled_point = mu + K.exp(log_variance / 2) * epsilon
return sampled_point
x = Lambda(sample_point_from_normal_distribution,
name="encoder_output")([self.mu, self.log_variance])
return x
LEARNING_RATE = 0.0005
BATCH_SIZE = 32
EPOCHS = 100
def load_mnist():
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255
x_train = x_train.reshape(x_train.shape + (1,))
x_test = x_test.astype("float32") / 255
x_test = x_test.reshape(x_test.shape + (1,))
return x_train, y_train, x_test, y_test
def train(x_train, learning_rate, batch_size, epochs):
autoencoder = VAE(
input_shape=(28, 28, 1),
conv_filters=(32, 64, 64, 64),
conv_kernels=(3, 3, 3, 3),
conv_strides=(1, 2, 2, 1),
latent_space_dim=2
)
autoencoder.summary()
autoencoder.compile(learning_rate)
autoencoder.train(x_train, batch_size, epochs)
return autoencoder
if __name__ == "__main__":
x_train, _, _, _ = load_mnist()
autoencoder = train(x_train[:10000], LEARNING_RATE, BATCH_SIZE, EPOCHS)
autoencoder.save("model")
Maybe you forgot to call the method that should return the object on which _from_serialized() is available.
You did not show your code, but what the error implies is this:
# you probably passed this
my_method._from_serialized()
# instead of
my_method()._from_serialized()
Otherwise, if it's an error regarding the loading of a model, I found this issue on GitHub that may help you.
To quote it:
I have the same problem in Keras version: 2.3.0 and in my case, this behaviour can be fixed by using tf.keras.models.load_model instead of direct load_model. I also change every import statement from 'keras' to 'tensorflow.keras' to avoid crash between old keras and new tensorflow.keras. hope it helps, cheers.
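If the error does occur while loading a saved model, a minimal sketch of that suggestion (the file path here is hypothetical) would be:
import tensorflow as tf

# Load with the tf.keras API rather than the standalone keras package,
# and keep all other imports under tensorflow.keras to avoid mixing the two.
model = tf.keras.models.load_model("my_model.h5")  # hypothetical path
model.summary()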
Hi, I've been working on a neural network to tackle the MNIST dataset, but when I run the code the accuracy begins to increase and then eventually settles at 0.098. I also encounter an overflow error in exp when calculating the SoftMax values. I have tried to debug my code but I don't understand where I'm going wrong. If anyone can point me in the right direction that would be great, and if you can't find an error, could you give me any tips on techniques for debugging this? Thanks in advance.
import numpy as np
import pandas as pd
df = pd.read_csv('../input/digit-recognizer/train.csv')
data = np.array(df.values)
data = data.T
data
Y = data[0,:]
X = data[1:,:]
Y_train = Y[:41000]
X_train = X[:,:41000]
X_train = X_train/255
Y_val = Y[41000:]
X_val = X[:,41000:]
X_val = X_val/255
print(np.max(X_train))
class NeuralNetwork:
def __init__(self, n_in, n_out):
self.w1, self.b1 = self.Generate_Weights_Biases(10,784)
self.w2, self.b2 = self.Generate_Weights_Biases(10,10)
def Generate_Weights_Biases(self, n_in, n_out):
weights = 0.01*np.random.randn(n_in, n_out)
biases = np.zeros((n_in,1))
return weights, biases
def forward(self, X):
self.Z1 = self.w1.dot(X) + self.b1
self.a1 = self.ReLu(self.Z1)
self.z2 = self.w2.dot(self.a1) + self.b2
y_pred = self.Softmax(self.z2)
return y_pred
def ReLu(self, Z):
return np.maximum(Z,0)
def Softmax(self, Z):
#exponentials = np.exp(Z)
#sumexp = np.sum(np.exp(Z), axis=0)
#print(Z)
return np.exp(Z)/np.sum(np.exp(Z))
def ReLu_Derv(self, x):
return np.greaterthan(x, 0).astype(int)
def One_hot_encoding(self, Y):
one_hot = np.zeros((Y.size, 10))
rows = np.arange(Y.size)
one_hot[rows, Y] = 1
one_hot = one_hot.T
return one_hot
def Get_predictions(self, y_pred):
return np.argmax(y_pred, 0)
def accuracy(self, pred, Y):
return np.sum(pred == Y)/Y.size
def BackPropagation(self, X, Y, y_pred, lr=0.01):
m = Y.size
one_hot_y = self.One_hot_encoding(Y)
e2 = y_pred - one_hot_y
derW2 = (1/m)* e2.dot(self.a1.T)
derB2 =(1/m) * e2
#derB2 = derB2.reshape(10,1)
e1 = self.w2.T.dot(e2) * self.ReLu(self.a1)
derW1 = (1/m) * e1.dot(X.T)
derB1 = (1/m) * e1
#derB1 = derB1.reshape(10,1)
self.w1 = self.w1 - lr*derW1
self.b1 = self.b1 - lr*np.sum(derB1, axis=1, keepdims=True)
self.w2 = self.w2 - lr*derW2
self.b2 = self.b2 - lr*np.sum(derB2, axis=1, keepdims=True)
def train(self, X, Y, epochs = 1000):
for i in range(epochs):
y_pred = self.forward(X)
predict = self.Get_predictions(y_pred)
accuracy = self.accuracy(predict, Y)
print(accuracy)
self.BackPropagation(X, Y, y_pred)
return self.w1, self.b1, self.w2, self.b2
NN = NeuralNetwork(X_train, Y_train)
w1,b1,w2,b2 = NN.train(X_train,Y_train)
I found the following errors:
Your softmax implementation doesn't work because of the severe numerical errors you get when exponentiating potentially large numbers to obtain something between 0 and 1. Besides that, you forgot to specify the summation axis in the denominator. Here is a working implementation:
def Softmax(self, Z):
e = np.exp(Z - Z.max(axis=0, keepdims=True))
return e/e.sum(axis=0, keepdims=True)
(Here and below I skip coding-style remarks that are not essential in this context, such as whether this should be a class method or a stand-alone function.)
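To see the numerical issue concretely, here is a small stand-alone check (not part of the original code) comparing the naive softmax with the shifted version on large logits:
import numpy as np

Z = np.array([[1000.0], [1001.0], [1002.0]])  # large logits, shape (classes, batch)

naive = np.exp(Z) / np.sum(np.exp(Z), axis=0)           # exp(1000) overflows to inf, result is nan
shifted = np.exp(Z - Z.max(axis=0, keepdims=True))
shifted = shifted / shifted.sum(axis=0, keepdims=True)  # finite, columns sum to 1

print(naive.ravel())    # [nan nan nan], with an overflow RuntimeWarning
print(shifted.ravel())  # roughly [0.09 0.245 0.665]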
Your ReLU derivative implementation doesn't work for me at all; maybe I have a different NumPy version. This one works:
def ReLu_Derv(self, x):
return (x > 0).astype(int)
You need to actually use this implementation in BackPropagation:
e1 = self.w2.T.dot(e2) * self.ReLu_Derv(self.a1)
With these amendments, I managed to achieve 91.0% accuracy after 100 iterations with LR=0.1. I loaded MNIST from Keras with this code:
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
X = train_images.reshape(-1, 28*28).T
Y = train_labels
Hi, I'm trying to train my own designed neural network on the MNIST handwritten digit data set, and every time I run this code the accuracy starts to increase and then decreases, and I get an overflow warning. Can someone explain whether my code is just poor and messy, or whether I have missed something small? Thanks in advance.
import numpy as np
import pandas as pd
df = pd.read_csv('../input/digit-recognizer/train.csv')
data = np.array(df.values)
data = data.T
data
Y = data[0,:]
X = data[1:,:]
Y_train = Y[:41000]
X_train = X[:,:41000]
X_train = X_train/255
Y_val = Y[41000:]
X_val = X[:,41000:]
X_val = X_val/255
print(np.max(X_train))
class NeuralNetwork:
def __init__(self, n_in, n_out):
self.w1, self.b1 = self.Generate_Weights_Biases(10,784)
self.w2, self.b2 = self.Generate_Weights_Biases(10,10)
def Generate_Weights_Biases(self, n_in, n_out):
weights = 0.01*np.random.randn(n_in, n_out)
biases = np.zeros((n_in,1))
return weights, biases
def forward(self, X):
self.Z1 = self.w1.dot(X) + self.b1
self.a1 = self.ReLu(self.Z1)
self.z2 = self.w2.dot(self.a1) + self.b1
y_pred = self.Softmax(self.z2)
return y_pred
def ReLu(self, Z):
return np.maximum(Z,0)
def Softmax(self, Z):
#exponentials = np.exp(Z)
#sumexp = np.sum(np.exp(Z), axis=0)
#print(Z)
return np.exp(Z)/np.sum(np.exp(Z))
def ReLu_Derv(self, x):
return np.greaterthan(x, 0).astype(int)
def One_hot_encoding(self, Y):
one_hot = np.zeros((Y.size, 10))
rows = np.arange(Y.size)
one_hot[rows, Y] = 1
one_hot = one_hot.T
return one_hot
def Get_predictions(self, y_pred):
return np.argmax(y_pred, 0)
def accuracy(self, pred, Y):
return np.sum(pred == Y)/Y.size
def BackPropagation(self, X, Y, y_pred, lr=0.01):
m = Y.size
one_hot_y = self.One_hot_encoding(Y)
e2 = y_pred - one_hot_y
derW2 = (1/m)* e2.dot(self.a1.T)
derB2 =(1/m) * np.sum(e2,axis=1)
derB2 = derB2.reshape(10,1)
e1 = self.w2.T.dot(e2) * self.ReLu(self.a1)
derW1 = (1/m) * e1.dot(X.T)
derB1 = (1/m) * np.sum(e1, axis=1)
derB1 = derB1.reshape(10,1)
self.w1 = self.w1 - lr*derW1
self.b1 = self.b1 - lr*derB1
self.w2 = self.w2 - lr*derW2
self.b2 = self.b2 - lr*derB2
def train(self, X, Y, epochs = 1000):
for i in range(epochs):
y_pred = self.forward(X)
predict = self.Get_predictions(y_pred)
accuracy = self.accuracy(predict, Y)
print(accuracy)
self.BackPropagation(X, Y, y_pred)
return self.w1, self.b1, self.w2, self.b2
NN = NeuralNetwork(X_train, Y_train)
w1,b1,w2,b2 = NN.train(X_train,Y_train)
You should use a different bias for the second layer:
self.z2 = self.w2.dot(self.a1) + self.b1 # not b1
self.z2 = self.w2.dot(self.a1) + self.b2 # but b2
When doing something like this
derB2 =(1/m) * np.sum(e2,axis=1)
you should use keepdims=True to make sure that derB2.shape is (something, 1) rather than (something,). It makes your code more rigorous.
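For example, a quick stand-alone check of the shape difference (illustrative only, using the same (10, 41000) shape as e2 above):
import numpy as np

e2 = np.random.randn(10, 41000)  # (classes, samples), like y_pred - one_hot_y above
m = e2.shape[1]

derB2_flat = (1 / m) * np.sum(e2, axis=1)                 # shape (10,)
derB2_col = (1 / m) * np.sum(e2, axis=1, keepdims=True)   # shape (10, 1)

print(derB2_flat.shape, derB2_col.shape)  # (10,) (10, 1)
# Only the (10, 1) version matches the shape of the bias b2 and updates it correctly.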
I have been trying to run some experiments using the deepfix tool (https://bitbucket.org/iiscseal/deepfix) which is a seq2seq model for correcting common programming errors.
I made changes to the code so that it is compatible with TF 1.12, as the original code contains tensorflow.contrib.seq2seq functions that are not supported in TF 1.12 (only in TF 1.0.x).
The main changes were in the seq2seq_model defined in neural_net/train.py.
Below is the changed code. I'm new to TensorFlow RNNs and coded the decoder part with help from code I found online.
class seq2seq_model():
PAD = 0
EOS = 1
def __init__(self, vocab_size, embedding_size, max_output_seq_len,
cell_type='LSTM', memory_dim=300, num_layers=4, dropout=0.2,
attention=True,
scope=None,
verbose=False):
assert 0 <= dropout and dropout <= 1, '0 <= dropout <= 1, you passed dropout={}'.format(
dropout)
tf.set_random_seed(1189)
self.attention = attention
self.max_output_seq_len = max_output_seq_len
self.memory_dim = memory_dim
self.num_layers = num_layers
self.dropout = dropout
self.scope = scope
if dropout != 0:
self.keep_prob = tf.placeholder(tf.float32)
else:
self.keep_prob = None
self.vocab_size = vocab_size
self.embedding_size = embedding_size
self.encoder_cell = _new_RNN_cell(
memory_dim, num_layers, cell_type, dropout, self.keep_prob)
self.decoder_cell = _new_RNN_cell(
memory_dim, num_layers, cell_type, dropout, self.keep_prob)
self._make_graph()
if self.scope is not None:
saver_vars = [var for var in tf.global_variables(
) if var.name.startswith(self.scope)]
else:
saver_vars = tf.global_variables()
if verbose:
print 'root-scope:', self.scope
print "\n\nDiscovered %d saver variables." % len(saver_vars)
for each in saver_vars:
print each.name
self.saver = tf.train.Saver(saver_vars, max_to_keep=5)
@property
def decoder_hidden_units(self):
return self.memory_dim
def _make_graph(self):
self._init_placeholders()
self._init_decoder_train_connectors()
self._init_embeddings()
self._init_simple_encoder()
self._init_decoder()
self._init_optimizer()
def _init_placeholders(self):
""" Everything is time-major """
self.encoder_inputs = tf.placeholder(
shape=(None, None),
dtype=tf.int32,
name='encoder_inputs',
)
self.encoder_inputs_length = tf.placeholder(
shape=(None,),
dtype=tf.int32,
name='encoder_inputs_length',
)
self.decoder_targets = tf.placeholder(
shape=(None, None),
dtype=tf.int32,
name='decoder_targets'
)
self.decoder_targets_length = tf.placeholder(
shape=(None,),
dtype=tf.int32,
name='decoder_targets_length',
)
def _init_decoder_train_connectors(self):
with tf.name_scope('decoderTrainFeeds'):
sequence_size, batch_size = tf.unstack(
tf.shape(self.decoder_targets), name='decoder_targets_shape')
EOS_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.EOS
PAD_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.PAD
self.decoder_train_inputs = tf.concat(
[EOS_SLICE, self.decoder_targets], axis=0, name="decoder_train_inputs")
self.decoder_train_length = self.decoder_targets_length + 1
decoder_train_targets = tf.concat(
[self.decoder_targets, PAD_SLICE], axis=0)
decoder_train_targets_seq_len, _ = tf.unstack(
tf.shape(decoder_train_targets))
decoder_train_targets_eos_mask = tf.one_hot(self.decoder_train_length - 1,
decoder_train_targets_seq_len,
on_value=self.EOS, off_value=self.PAD,
dtype=tf.int32)
decoder_train_targets_eos_mask = tf.transpose(
decoder_train_targets_eos_mask, [1, 0])
decoder_train_targets = tf.add(decoder_train_targets,
decoder_train_targets_eos_mask, name="decoder_train_targets")
self.decoder_train_targets = decoder_train_targets
self.loss_weights = tf.ones([
batch_size,
tf.reduce_max(self.decoder_train_length)
], dtype=tf.float32, name="loss_weights")
def _init_embeddings(self):
with tf.variable_scope("embedding") as scope:
sqrt3 = math.sqrt(3)
initializer = tf.random_uniform_initializer(-sqrt3, sqrt3)
self.embedding_matrix = tf.get_variable(
name="embedding_matrix",
shape=[self.vocab_size, self.embedding_size],
initializer=initializer,
dtype=tf.float32)
self.encoder_inputs_embedded = tf.nn.embedding_lookup(
self.embedding_matrix, self.encoder_inputs,
name="encoder_inputs_embedded")
self.decoder_train_inputs_embedded = tf.nn.embedding_lookup(
self.embedding_matrix, self.decoder_train_inputs,
name="decoder_train_inputs_embedded")
def _init_simple_encoder(self):
with tf.variable_scope("Encoder") as scope:
(self.encoder_outputs, self.encoder_state) = (
tf.nn.dynamic_rnn(cell=self.encoder_cell,
inputs=self.encoder_inputs_embedded,
sequence_length=self.encoder_inputs_length,
time_major=True,
dtype=tf.float32)
)
def _init_decoder(self):
with tf.variable_scope("decoder") as scope:
# def output_fn(outputs):
# return tf.contrib.layers.fully_connected(outputs, self.vocab_size, scope=scope,
# name = "output_fn")
sequence_size, batch_size = tf.unstack(
tf.shape(self.decoder_targets), name='decoder_targets_shape')
train_helper = seq2seq.TrainingHelper(
inputs=self.decoder_train_inputs_embedded,
sequence_length=self.decoder_train_length,
time_major=True,
name="train_helper")
pred_helper = seq2seq.SampleEmbeddingHelper(
embedding=self.embedding_matrix,
start_tokens=tf.ones([batch_size], dtype=tf.int32) * self.EOS,
end_token=self.EOS)
# name="pred_helper")
def _decode(helper, scope, reuse=None):
with tf.variable_scope(scope, reuse=reuse):
attention_states = tf.transpose(
self.encoder_outputs, [1, 0, 2])
attention_mechanism = seq2seq.BahdanauAttention(
num_units=self.decoder_hidden_units, memory=attention_states,
name="attention_mechanism")
attention_cell = seq2seq.AttentionWrapper(
self.decoder_cell, attention_mechanism,
name="atttention_wrapper")
out_cell = tf.contrib.rnn.OutputProjectionWrapper(
attention_cell, self.vocab_size, reuse=reuse)
# name="output_cell")
decoder = seq2seq.BasicDecoder(
cell=out_cell, helper=helper,
initial_state=out_cell.zero_state(
dtype=tf.float32, batch_size=batch_size))
# name="decoder")
outputs = seq2seq.dynamic_decode(
decoder=decoder, output_time_major=True,
impute_finished=True)
# name="outputs")
return outputs
(self.decoder_logits_train, self.decoder_state_train, _) = _decode(train_helper, "decoder")
(self.decoder_logits_inference, self.decoder_state_inference, _) = _decode(pred_helper, "decoder", reuse=True)
self.decoder_logits_train = self.decoder_logits_train.rnn_output
self.decoder_logits_inference = self.decoder_logits_inference.rnn_output
# self.decoder_logits_train = output_fn(self.decoder_outputs_train)
self.decoder_prediction_train = tf.argmax(
self.decoder_logits_train, axis=-1, name='decoder_prediction_train')
scope.reuse_variables()
self.decoder_prediction_inference = tf.argmax(self.decoder_logits_inference, axis=-1,
name='decoder_prediction_inference')
def _init_optimizer(self):
logits = tf.transpose(self.decoder_logits_train, [1, 0, 2])
targets = tf.transpose(self.decoder_train_targets, [1, 0])
self.loss = seq2seq.sequence_loss(logits=logits, targets=targets,
weights=self.loss_weights)
self.optimizer = tf.train.AdamOptimizer()
gvs = self.optimizer.compute_gradients(self.loss)
def ClipIfNotNone(grad):
if grad is None:
return grad
return tf.clip_by_value(grad, -1., 1)
# capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
capped_gvs = [(ClipIfNotNone(grad), var) for grad, var in gvs]
self.train_op = self.optimizer.apply_gradients(capped_gvs)
def make_feed_dict(self, x, x_len, y, y_len):
feed_dict = {
self.encoder_inputs: x,
self.encoder_inputs_length: x_len,
self.decoder_targets: y,
self.decoder_targets_length: y_len,
}
if self.dropout != 0:
feed_dict.update({self.keep_prob: 1.0 - self.dropout})
return feed_dict
def load_parameters(self, sess, filename):
self.saver.restore(sess, filename)
def save_parameters(self, sess, filename, global_step=None):
self.saver.save(sess, filename, global_step=global_step)
def train_step(self, session, x, x_len, y, y_len):
feed_dict = self.make_feed_dict(x, x_len, y, y_len)
_, loss = session.run([self.train_op, self.loss], feed_dict)
return loss
def validate_step(self, session, x, x_len, y, y_len):
feed_dict = self.make_feed_dict(x, x_len, y, y_len)
loss, decoder_prediction, decoder_train_targets = session.run([self.loss,
self.decoder_prediction_inference,
self.decoder_train_targets], feed_dict)
return loss, np.array(decoder_prediction).T, np.array(decoder_train_targets).T
def sample(self, session, X, X_len):
feed_dict = {self.encoder_inputs: X,
self.encoder_inputs_length: X_len}
if self.dropout != 0:
feed_dict.update({self.keep_prob: 1.0})
decoder_prediction = session.run(
self.decoder_prediction_inference, feed_dict)
return np.array(decoder_prediction).T
I am having some problems with this code:
Main problem: the seq2seq.train_step() and seq2seq.validate_step() functions work, but when I use seq2seq.sample() to actually make inferences, I get an error asking me to feed a value for decoder_targets. This is unexpected behaviour, as the SampleEmbeddingHelper is used for inference, which does not require decoder_targets. The error:
InvalidArgumentError (see above for traceback): You must feed a value
for placeholder tensor 'ids/decoder_targets' with dtype int32 and
shape [?,?]
[[node ids/decoder_targets (defined at
.../code/neural_net/train.py:241) = Placeholder[dtype=DT_INT32,
shape=[?,?],
_device="/job:localhost/replica:0/task:0/device:CPU:0"]]]
When I try to use GreedyEmbeddingHelper instead of SampleEmbeddingHelper and then run the decoder_logits_inference op, the machine hangs and runs out of memory after some time, although SampleEmbeddingHelper works fine.
Well, SampleEmbeddingHelper does need decoder targets, since it mixes part of GreedyEmbeddingHelper (infer mode) and tf.contrib.seq2seq.TrainingHelper (teacher forcing). I think you just need to use GreedyEmbeddingHelper.
Since the parameters are totally random in the beginning (if not pre-trained), you may have noticed that the results of the first few loops of a seq2seq model are totally messed up.
So if you use GreedyEmbeddingHelper, which produces each output based on the previous one, no one teaches it "where to stop", and it usually runs on indefinitely until your memory runs out. To solve this, you need to set an upper limit on the sentence length in tf.contrib.seq2seq.dynamic_decode.
The argument is maximum_iterations, as shown in the tf.contrib.seq2seq.dynamic_decode documentation.
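For illustration, inside the _decode helper from the question this would amount to something like the following sketch (reusing the model's existing max_output_seq_len attribute as the cap; any sensible upper bound for your data works):
# Cap how many steps the decoder may run at inference time.
# maximum_iterations is what prevents GreedyEmbeddingHelper from
# decoding forever when it never emits the end token.
outputs = seq2seq.dynamic_decode(
    decoder=decoder,
    output_time_major=True,
    impute_finished=True,
    maximum_iterations=self.max_output_seq_len)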