I'm trying to train a model in Tensorflow and get the error:
Attribute Error: 'method' object has no attribute '_from_serialized'
This is code that I have copied and seen work.
It seems that it has something to do with the compatibility of my tensorflow version and python version. I'm able to run other models, but this error seems to occur when I'm trying to track custom metrics.
What are the most recent compatible versions of TensorFlow-GPU and Python that can run models while tracking custom metrics?
I've checked the table that Tensorflow provides and these versions should be compatible.
My current version of Tensorflow is 2.10.0
Python version is 3.9.6.
Is there something else that might cause this error? I've created multiple environments with different versions and still receive this error.
import os
import pickle
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization, \
Flatten, Dense, Reshape, Conv2DTranspose, Activation, Lambda
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
import numpy as np
import tensorflow as tf
class VAE:
    """Deep convolutional variational autoencoder with mirrored encoder
    and decoder components.

    Constructing an instance builds three Keras models: ``encoder``,
    ``decoder`` and the end-to-end ``model`` that chains them.
    """

    def __init__(self,
                 input_shape,
                 conv_filters,
                 conv_kernels,
                 conv_strides,
                 latent_space_dim):
        self.input_shape = input_shape  # [28, 28, 1]
        self.conv_filters = conv_filters  # [2, 4, 8]
        self.conv_kernels = conv_kernels  # [3, 5, 3]
        self.conv_strides = conv_strides  # [1, 2, 2]
        self.latent_space_dim = latent_space_dim  # 2
        self.reconstruction_loss_weight = 1000

        self.encoder = None
        self.decoder = None
        self.model = None

        self._num_conv_layers = len(conv_filters)
        self._shape_before_bottleneck = None
        self._model_input = None

        self._build()

    def summary(self):
        """Print the Keras summaries of the encoder, decoder and full model."""
        self.encoder.summary()
        self.decoder.summary()
        self.model.summary()

    def compile(self, learning_rate=0.0001):
        """Compile the full model with Adam and the combined VAE loss."""
        optimizer = Adam(learning_rate=learning_rate)
        # NOTE(review): both metrics are bound methods. Keras code paths that
        # set attributes on metric callables cannot do so on bound methods.
        self.model.compile(optimizer=optimizer,
                           loss=self._calculate_combined_loss,
                           metrics=[self._calculate_reconstruction_loss,
                                    self._calculate_kl_loss])

    def train(self, x_train, batch_size, num_epochs):
        """Fit the autoencoder to reproduce ``x_train`` from itself."""
        self.model.fit(x_train,
                       x_train,
                       batch_size=batch_size,
                       epochs=num_epochs,
                       shuffle=True)

    def save(self, save_folder="."):
        """Write constructor parameters and model weights to ``save_folder``."""
        self._create_folder_if_it_doesnt_exist(save_folder)
        self._save_parameters(save_folder)
        self._save_weights(save_folder)

    def load_weights(self, weights_path):
        """Load weights for the full model from ``weights_path``."""
        self.model.load_weights(weights_path)

    def reconstruct(self, images):
        """Return ``(reconstructed_images, latent_representations)``."""
        latent_representations = self.encoder.predict(images)
        reconstructed_images = self.decoder.predict(latent_representations)
        return reconstructed_images, latent_representations

    @classmethod
    def load(cls, save_folder="."):
        """Rebuild an autoencoder from the files written by ``save``.

        Reads ``parameters.pkl`` and ``weights.h5`` from ``save_folder``.
        """
        parameters_path = os.path.join(save_folder, "parameters.pkl")
        # SECURITY: unpickling can execute arbitrary code; only load
        # parameter files that come from a trusted source.
        with open(parameters_path, "rb") as f:
            parameters = pickle.load(f)
        autoencoder = cls(*parameters)
        weights_path = os.path.join(save_folder, "weights.h5")
        autoencoder.load_weights(weights_path)
        return autoencoder

    def _calculate_combined_loss(self, y_target, y_predicted):
        """Weighted reconstruction loss plus KL loss."""
        reconstruction_loss = self._calculate_reconstruction_loss(y_target, y_predicted)
        kl_loss = self._calculate_kl_loss(y_target, y_predicted)
        combined_loss = self.reconstruction_loss_weight * reconstruction_loss\
            + kl_loss
        return combined_loss

    def _calculate_reconstruction_loss(self, y_target, y_predicted):
        """Mean squared error over axes 1, 2 and 3."""
        error = y_target - y_predicted
        reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
        return reconstruction_loss

    def _calculate_kl_loss(self, y_target, y_predicted):
        """KL term computed from ``self.mu`` and ``self.log_variance``.

        Both arguments are ignored; they exist so the method has the
        two-argument signature Keras expects of a loss/metric.
        """
        kl_loss = -0.5 * K.sum(1 + self.log_variance - K.square(self.mu) -
                               K.exp(self.log_variance), axis=1)
        return kl_loss

    def _create_folder_if_it_doesnt_exist(self, folder):
        """Create ``folder`` (and parents) unless it already exists."""
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(folder, exist_ok=True)

    def _save_parameters(self, save_folder):
        """Pickle the constructor arguments as ``parameters.pkl``."""
        parameters = [
            self.input_shape,
            self.conv_filters,
            self.conv_kernels,
            self.conv_strides,
            self.latent_space_dim
        ]
        save_path = os.path.join(save_folder, "parameters.pkl")
        with open(save_path, "wb") as f:
            pickle.dump(parameters, f)

    def _save_weights(self, save_folder):
        """Save the full model's weights as ``weights.h5``."""
        save_path = os.path.join(save_folder, "weights.h5")
        self.model.save_weights(save_path)

    def _build(self):
        """Build encoder first: the decoder needs the pre-bottleneck shape."""
        self._build_encoder()
        self._build_decoder()
        self._build_autoencoder()

    def _build_autoencoder(self):
        """Chain encoder and decoder into the end-to-end model."""
        model_input = self._model_input
        model_output = self.decoder(self.encoder(model_input))
        self.model = Model(model_input, model_output, name="autoencoder")

    def _build_decoder(self):
        """Assemble the decoder model."""
        decoder_input = self._add_decoder_input()
        dense_layer = self._add_dense_layer(decoder_input)
        reshape_layer = self._add_reshape_layer(dense_layer)
        conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
        decoder_output = self._add_decoder_output(conv_transpose_layers)
        self.decoder = Model(decoder_input, decoder_output, name="decoder")

    def _add_decoder_input(self):
        return Input(shape=self.latent_space_dim, name="decoder_input")

    def _add_dense_layer(self, decoder_input):
        num_neurons = np.prod(self._shape_before_bottleneck)  # [1, 2, 4] -> 8
        dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
        return dense_layer

    def _add_reshape_layer(self, dense_layer):
        return Reshape(self._shape_before_bottleneck)(dense_layer)

    def _add_conv_transpose_layers(self, x):
        """Add conv transpose blocks."""
        # loop through all the conv layers in reverse order and stop at the
        # first layer
        for layer_index in reversed(range(1, self._num_conv_layers)):
            x = self._add_conv_transpose_layer(layer_index, x)
        return x

    def _add_conv_transpose_layer(self, layer_index, x):
        """Add one block: conv transpose + ReLU + batch normalization."""
        layer_num = self._num_conv_layers - layer_index
        conv_transpose_layer = Conv2DTranspose(
            filters=self.conv_filters[layer_index],
            kernel_size=self.conv_kernels[layer_index],
            strides=self.conv_strides[layer_index],
            padding="same",
            name=f"decoder_conv_transpose_layer_{layer_num}"
        )
        x = conv_transpose_layer(x)
        x = ReLU(name=f"decoder_relu_{layer_num}")(x)
        x = BatchNormalization(name=f"decoder_bn_{layer_num}")(x)
        return x

    def _add_decoder_output(self, x):
        """Final single-filter conv transpose followed by a sigmoid."""
        conv_transpose_layer = Conv2DTranspose(
            filters=1,
            kernel_size=self.conv_kernels[0],
            strides=self.conv_strides[0],
            padding="same",
            name=f"decoder_conv_transpose_layer_{self._num_conv_layers}"
        )
        x = conv_transpose_layer(x)
        output_layer = Activation("sigmoid", name="sigmoid_layer")(x)
        return output_layer

    def _build_encoder(self):
        """Assemble the encoder model and remember its input tensor."""
        encoder_input = self._add_encoder_input()
        conv_layers = self._add_conv_layers(encoder_input)
        bottleneck = self._add_bottleneck(conv_layers)
        self._model_input = encoder_input
        self.encoder = Model(encoder_input, bottleneck, name="encoder")

    def _add_encoder_input(self):
        return Input(shape=self.input_shape, name="encoder_input")

    def _add_conv_layers(self, encoder_input):
        """Create all convolutional blocks in encoder."""
        x = encoder_input
        for layer_index in range(self._num_conv_layers):
            x = self._add_conv_layer(layer_index, x)
        return x

    def _add_conv_layer(self, layer_index, x):
        """Add a convolutional block to a graph of layers, consisting of
        conv 2d + ReLU + batch normalization.
        """
        layer_number = layer_index + 1
        conv_layer = Conv2D(
            filters=self.conv_filters[layer_index],
            kernel_size=self.conv_kernels[layer_index],
            strides=self.conv_strides[layer_index],
            padding="same",
            name=f"encoder_conv_layer_{layer_number}"
        )
        x = conv_layer(x)
        x = ReLU(name=f"encoder_relu_{layer_number}")(x)
        x = BatchNormalization(name=f"encoder_bn_{layer_number}")(x)
        return x

    def _add_bottleneck(self, x):
        """Flatten data and add bottleneck with Gaussian sampling (Dense
        layer).
        """
        self._shape_before_bottleneck = K.int_shape(x)[1:]
        x = Flatten()(x)
        self.mu = Dense(self.latent_space_dim, name="mu")(x)
        self.log_variance = Dense(self.latent_space_dim,
                                  name="log_variance")(x)

        def sample_point_from_normal_distribution(args):
            mu, log_variance = args
            # Use the layer's own input rather than closing over self.mu,
            # so the Lambda depends only on the tensors passed to it.
            epsilon = K.random_normal(shape=K.shape(mu), mean=0.,
                                      stddev=1.)
            sampled_point = mu + K.exp(log_variance / 2) * epsilon
            return sampled_point

        x = Lambda(sample_point_from_normal_distribution,
                   name="encoder_output")([self.mu, self.log_variance])
        return x
if __name__ == "__main__":
autoencoder = VAE(
input_shape=(28, 28, 1),
conv_filters=(32, 64, 64, 64),
conv_kernels=(3, 3, 3, 3),
conv_strides=(1, 2, 2, 1),
latent_space_dim=2
)
autoencoder.summary()
LEARNING_RATE = 0.0005
BATCH_SIZE = 32
EPOCHS = 100
def load_mnist():
    """Load MNIST and return ``(x_train, y_train, x_test, y_test)``.

    Images are converted to float32, divided by 255 and given a trailing
    channel axis; labels are returned as loaded.
    """
    def _scale_and_add_channel(images):
        scaled = images.astype("float32") / 255
        return scaled.reshape(scaled.shape + (1,))

    (train_images, y_train), (test_images, y_test) = tf.keras.datasets.mnist.load_data()
    return (_scale_and_add_channel(train_images), y_train,
            _scale_and_add_channel(test_images), y_test)
def train(x_train, learning_rate, batch_size, epochs):
    """Build a VAE with the fixed MNIST architecture, train it, return it."""
    vae = VAE(
        input_shape=(28, 28, 1),
        conv_filters=(32, 64, 64, 64),
        conv_kernels=(3, 3, 3, 3),
        conv_strides=(1, 2, 2, 1),
        latent_space_dim=2,
    )
    vae.summary()
    vae.compile(learning_rate)
    vae.train(x_train, batch_size, epochs)
    return vae
if __name__ == "__main__":
x_train, _, _, _ = load_mnist()
autoencoder = train(x_train[:10000], LEARNING_RATE, BATCH_SIZE, EPOCHS)
autoencoder.save("model")
The attribute error here is raised because you can't set any attribute on a method object i.e;
class Foo:
def bar(self):
print("bar")
if __name__ == "__main__":
Foo().bar.baz = 1
Output:
Traceback (most recent call last): line 7, in <module>
Foo().bar.baz = 1
AttributeError: 'method' object has no attribute 'baz'
When collecting the metric information in training_utils_v1, the metrics specified when the model is compiled (model.compile(..., metrics=[..])) are iterated over, and for each metric, the attribute _from_serialized is set:
for i, metrics in enumerate(nested_metrics):
metrics_dict = collections.OrderedDict()
for metric in metrics:
metric_name = get_metric_name(metric, is_weighted)
metric_fn = get_metric_function(
metric, output_shape=output_shapes[i], loss_fn=loss_fns[i]
)
metric_fn._from_serialized = from_serialized
In the example provided, two metrics are supplied to model.compile, and each are methods of the VAE class:
def compile(self, learning_rate=0.0001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[self._calculate_reconstruction_loss,
self._calculate_kl_loss])
To test this, observe if metrics is entirely omitted, training will start successfully.
One of the metrics supplied, _calculate_reconstruction_loss is a method which does not need to be a method, as it does not refer to self in the body:
def _calculate_reconstruction_loss(self, y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
So that can be moved outside of the class and made into a function (some IDEs will recommend this to you in the form of a message to the effect of "Make function from method"):
def _calculate_reconstruction_loss(y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
The compile statement can then be revised:
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[_calculate_reconstruction_loss,
self._calculate_kl_loss])
The same exception will appear, since we're still referring to a method in the call (self._calculate_kl_loss), but if self._calculate_kl_loss is omitted, the training will start successfully:
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[_calculate_reconstruction_loss])
For completeness, reviewing what self._calculate_kl_loss is doing, it's a bit more tricky, but we can successfully use it as a metric by converting it into a function that takes a single argument, model, and returns another function that takes an arbitrary number of arguments (*args), so that it can be used both as a metric (which always expects a function of two arguments) and inside the loss function:
def calculate_kl_loss(model):
# wrap `_calculate_kl_loss` such that it takes the model as an argument,
# returns a function which can take arbitrary number of arguments
# (for compatibility with `metrics` and utility in the loss function)
# and returns the kl loss
def _calculate_kl_loss(*args):
kl_loss = -0.5 * K.sum(1 + model.log_variance - K.square(model.mu) -
K.exp(model.log_variance), axis=1)
return kl_loss
return _calculate_kl_loss
With these revisions, when training is started, the output is:
Epoch 1/100
10000/10000 [==============================] - 43s 4ms/sample - loss: 89.4243 - _calculate_reconstruction_loss: 0.0833 - _calculate_kl_loss: 6.1707
Epoch 2/100
10000/10000 [==============================] - 46s 5ms/sample - loss: 69.0131 - _calculate_reconstruction_loss: 0.0619 - _calculate_kl_loss: 7.1129
The entire snippet with revisions detailed above is:
import os
import pickle
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization, \
Flatten, Dense, Reshape, Conv2DTranspose, Activation, Lambda
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()
def _calculate_reconstruction_loss(y_target, y_predicted):
    """Mean of the squared difference, taken over axes 1, 2 and 3."""
    squared_diff = K.square(y_target - y_predicted)
    return K.mean(squared_diff, axis=[1, 2, 3])
def calculate_kl_loss(model):
    """Return a KL-loss callable bound to ``model``.

    The returned function ignores any positional arguments it is given, so
    it can be handed to Keras as a metric and also called with no arguments
    from the loss. It reads ``model.mu`` and ``model.log_variance`` each
    time it is called.
    """
    def _calculate_kl_loss(*args):
        log_var = model.log_variance
        divergence_terms = 1 + log_var - K.square(model.mu) - K.exp(log_var)
        return -0.5 * K.sum(divergence_terms, axis=1)

    return _calculate_kl_loss
class VAE:
    """Deep convolutional variational autoencoder with mirrored encoder
    and decoder components.

    Constructing an instance builds three Keras models: ``encoder``,
    ``decoder`` and the end-to-end ``model`` that chains them.
    """

    def __init__(self,
                 input_shape,
                 conv_filters,
                 conv_kernels,
                 conv_strides,
                 latent_space_dim):
        self.input_shape = input_shape  # [28, 28, 1]
        self.conv_filters = conv_filters  # [2, 4, 8]
        self.conv_kernels = conv_kernels  # [3, 5, 3]
        self.conv_strides = conv_strides  # [1, 2, 2]
        self.latent_space_dim = latent_space_dim  # 2
        self.reconstruction_loss_weight = 1000

        self.encoder = None
        self.decoder = None
        self.model = None

        self._num_conv_layers = len(conv_filters)
        self._shape_before_bottleneck = None
        self._model_input = None

        self._build()

    def summary(self):
        """Print the Keras summaries of the encoder, decoder and full model."""
        self.encoder.summary()
        self.decoder.summary()
        self.model.summary()

    def compile(self, learning_rate=0.0001):
        """Compile the full model with Adam and the combined VAE loss.

        The metrics are plain functions (not bound methods) so Keras can
        attach attributes to them.
        """
        optimizer = Adam(learning_rate=learning_rate)
        self.model.compile(optimizer=optimizer,
                           loss=self._calculate_combined_loss,
                           metrics=[_calculate_reconstruction_loss,
                                    calculate_kl_loss(self)])

    def train(self, x_train, batch_size, num_epochs):
        """Fit the autoencoder to reproduce ``x_train`` from itself."""
        self.model.fit(x_train,
                       x_train,
                       batch_size=batch_size,
                       epochs=num_epochs,
                       shuffle=True)

    def save(self, save_folder="."):
        """Write constructor parameters and model weights to ``save_folder``."""
        self._create_folder_if_it_doesnt_exist(save_folder)
        self._save_parameters(save_folder)
        self._save_weights(save_folder)

    def load_weights(self, weights_path):
        """Load weights for the full model from ``weights_path``."""
        self.model.load_weights(weights_path)

    def reconstruct(self, images):
        """Return ``(reconstructed_images, latent_representations)``."""
        latent_representations = self.encoder.predict(images)
        reconstructed_images = self.decoder.predict(latent_representations)
        return reconstructed_images, latent_representations

    @classmethod
    def load(cls, save_folder="."):
        """Rebuild an autoencoder from the files written by ``save``.

        Reads ``parameters.pkl`` and ``weights.h5`` from ``save_folder``.
        """
        parameters_path = os.path.join(save_folder, "parameters.pkl")
        # SECURITY: unpickling can execute arbitrary code; only load
        # parameter files that come from a trusted source.
        with open(parameters_path, "rb") as f:
            parameters = pickle.load(f)
        autoencoder = cls(*parameters)
        weights_path = os.path.join(save_folder, "weights.h5")
        autoencoder.load_weights(weights_path)
        return autoencoder

    def _calculate_combined_loss(self, y_target, y_predicted):
        """Weighted reconstruction loss plus KL loss."""
        reconstruction_loss = _calculate_reconstruction_loss(y_target, y_predicted)
        kl_loss = calculate_kl_loss(self)()
        combined_loss = self.reconstruction_loss_weight * reconstruction_loss\
            + kl_loss
        return combined_loss

    def _create_folder_if_it_doesnt_exist(self, folder):
        """Create ``folder`` (and parents) unless it already exists."""
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(folder, exist_ok=True)

    def _save_parameters(self, save_folder):
        """Pickle the constructor arguments as ``parameters.pkl``."""
        parameters = [
            self.input_shape,
            self.conv_filters,
            self.conv_kernels,
            self.conv_strides,
            self.latent_space_dim
        ]
        save_path = os.path.join(save_folder, "parameters.pkl")
        with open(save_path, "wb") as f:
            pickle.dump(parameters, f)

    def _save_weights(self, save_folder):
        """Save the full model's weights as ``weights.h5``."""
        save_path = os.path.join(save_folder, "weights.h5")
        self.model.save_weights(save_path)

    def _build(self):
        """Build encoder first: the decoder needs the pre-bottleneck shape."""
        self._build_encoder()
        self._build_decoder()
        self._build_autoencoder()

    def _build_autoencoder(self):
        """Chain encoder and decoder into the end-to-end model."""
        model_input = self._model_input
        model_output = self.decoder(self.encoder(model_input))
        self.model = Model(model_input, model_output, name="autoencoder")

    def _build_decoder(self):
        """Assemble the decoder model."""
        decoder_input = self._add_decoder_input()
        dense_layer = self._add_dense_layer(decoder_input)
        reshape_layer = self._add_reshape_layer(dense_layer)
        conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
        decoder_output = self._add_decoder_output(conv_transpose_layers)
        self.decoder = Model(decoder_input, decoder_output, name="decoder")

    def _add_decoder_input(self):
        return Input(shape=self.latent_space_dim, name="decoder_input")

    def _add_dense_layer(self, decoder_input):
        num_neurons = np.prod(self._shape_before_bottleneck)  # [1, 2, 4] -> 8
        dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
        return dense_layer

    def _add_reshape_layer(self, dense_layer):
        return Reshape(self._shape_before_bottleneck)(dense_layer)

    def _add_conv_transpose_layers(self, x):
        """Add conv transpose blocks."""
        # loop through all the conv layers in reverse order and stop at the
        # first layer
        for layer_index in reversed(range(1, self._num_conv_layers)):
            x = self._add_conv_transpose_layer(layer_index, x)
        return x

    def _add_conv_transpose_layer(self, layer_index, x):
        """Add one block: conv transpose + ReLU + batch normalization."""
        layer_num = self._num_conv_layers - layer_index
        conv_transpose_layer = Conv2DTranspose(
            filters=self.conv_filters[layer_index],
            kernel_size=self.conv_kernels[layer_index],
            strides=self.conv_strides[layer_index],
            padding="same",
            name=f"decoder_conv_transpose_layer_{layer_num}"
        )
        x = conv_transpose_layer(x)
        x = ReLU(name=f"decoder_relu_{layer_num}")(x)
        x = BatchNormalization(name=f"decoder_bn_{layer_num}")(x)
        return x

    def _add_decoder_output(self, x):
        """Final single-filter conv transpose followed by a sigmoid."""
        conv_transpose_layer = Conv2DTranspose(
            filters=1,
            kernel_size=self.conv_kernels[0],
            strides=self.conv_strides[0],
            padding="same",
            name=f"decoder_conv_transpose_layer_{self._num_conv_layers}"
        )
        x = conv_transpose_layer(x)
        output_layer = Activation("sigmoid", name="sigmoid_layer")(x)
        return output_layer

    def _build_encoder(self):
        """Assemble the encoder model and remember its input tensor."""
        encoder_input = self._add_encoder_input()
        conv_layers = self._add_conv_layers(encoder_input)
        bottleneck = self._add_bottleneck(conv_layers)
        self._model_input = encoder_input
        self.encoder = Model(encoder_input, bottleneck, name="encoder")

    def _add_encoder_input(self):
        return Input(shape=self.input_shape, name="encoder_input")

    def _add_conv_layers(self, encoder_input):
        """Create all convolutional blocks in encoder."""
        x = encoder_input
        for layer_index in range(self._num_conv_layers):
            x = self._add_conv_layer(layer_index, x)
        return x

    def _add_conv_layer(self, layer_index, x):
        """Add a convolutional block to a graph of layers, consisting of
        conv 2d + ReLU + batch normalization.
        """
        layer_number = layer_index + 1
        conv_layer = Conv2D(
            filters=self.conv_filters[layer_index],
            kernel_size=self.conv_kernels[layer_index],
            strides=self.conv_strides[layer_index],
            padding="same",
            name=f"encoder_conv_layer_{layer_number}"
        )
        x = conv_layer(x)
        x = ReLU(name=f"encoder_relu_{layer_number}")(x)
        x = BatchNormalization(name=f"encoder_bn_{layer_number}")(x)
        return x

    def _add_bottleneck(self, x):
        """Flatten data and add bottleneck with Gaussian sampling (Dense
        layer).
        """
        self._shape_before_bottleneck = K.int_shape(x)[1:]
        x = Flatten()(x)
        self.mu = Dense(self.latent_space_dim, name="mu")(x)
        self.log_variance = Dense(self.latent_space_dim,
                                  name="log_variance")(x)

        def sample_point_from_normal_distribution(args):
            mu, log_variance = args
            # Use the layer's own input rather than closing over self.mu,
            # so the Lambda depends only on the tensors passed to it.
            epsilon = K.random_normal(shape=K.shape(mu), mean=0.,
                                      stddev=1.)
            sampled_point = mu + K.exp(log_variance / 2) * epsilon
            return sampled_point

        x = Lambda(sample_point_from_normal_distribution,
                   name="encoder_output")([self.mu, self.log_variance])
        return x
LEARNING_RATE = 0.0005
BATCH_SIZE = 32
EPOCHS = 100
def load_mnist():
    """Load MNIST and return ``(x_train, y_train, x_test, y_test)``.

    Images are converted to float32, divided by 255 and given a trailing
    channel axis; labels are returned as loaded.
    """
    def _scale_and_add_channel(images):
        scaled = images.astype("float32") / 255
        return scaled.reshape(scaled.shape + (1,))

    (train_images, y_train), (test_images, y_test) = tf.keras.datasets.mnist.load_data()
    return (_scale_and_add_channel(train_images), y_train,
            _scale_and_add_channel(test_images), y_test)
def train(x_train, learning_rate, batch_size, epochs):
    """Build a VAE with the fixed MNIST architecture, train it, return it."""
    vae = VAE(
        input_shape=(28, 28, 1),
        conv_filters=(32, 64, 64, 64),
        conv_kernels=(3, 3, 3, 3),
        conv_strides=(1, 2, 2, 1),
        latent_space_dim=2,
    )
    vae.summary()
    vae.compile(learning_rate)
    vae.train(x_train, batch_size, epochs)
    return vae
if __name__ == "__main__":
x_train, _, _, _ = load_mnist()
autoencoder = train(x_train[:10000], LEARNING_RATE, BATCH_SIZE, EPOCHS)
autoencoder.save("model")
Maybe you forgot to call the method whose return value provides _from_serialized().
You did not specify your code, but what the error implies is this:
# you probably passed this
my_method._from_serialized()
# instead of
my_method()._from_serialized()
Otherwise if it's an error regarding the loading of a model I found this issue on github that may help you.
To quote it:
I have the same problem in Keras version: 2.3.0 and in my case, this behaviour can be fixed by using tf.keras.models.load_model instead of direct load_model. I also change every import statement from 'keras' to 'tensorflow.keras' to avoid crash between old keras and new tensorflow.keras. hope it helps, cheers.
Related
I'm currently switching from tensorflow to pytorch and facing the warning UserWarning: Using a target size (torch.Size([400])) that is different to the input size (torch.Size([400, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size
I read that calling unsqueeze(1) on my target could resolve the problem; however, doing so causes problems with the multi-target setup, because of the target shape my loss function (cross-entropy) expects.
Here is a minimal example to my code:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
X1 = torch.randn(400, 1, 9999)
X2 = torch.randn((400,1, 9999))
aux1 = torch.randn(400,1)
aux2 = torch.randn(400,1)
aux3 = torch.randn(400,1)
y1 = torch.rand(400,)
y2 = torch.rand(400,)
y3 = torch.rand(400,)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
# In[18]:
class MultiTaskDataset(Dataset):
    """Map-style dataset yielding five input tensors and three targets.

    ``__getitem__`` returns ``(inputs, targets)`` where ``inputs`` is
    ``[amplitude, phase, weight, temperature, humidity]`` (all float32) and
    ``targets`` is ``[shelf_life_clf (long), shelf_life_pred (float32),
    thickness_pred (float32)]``. The dataset length is the size of the
    first axis of ``amplitude``.
    """

    def __init__(self,
                 amplitude,
                 phase,
                 weight,
                 temperature,
                 humidity,
                 shelf_life_clf,
                 shelf_life_pred,
                 thickness_pred
                 ):
        self.amplitude = amplitude
        self.phase = phase
        self.weight = weight
        self.temperature = temperature
        self.humidity = humidity
        self.shelf_life_clf = shelf_life_clf
        self.shelf_life_pred = shelf_life_pred
        self.thickness_pred = thickness_pred

    def __len__(self):
        return self.amplitude.shape[0]

    @staticmethod
    def _to_tensor(value, dtype):
        """Return an independent tensor copy of ``value`` with ``dtype``.

        ``torch.tensor(existing_tensor)`` emits a copy-construct
        UserWarning; ``as_tensor`` + ``detach().clone()`` gives the same
        copy without the warning.
        """
        return torch.as_tensor(value, dtype=dtype).detach().clone()

    def __getitem__(self, idx):
        input_sources = (self.amplitude, self.phase, self.weight,
                         self.temperature, self.humidity)
        inputs = [self._to_tensor(source[idx], torch.float32)
                  for source in input_sources]
        targets = [
            self._to_tensor(self.shelf_life_clf[idx], torch.long),
            self._to_tensor(self.shelf_life_pred[idx], torch.float32),
            self._to_tensor(self.thickness_pred[idx], torch.float32),
        ]
        return (inputs, targets)
# In[19]:
# train loader
dataset = MultiTaskDataset(X1, X2, aux1, aux2, aux3,
y1,y2,y3)
train_loader = DataLoader(dataset, batch_size=512, shuffle=True, num_workers=0)
# test loader
# In[20]:
class MyModel(nn.Module):
    """Two-branch 1-D conv network with three output heads.

    ``forward`` returns ``(shelf_life_clf, shelf_life_reg, thickness_reg)``.
    ``backbone2`` and ``backbone3`` are registered but not used by
    ``forward``.
    """

    def __init__(self):
        super().__init__()
        # One small lazy conv per input branch.
        self.features_amp = nn.Sequential(nn.LazyConv1d(1, 3, 1))
        self.features_phase = nn.Sequential(nn.LazyConv1d(1, 3, 1))

        self.backbone1 = nn.Sequential(
            nn.LazyConv1d(64, 3, 1),
            nn.LazyConv1d(64, 3, 1),
            nn.AvgPool1d(3),
            nn.Dropout(0.25),
        )
        self.backbone2 = nn.Sequential(
            nn.Conv1d(64, 32, 3, 1),
            nn.Conv1d(32, 32, 3, 1),
            nn.AvgPool1d(3),
            nn.Dropout(0.25),
        )
        self.backbone3 = nn.Sequential(
            nn.Conv1d(32, 16, 3, 1),
            nn.Conv1d(16, 16, 3, 1),
            nn.AvgPool1d(3),
            nn.Dropout(0.25),
        )

        # Output heads; input width is inferred on the first forward pass.
        self.classifier = nn.LazyLinear(2)
        self.shelf_life_reg = nn.LazyLinear(1)
        self.thickness_reg = nn.LazyLinear(1)

    def forward(self, x1, x2, aux1, aux2, aux3):
        amp = self.features_amp(x1)
        phase = self.features_phase(x2)

        # Flatten each branch per sample and join them side by side.
        amp = amp.view(amp.size(0), -1)
        phase = phase.view(phase.size(0), -1)
        merged = torch.cat((amp, phase), dim=-1)
        print(merged.size())

        # Re-insert a channel axis for the 1-D conv backbone.
        merged = merged.unsqueeze(1)
        print(merged.size())
        merged = self.backbone1(merged)
        print(merged.size())

        features = torch.flatten(merged, start_dim=1, end_dim=-1)
        features = torch.cat([features, aux1, aux2, aux3], dim=-1)

        return (self.classifier(features),
                self.shelf_life_reg(features),
                self.thickness_reg(features))
model = MyModel()
optimizer = optim.Adam(model.parameters(), lr=0.003)
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
criterion3 = nn.MSELoss()
# In[21]:
def train(epoch):
    """Run one training epoch over ``train_loader``.

    Uses the module-level ``model``, ``optimizer`` and ``criterion1..3``.

    Args:
        epoch: Epoch index, used only in the progress message.

    Returns:
        List with the summed loss tensor of each batch.
    """
    model.train()
    # Pick the device and move the model once, not on every batch.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    arr_loss = []
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data = [tensor.to(device) for tensor in data]
        target = [tensor.to(device) for tensor in target]

        optimizer.zero_grad()
        output1, output2, output3 = model(*data)

        # The regression heads end in a 1-unit linear layer, so their output
        # has a trailing axis of size 1 while the targets are 1-D. Reshape
        # the targets to the output shape so MSELoss does not broadcast.
        loss = criterion1(output1, target[0].long())
        loss1 = criterion2(output2, target[1].float().reshape(output2.shape))
        loss2 = criterion3(output3, target[2].float().reshape(output3.shape))
        loss = loss + loss1 + loss2

        loss.backward()
        optimizer.step()

        # `data` is a list of input tensors, so len(data) is not the batch
        # size; count the samples actually processed instead.
        samples_seen += data[0].size(0)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, len(train_loader.dataset),
            100. * (batch_idx + 1) / len(train_loader), loss.data))
        arr_loss.append(loss.data)
    return arr_loss
def averaged_accuracy(outputs, targets):
    """Return the mean classification accuracy (in percent) over all tasks.

    Args:
        outputs: Sequence of score tensors, one per task, with the class
            scores along dim 1.
        targets: Sequence of class-index tensors, one per task, matching
            ``outputs`` by position.

    Returns:
        0-dim float tensor: average of the per-task accuracies.

    Raises:
        ValueError: If the sequences are empty or differ in length.
    """
    if len(outputs) != len(targets):
        raise ValueError("number of outputs should equal the number of targets")
    if len(outputs) == 0:
        raise ValueError("at least one output/target pair is required")

    accuracy = []
    for output, target in zip(outputs, targets):
        predicted = output.argmax(dim=1)
        correct = (predicted == target).sum()
        total = target.size(0)
        accuracy.append(correct.float() / total * 100)
    # torch.mean needs a tensor, not a Python list of tensors.
    return torch.stack(accuracy).mean()
# In[22]:
optimizer = optim.Adam(model.parameters(), lr=0.00003)
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
criterion3 = nn.MSELoss()
n_epochs = 10
for epoch in range(n_epochs):
train(epoch)
Can anybody provide guidance to resolve this problem?
This model is a variant of a CNN that uses causal dilated convolution layers.
I can train and predict with 0 error, but when I use model.save() to save model, it throws Exception.
So I use save_weights and load_weights to save and load model.
I wonder why this error appears:
model.save("path")
out:
ValueError: Dimension size must be evenly divisible by 2 but is 745 for '{{node conv1d_5/SpaceToBatchND}} = SpaceToBatchND[T=DT_FLOAT, Tblock_shape=DT_INT32, Tpaddings=DT_INT32](conv1d_5/Pad, conv1d_5/SpaceToBatchND/block_shape, conv1d_5/SpaceToBatchND/paddings)' with input shapes: [?,745,32], [1], [1,2] and with computed input tensors: input[1] = <2>, input[2] = <[0 0]>.
Input shape is (None,743,27)
Output shape is (None,24,1)
def slice(x, seq_length):
    """Return the last ``seq_length`` entries of ``x`` along axis 1."""
    start = -seq_length
    return x[:, start:, :]
class ResidualBlock(tf.keras.layers.Layer):
    """Gated residual block built from dilated causal 1-D convolutions.

    ``call`` returns ``(residual_output, skip_output)``.

    Args:
        n_filters: Number of filters of the filter and gate convolutions.
        filter_width: Kernel size of the filter and gate convolutions.
        dilation_rate: Dilation rate of the filter and gate convolutions.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            They are needed because ``get_config`` includes the base
            config, which is passed back to ``__init__`` on deserialization.
    """

    def __init__(self, n_filters, filter_width, dilation_rate, **kwargs):
        super().__init__(**kwargs)
        self.n_filters = n_filters
        self.filter_width = filter_width
        self.dilation_rate = dilation_rate

        # preprocessing - equivalent to time-distributed dense
        self.x = Conv1D(32, 1, padding='same', activation='relu')

        # filter convolution
        self.x_f = Conv1D(filters=n_filters,
                          kernel_size=filter_width,
                          padding='causal',
                          dilation_rate=dilation_rate,
                          activation='tanh')

        # gating convolution
        self.x_g = Conv1D(filters=n_filters,
                          kernel_size=filter_width,
                          padding='causal',
                          dilation_rate=dilation_rate,
                          activation='sigmoid')

        # postprocessing - equivalent to time-distributed dense
        self.z_p = Conv1D(32, 1, padding='same', activation='relu')

    def call(self, inputs):
        x = self.x(inputs)
        f = self.x_f(x)
        g = self.x_g(x)
        # Gate the tanh branch with the sigmoid branch.
        z = tf.multiply(f, g)
        z = self.z_p(z)
        return tf.add(x, z), z

    def get_config(self):
        """Return the base layer config plus this block's arguments."""
        config = super().get_config()
        config.update({"n_filters": self.n_filters,
                       "filter_width": self.filter_width,
                       "dilation_rate": self.dilation_rate})
        return config
class WaveNet(tf.keras.Model):
    """Stack of ``ResidualBlock`` layers followed by a 1x1-conv output head.

    Args:
        n_filters: Filters per residual block.
        filter_width: Kernel size per residual block.
        dilation_rates: One dilation rate per residual block; defaults to
            ``[1, 2, 4, ..., 128]``.
        drop_out: Dropout rate used in the output head.
        pred_length: Number of trailing time steps kept as the prediction.
    """

    def __init__(self, n_filters=32, filter_width=2, dilation_rates=None, drop_out=0.2, pred_length=24):
        super().__init__(name='WaveNet')

        # Layer parameters
        self.n_filters = n_filters
        self.filter_width = filter_width
        self.drop_out = drop_out
        self.pred_length = pred_length
        if dilation_rates is None:
            self.dilation_rates = [2 ** i for i in range(8)]
        else:
            self.dilation_rates = dilation_rates

        # Layers
        self.residual_stacks = []
        for dilation_rate in self.dilation_rates:
            self.residual_stacks.append(ResidualBlock(self.n_filters, self.filter_width, dilation_rate))
        # Merge layers are created once here instead of on every forward pass.
        self.merge_skips = Add()
        self.cut = Lambda(slice, arguments={'seq_length': pred_length})
        self.conv_1 = Conv1D(128, 1, padding='same')
        self.relu = Activation('relu')
        self.drop = Dropout(drop_out)
        self.skip = Lambda(lambda x: x[:, -2 * pred_length + 1:-pred_length + 1, :1])
        self.join = Concatenate()
        self.conv_2 = Conv1D(1, 1, padding='same')

    def _unroll(self, inputs, **kwargs):
        """Run the residual stack and output head on ``inputs``."""
        outputs = inputs
        skips = []
        for residual_block in self.residual_stacks:
            outputs, z = residual_block(outputs)
            skips.append(z)

        # Sum the skip outputs of all blocks, then keep the last steps.
        outputs = self.relu(self.merge_skips(skips))
        outputs = self.cut(outputs)
        outputs = self.conv_1(outputs)
        outputs = self.relu(outputs)
        outputs = self.drop(outputs)
        outputs = self.join([outputs, self.skip(inputs)])
        outputs = self.conv_2(outputs)
        outputs = self.cut(outputs)
        return outputs

    def _get_output(self, input_tensor):
        # NOTE(review): inference path is a stub and returns None.
        pass

    def call(self, inputs, training=False, **kwargs):
        """Use ``_unroll`` when training, ``_get_output`` otherwise."""
        if training:
            return self._unroll(inputs)
        else:
            return self._get_output(inputs)
Train step
model = WaveNet()
model.compile(Adam(), loss=loss)
# ok
history = model.fit(train_x, train_y,
batch_size=batch_size,
epochs=epochs,
callbacks=[cp_callback] if save else None)
# ok
result = model.predict(test_x)
# error
model.save("path")
In the code below, the vector rand is initialized when I first call the function create_model().
def create_model(num_columns):
    """Build and compile a dense model whose input is multiplied by a 0/1 mask.

    Args:
        num_columns: Number of input features.

    Returns:
        The compiled Keras model.
    """
    inp_layer = tfl.Input((num_columns,))
    # The mask is sampled here, while the model is being built, so the same
    # values are used on every later call to the model.
    rand = tf.cast(
        tf.random.uniform((1, num_columns), minval=0, maxval=2, dtype=tf.int32),
        tf.float32,
    )
    inp_rand = tfl.Multiply()([inp_layer, rand])
    dense = tfl.Dense(256, activation='relu')(inp_rand)
    dense = tfl.Dense(128, activation='relu')(dense)
    dense = tfl.Dense(64, activation='sigmoid')(dense)
    model = tf.keras.Model(inputs=inp_layer, outputs=dense)
    model.compile(optimizer='adam', loss='binary_crossentropy')
    # The caller assigns the result, so the model must be returned.
    return model
model = create_model(num_columns)
model.fit()
I would like it to be regenerated with new random values every time I call the function model.fit(), or even better, at any batch during the execution of model.fit().
Would you know how I can do this?
You can change what happens in the call() method of a subclassed Keras model.
def call(self, x, training=None, **kwargs):
    """Mask the input with a random 0/1 tensor, then apply the layer stack.

    The mask has shape ``(1, *x.shape[1:])``, so it broadcasts over the
    leading axis of ``x``. It is drawn by ``tf.random.uniform`` inside this
    method, i.e. as part of every forward pass.

    Args:
        x: Input tensor.
        training: Accepted for Keras compatibility; not read here.
        **kwargs: Accepted for signature compatibility; not used.

    Returns:
        Output of ``self.dens2``.
    """
    mask_shape = (1, *x.shape[1:])
    mask = tf.cast(tf.random.uniform(mask_shape, 0, 2, tf.int32), tf.float32)
    x = tf.multiply(x, mask)

    pipeline = (
        self.conv1, self.maxp1,
        self.conv2, self.maxp2,
        self.flatt,
        self.dens1, self.drop1, self.dens2,
    )
    for layer in pipeline:
        x = layer(x)
    return x
Here I did it using the MNIST dataset, where I multiplied the input tensor by a random tensor of the same shape:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial
# Load MNIST together with its metadata, then pick out the two splits.
dataset, info = tfds.load('mnist', with_info=True)
train = dataset['train']
test = dataset['test']
def prepare(dataset):
    """Turn a batch of MNIST features into ``(inputs, targets)``.

    Args:
        dataset: Mapping with an ``'image'`` and a ``'label'`` entry.

    Returns:
        Tuple of the images divided by 255 and the labels one-hot encoded
        with depth 10.
    """
    scaled_images = tf.divide(x=dataset['image'], y=255)
    one_hot_labels = tf.one_hot(indices=dataset['label'], depth=10)
    return scaled_images, one_hot_labels
# Keep a subset of each split, batch it in fours and apply `prepare` per batch.
train = (
    train
    .take(5_000)
    .batch(4)
    .map(prepare)
)
test = (
    test
    .take(1_000)
    .batch(4)
    .map(prepare)
)
class MyCNN(K.Model):
    """Small convolutional classifier that randomly masks its input.

    Layer order: two 3x3 ReLU convolutions (8, then 16 filters), each followed
    by 2x2 max pooling; then flatten, a 64-unit ReLU dense layer, dropout at
    rate 0.5 and a 10-unit softmax dense layer.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the order they are applied in `call`.
        self.conv1 = Conv2D(filters=8, kernel_size=(3, 3), activation=nn.relu)
        self.maxp1 = MaxPooling2D(pool_size=(2, 2))
        self.conv2 = Conv2D(filters=16, kernel_size=(3, 3), activation=nn.relu)
        self.maxp2 = MaxPooling2D(pool_size=(2, 2))
        self.flatt = Flatten()
        self.dens1 = Dense(64, activation=nn.relu)
        self.drop1 = Dropout(.5)
        self.dens2 = Dense(10, activation=nn.softmax)

    def call(self, x, training=None, **kwargs):
        """Mask the input with a random 0/1 tensor, then apply the layers.

        The mask has shape ``(1, *x.shape[1:])`` and is drawn inside this
        method, so it is part of every forward pass.

        Args:
            x: Input tensor.
            training: Accepted for Keras compatibility; not read here.
            **kwargs: Accepted for signature compatibility; not used.

        Returns:
            Output of the final softmax dense layer.
        """
        mask_shape = (1, *x.shape[1:])
        mask = tf.cast(tf.random.uniform(mask_shape, 0, 2, tf.int32), tf.float32)
        x = tf.multiply(x, mask)

        pipeline = (
            self.conv1, self.maxp1,
            self.conv2, self.maxp2,
            self.flatt,
            self.dens1, self.drop1, self.dens2,
        )
        for layer in pipeline:
            x = layer(x)
        return x
# Instantiate the model and compile it for one-hot targets with the Adam optimizer.
model = MyCNN()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# 1250 and 250 steps equal the batch counts of the pipelines built in this
# script (5,000 and 1,000 samples in batches of 4).
model.fit(train, validation_data=test, epochs=10,
steps_per_epoch=1250, validation_steps=250)
What I was trying to implement is actually the Dropout layer. So no point in searching further.
I have a VAE architecture script as follows:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Conv2DTranspose, Lambda, Reshape, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
# Shape of a single observation; passed to Input(shape=...) in VAE._build.
INPUT_DIM = (64,64,3)
# Encoder Conv2D settings, one entry per layer, read by VAE._build.
CONV_FILTERS = [32,64,64, 128]
CONV_KERNEL_SIZES = [4,4,4,4]
CONV_STRIDES = [2,2,2,2]
CONV_ACTIVATIONS = ['relu','relu','relu','relu']
# Size of the decoder tensor reshaped to (1, 1, DENSE_SIZE) in VAE._build.
DENSE_SIZE = 1024
# Decoder Conv2DTranspose settings, one entry per layer, read by VAE._build.
CONV_T_FILTERS = [64,64,32,3]
CONV_T_KERNEL_SIZES = [5,5,6,6]
CONV_T_STRIDES = [2,2,2,2]
CONV_T_ACTIVATIONS = ['relu','relu','relu','sigmoid']
# Width of the latent vector (the 'mu' / 'log_var' Dense layers and 'z_input').
Z_DIM = 32
# Batch size used by VAE.train and for the prior samples in VAEModel.train_step.
BATCH_SIZE = 100
# Learning rate handed to the Adam optimizer in VAE._build.
LEARNING_RATE = 0.0001
# Stored on VAE instances as `kl_tolerance`; not read elsewhere in the visible code.
KL_TOLERANCE = 0.5
class Sampling(Layer):
    """Layer that draws a latent sample from ``(mu, log_var)``."""

    def call(self, inputs):
        """Return ``mu + exp(log_var / 2) * epsilon``.

        Args:
            inputs: Pair ``(mu, log_var)``.

        Returns:
            Tensor with the shape of ``mu``; ``epsilon`` is standard-normal
            noise drawn with that same shape.
        """
        mu, log_var = inputs
        noise = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
        std_dev = K.exp(log_var / 2)
        return mu + std_dev * noise
class VAEModel(Model):
    """Keras model that trains an encoder/decoder pair with a custom step.

    The optimised loss is the weighted reconstruction error plus an MMD term
    between the encoded latents and standard-normal samples. A KL term is
    computed and reported as a metric but is not part of the optimised loss.

    Args:
        encoder: Model mapping inputs to ``(z_mean, z_log_var, z)``.
        decoder: Model mapping a latent ``z`` to a reconstruction.
        r_loss_factor: Multiplier applied to the reconstruction loss.
        **kwargs: Forwarded to ``Model.__init__``.
    """

    def __init__(self, encoder, decoder, r_loss_factor, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.r_loss_factor = r_loss_factor

    def train_step(self, data):
        """Run one optimisation step and return the metrics to log.

        Args:
            data: A batch of inputs, or a tuple whose first element is the
                batch (any targets are ignored).

        Returns:
            Dict with ``loss``, ``reconstruction_loss``, ``kl_loss`` and
            ``mmd_loss``.
        """
        if isinstance(data, tuple):
            data = data[0]

        def compute_kernel(x, y):
            # Pairwise kernel matrix of shape (x_size, y_size).
            x_size = tf.shape(x)[0]
            y_size = tf.shape(y)[0]
            dim = tf.shape(x)[1]
            tiled_x = tf.tile(tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1]))
            tiled_y = tf.tile(tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1]))
            return tf.exp(-tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32))

        def compute_mmd(x, y):
            # mean k(x, x) + mean k(y, y) - 2 * mean k(x, y)
            x_kernel = compute_kernel(x, x)
            y_kernel = compute_kernel(y, y)
            xy_kernel = compute_kernel(x, y)
            return tf.reduce_mean(x_kernel) + tf.reduce_mean(y_kernel) - 2 * tf.reduce_mean(xy_kernel)

        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # Mean squared error over axes 1-3, so one value per sample.
            reconstruction_loss = tf.reduce_mean(
                tf.square(data - reconstruction), axis=[1, 2, 3]
            )
            reconstruction_loss *= self.r_loss_factor
            # Reported only; deliberately left out of total_loss below.
            kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            kl_loss = tf.reduce_sum(kl_loss, axis=1)
            kl_loss *= -0.5
            true_samples = tf.random.normal(tf.stack([BATCH_SIZE, Z_DIM]))
            loss_mmd = compute_mmd(true_samples, z)
            total_loss = reconstruction_loss + loss_mmd
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return {
            "loss": total_loss,
            "reconstruction_loss": reconstruction_loss,
            "kl_loss": kl_loss,
            "mmd_loss": loss_mmd
        }

    def call(self, inputs):
        """Encode ``inputs`` and decode the sampled latent.

        Args:
            inputs: Batch of observations accepted by the encoder.

        Returns:
            The decoder's reconstruction.
        """
        # The encoder returns (z_mean, z_log_var, z). Only the sampled latent
        # z is a valid input for the single-input decoder; passing all three
        # outputs made every call of this model fail.
        _, _, z = self.encoder(inputs)
        return self.decoder(z)
class VAE():
    """Builds the encoder, decoder and trainable model from module constants.

    Attributes:
        models: Tuple ``(full_model, encoder, decoder)`` returned by ``_build``.
        full_model: Compiled ``VAEModel`` wrapping the encoder and decoder.
        encoder: Model mapping observations to ``[mu, log_var, z]``.
        decoder: Model mapping a latent vector to a reconstruction.
        input_dim, z_dim, learning_rate, kl_tolerance: Copies of the
            corresponding module-level constants.
    """

    def __init__(self):
        self.models = self._build()
        self.full_model, self.encoder, self.decoder = self.models
        self.input_dim = INPUT_DIM
        self.z_dim = Z_DIM
        self.learning_rate = LEARNING_RATE
        self.kl_tolerance = KL_TOLERANCE

    def _build(self):
        """Construct the encoder, the decoder and the compiled wrapper model.

        Layer names match the original hand-written layers ('conv_layer_1',
        ..., 'deconv_layer_4'), so existing weight files keep the same layout.

        Returns:
            Tuple ``(vae_full, vae_encoder, vae_decoder)``.
        """
        #### ENCODER:
        vae_x = Input(shape=INPUT_DIM, name='observation_input')
        hidden = vae_x
        # One Conv2D per config entry, each with its own activation. The
        # original always read CONV_ACTIVATIONS[0].
        conv_specs = zip(CONV_FILTERS, CONV_KERNEL_SIZES, CONV_STRIDES, CONV_ACTIVATIONS)
        for index, (filters, kernel_size, strides, activation) in enumerate(conv_specs, start=1):
            hidden = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides,
                            activation=activation,
                            name='conv_layer_{}'.format(index))(hidden)
        vae_z_in = Flatten()(hidden)
        vae_z_mean = Dense(Z_DIM, name='mu')(vae_z_in)
        vae_z_log_var = Dense(Z_DIM, name='log_var')(vae_z_in)
        vae_z = Sampling(name='z')([vae_z_mean, vae_z_log_var])

        #### DECODER:
        vae_z_input = Input(shape=(Z_DIM,), name='z_input')
        # Use DENSE_SIZE, not a literal 1024, so it always matches the Reshape.
        vae_dense = Dense(DENSE_SIZE, name='dense_layer')(vae_z_input)
        hidden = Reshape((1, 1, DENSE_SIZE), name='unflatten')(vae_dense)
        deconv_specs = zip(CONV_T_FILTERS, CONV_T_KERNEL_SIZES, CONV_T_STRIDES, CONV_T_ACTIVATIONS)
        for index, (filters, kernel_size, strides, activation) in enumerate(deconv_specs, start=1):
            hidden = Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides,
                                     activation=activation,
                                     name='deconv_layer_{}'.format(index))(hidden)

        #### MODELS
        vae_encoder = Model(vae_x, [vae_z_mean, vae_z_log_var, vae_z], name='encoder')
        vae_decoder = Model(vae_z_input, hidden, name='decoder')
        # 10000 is the reconstruction-loss weight (VAEModel's r_loss_factor).
        vae_full = VAEModel(vae_encoder, vae_decoder, 10000)
        # `lr` is a deprecated alias; `learning_rate` is the supported keyword.
        opti = Adam(learning_rate=LEARNING_RATE)
        vae_full.compile(optimizer=opti)
        return (vae_full, vae_encoder, vae_decoder)

    def set_weights(self, filepath):
        """Load weights for the full model from ``filepath``.

        Args:
            filepath: Path of a weights file written by ``save_weights``.
        """
        # Keras refuses to load HDF5 weights into a subclassed model that is
        # not marked as built. The encoder and decoder are functional models
        # created in _build, so every variable already exists and the wrapper
        # can safely be flagged as built before loading.
        if not self.full_model.built:
            self.full_model.built = True
        self.full_model.load_weights(filepath)

    def train(self, data):
        """Fit the full model on ``data`` for one shuffled epoch.

        Args:
            data: Array of observations; passed as both inputs and targets.
        """
        self.full_model.fit(data, data,
                            shuffle=True,
                            epochs=1,
                            batch_size=BATCH_SIZE)

    def save_weights(self, filepath):
        """Write the full model's weights to ``filepath``."""
        self.full_model.save_weights(filepath)
Problem:
vae = VAE()
vae.set_weights(filepath)
throws:
File
"/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py",
line 2200, in load_weights
'Unable to load weights saved in HDF5 format into a subclassed ' ValueError: Unable to load weights saved in HDF5 format into a
subclassed Model which has not created its variables yet. Call the
Model first, then load the weights.
I am not sure what this means since I am not that proficient in OOP. The surprising bit is that the above code was working until it stopped working. The model is training from scratch and it saves the weights in filepath. But when I am loading the same weights now it is throwing the above error!
If you set model.built = True prior to loading the model weights it works.
I was getting the same error while loading weights via
model.load_weights("Detection_model.h5")
ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.
I solved it by building the model before loading the weights:
model.build(input_shape = <INPUT_SHAPE>)
model.load_weights("Detection_model.h5")
P.S. TensorFlow version: 2.5.0
What version of TF are you running? For a while the default saving format was hdf5, but this format cannot support subclassed models as easily, so you get this error. It may be solvable by first training it on a single batch and then loading the weights (to determine how the parts are connected, which is not saved in hdf5).
In the future I would recommend making sure that all saves are done with the TF file format though, it will save you from extra work.
As alwaysmvp45 pointed out "hdf5 does not store how the layers are connected". To make these layers be connected, another way is that you call the model to predict a zeros array with input shape ((1,w,h,c)) before loading weights:
model(np.zeros((1,w,h,c)))
I'm not sure whether this has changed in more recent versions (I'm on 2.4), but I had to go this route:
# Do all the build and training
# ...
# Save the whole model with model.save (not only its weights) to an .h5 file
model.save('path/to/location.h5')
# Delete any reference to the model
del model
# Now reload the saved model for testing; no model-building code is needed here
from tensorflow import keras
model = keras.models.load_model('path/to/location.h5')
When I tried the other suggestions, I got warnings about layers not being present, and I had to rebuild the same model that I had trained. That is no big deal (just put the build code in a function somewhere), but this approach works better for me.
I have been trying to run some experiments using the deepfix tool (https://bitbucket.org/iiscseal/deepfix) which is a seq2seq model for correcting common programming errors.
I made changes to the code so that it is compatible with TF-1.12, as the original code uses tensorflow.contrib.seq2seq functions that are not supported in TF-1.12 (only in TF-1.0.x).
The main changes were in the seq2seq_model defined in neural_net/train.py.
Below is the changed code. I'm new to the tensorflow RNN, and coded the decoder part using help from online codes.
class seq2seq_model():
    """Time-major attention seq2seq graph (TF 1.x) with two decoders.

    Constructing an instance builds the whole graph: placeholders, a shared
    embedding matrix, an RNN encoder, a Bahdanau-attention decoder wired twice
    with shared variables (``TrainingHelper`` for training,
    ``SampleEmbeddingHelper`` for inference) and an Adam train op whose
    gradients are clipped element-wise to [-1, 1].

    Args:
        vocab_size: Rows of the embedding matrix and size of decoder outputs.
        embedding_size: Columns of the embedding matrix.
        max_output_seq_len: Upper bound on inference decoding steps
            (``None`` leaves inference decoding unbounded).
        cell_type, memory_dim, num_layers: Forwarded to ``_new_RNN_cell``;
            ``memory_dim`` is also the attention ``num_units``.
        dropout: Value in [0, 1]. When non-zero, a ``keep_prob`` placeholder
            is created and fed by ``make_feed_dict`` / ``sample``.
        attention: Stored on the instance. NOTE(review): the visible graph
            code never reads it; attention is always wired in.
        scope: Optional variable-name prefix; when given, only variables
            whose name starts with it are handed to the Saver.
        verbose: When true, print the variables handed to the Saver.
    """

    PAD = 0
    EOS = 1

    def __init__(self, vocab_size, embedding_size, max_output_seq_len,
                 cell_type='LSTM', memory_dim=300, num_layers=4, dropout=0.2,
                 attention=True,
                 scope=None,
                 verbose=False):
        assert 0 <= dropout and dropout <= 1, '0 <= dropout <= 1, you passed dropout={}'.format(
            dropout)

        tf.set_random_seed(1189)

        self.attention = attention
        self.max_output_seq_len = max_output_seq_len
        self.memory_dim = memory_dim
        self.num_layers = num_layers
        self.dropout = dropout
        self.scope = scope

        if dropout != 0:
            self.keep_prob = tf.placeholder(tf.float32)
        else:
            self.keep_prob = None

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size

        self.encoder_cell = _new_RNN_cell(
            memory_dim, num_layers, cell_type, dropout, self.keep_prob)
        self.decoder_cell = _new_RNN_cell(
            memory_dim, num_layers, cell_type, dropout, self.keep_prob)

        self._make_graph()

        if self.scope is not None:
            saver_vars = [var for var in tf.global_variables(
            ) if var.name.startswith(self.scope)]
        else:
            saver_vars = tf.global_variables()

        if verbose:
            # Written as single-argument print calls so the output is the
            # same under Python 2 and Python 3.
            print('root-scope: %s' % (self.scope,))
            print("\n\nDiscovered %d saver variables." % len(saver_vars))
            for each in saver_vars:
                print(each.name)

        self.saver = tf.train.Saver(saver_vars, max_to_keep=5)

    # Must be a property: _init_decoder passes `self.decoder_hidden_units`
    # as the integer `num_units` of the attention mechanism.
    @property
    def decoder_hidden_units(self):
        """Number of hidden units of the decoder (``memory_dim``)."""
        return self.memory_dim

    def _make_graph(self):
        """Build every part of the graph, in dependency order."""
        self._init_placeholders()
        self._init_decoder_train_connectors()
        self._init_embeddings()
        self._init_simple_encoder()
        self._init_decoder()
        self._init_optimizer()

    def _init_placeholders(self):
        """ Everything is time-major """
        self.encoder_inputs = tf.placeholder(
            shape=(None, None),
            dtype=tf.int32,
            name='encoder_inputs',
        )
        self.encoder_inputs_length = tf.placeholder(
            shape=(None,),
            dtype=tf.int32,
            name='encoder_inputs_length',
        )
        self.decoder_targets = tf.placeholder(
            shape=(None, None),
            dtype=tf.int32,
            name='decoder_targets'
        )
        self.decoder_targets_length = tf.placeholder(
            shape=(None,),
            dtype=tf.int32,
            name='decoder_targets_length',
        )

    def _init_decoder_train_connectors(self):
        """Derive decoder training inputs, targets and loss weights.

        Inputs are the targets with an EOS row prepended. Targets get a PAD
        row appended and an EOS token added at each sequence's end position.
        """
        with tf.name_scope('decoderTrainFeeds'):
            _, batch_size = tf.unstack(
                tf.shape(self.decoder_targets), name='decoder_targets_shape')

            EOS_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.EOS
            PAD_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.PAD

            self.decoder_train_inputs = tf.concat(
                [EOS_SLICE, self.decoder_targets], axis=0, name="decoder_train_inputs")
            self.decoder_train_length = self.decoder_targets_length + 1

            decoder_train_targets = tf.concat(
                [self.decoder_targets, PAD_SLICE], axis=0)
            decoder_train_targets_seq_len, _ = tf.unstack(
                tf.shape(decoder_train_targets))
            # One-hot at index length-1 marks where each sequence's EOS goes.
            decoder_train_targets_eos_mask = tf.one_hot(self.decoder_train_length - 1,
                                                        decoder_train_targets_seq_len,
                                                        on_value=self.EOS, off_value=self.PAD,
                                                        dtype=tf.int32)
            decoder_train_targets_eos_mask = tf.transpose(
                decoder_train_targets_eos_mask, [1, 0])

            decoder_train_targets = tf.add(decoder_train_targets,
                                           decoder_train_targets_eos_mask,
                                           name="decoder_train_targets")
            self.decoder_train_targets = decoder_train_targets

            self.loss_weights = tf.ones([
                batch_size,
                tf.reduce_max(self.decoder_train_length)
            ], dtype=tf.float32, name="loss_weights")

    def _init_embeddings(self):
        """Create the shared embedding matrix and embed both input streams."""
        with tf.variable_scope("embedding") as scope:
            sqrt3 = math.sqrt(3)
            initializer = tf.random_uniform_initializer(-sqrt3, sqrt3)

            self.embedding_matrix = tf.get_variable(
                name="embedding_matrix",
                shape=[self.vocab_size, self.embedding_size],
                initializer=initializer,
                dtype=tf.float32)

            self.encoder_inputs_embedded = tf.nn.embedding_lookup(
                self.embedding_matrix, self.encoder_inputs,
                name="encoder_inputs_embedded")

            self.decoder_train_inputs_embedded = tf.nn.embedding_lookup(
                self.embedding_matrix, self.decoder_train_inputs,
                name="decoder_train_inputs_embedded")

    def _init_simple_encoder(self):
        """Run the encoder RNN over the embedded, time-major inputs."""
        with tf.variable_scope("Encoder") as scope:
            (self.encoder_outputs, self.encoder_state) = (
                tf.nn.dynamic_rnn(cell=self.encoder_cell,
                                  inputs=self.encoder_inputs_embedded,
                                  sequence_length=self.encoder_inputs_length,
                                  time_major=True,
                                  dtype=tf.float32)
            )

    def _init_decoder(self):
        """Wire the training and inference decoders with shared variables."""
        with tf.variable_scope("decoder") as scope:
            # Take the batch size from the encoder inputs (time-major, so
            # axis 1), not from decoder_targets. Otherwise the inference ops
            # depend on the decoder_targets placeholder and sample() fails
            # with "You must feed a value for placeholder ... decoder_targets".
            batch_size = tf.shape(self.encoder_inputs)[1]

            train_helper = seq2seq.TrainingHelper(
                inputs=self.decoder_train_inputs_embedded,
                sequence_length=self.decoder_train_length,
                time_major=True,
                name="train_helper")

            pred_helper = seq2seq.SampleEmbeddingHelper(
                embedding=self.embedding_matrix,
                start_tokens=tf.ones([batch_size], dtype=tf.int32) * self.EOS,
                end_token=self.EOS)

            def _decode(helper, scope_name, reuse=None, maximum_iterations=None):
                with tf.variable_scope(scope_name, reuse=reuse):
                    # Attention memory must be batch-major.
                    attention_states = tf.transpose(
                        self.encoder_outputs, [1, 0, 2])
                    attention_mechanism = seq2seq.BahdanauAttention(
                        num_units=self.decoder_hidden_units, memory=attention_states,
                        name="attention_mechanism")
                    attention_cell = seq2seq.AttentionWrapper(
                        self.decoder_cell, attention_mechanism,
                        name="atttention_wrapper")
                    out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                        attention_cell, self.vocab_size, reuse=reuse)
                    decoder = seq2seq.BasicDecoder(
                        cell=out_cell, helper=helper,
                        initial_state=out_cell.zero_state(
                            dtype=tf.float32, batch_size=batch_size))
                    outputs = seq2seq.dynamic_decode(
                        decoder=decoder, output_time_major=True,
                        impute_finished=True,
                        maximum_iterations=maximum_iterations)
                    return outputs

            (self.decoder_logits_train, self.decoder_state_train, _) = _decode(
                train_helper, "decoder")
            # Bound inference decoding: an untrained model may never emit
            # EOS, which would make the decode loop run until memory is
            # exhausted.
            (self.decoder_logits_inference, self.decoder_state_inference, _) = _decode(
                pred_helper, "decoder", reuse=True,
                maximum_iterations=self.max_output_seq_len)

            self.decoder_logits_train = self.decoder_logits_train.rnn_output
            self.decoder_logits_inference = self.decoder_logits_inference.rnn_output

            self.decoder_prediction_train = tf.argmax(
                self.decoder_logits_train, axis=-1, name='decoder_prediction_train')

            scope.reuse_variables()

            self.decoder_prediction_inference = tf.argmax(self.decoder_logits_inference, axis=-1,
                                                          name='decoder_prediction_inference')

    def _init_optimizer(self):
        """Define the sequence loss and the clipped-gradient Adam train op."""
        # sequence_loss expects batch-major logits and targets.
        logits = tf.transpose(self.decoder_logits_train, [1, 0, 2])
        targets = tf.transpose(self.decoder_train_targets, [1, 0])

        self.loss = seq2seq.sequence_loss(logits=logits, targets=targets,
                                          weights=self.loss_weights)

        self.optimizer = tf.train.AdamOptimizer()
        gvs = self.optimizer.compute_gradients(self.loss)

        def ClipIfNotNone(grad):
            # Variables that do not influence the loss have a None gradient.
            if grad is None:
                return grad
            return tf.clip_by_value(grad, -1., 1)

        capped_gvs = [(ClipIfNotNone(grad), var) for grad, var in gvs]

        self.train_op = self.optimizer.apply_gradients(capped_gvs)

    def make_feed_dict(self, x, x_len, y, y_len):
        """Return the feed dict for a step that needs inputs and targets.

        NOTE(review): keep_prob is fed as ``1 - dropout`` here, and
        ``validate_step`` uses this method too, so dropout is also active
        during validation. Confirm this is intended.
        """
        feed_dict = {
            self.encoder_inputs: x,
            self.encoder_inputs_length: x_len,

            self.decoder_targets: y,
            self.decoder_targets_length: y_len,
        }

        if self.dropout != 0:
            feed_dict.update({self.keep_prob: 1.0 - self.dropout})

        return feed_dict

    def load_parameters(self, sess, filename):
        """Restore the saver's variables from ``filename`` into ``sess``."""
        self.saver.restore(sess, filename)

    def save_parameters(self, sess, filename, global_step=None):
        """Save the saver's variables from ``sess`` to ``filename``."""
        self.saver.save(sess, filename, global_step=global_step)

    def train_step(self, session, x, x_len, y, y_len):
        """Run one optimisation step and return the loss value."""
        feed_dict = self.make_feed_dict(x, x_len, y, y_len)
        _, loss = session.run([self.train_op, self.loss], feed_dict)
        return loss

    def validate_step(self, session, x, x_len, y, y_len):
        """Return ``(loss, predictions, targets)``; both arrays are transposed."""
        feed_dict = self.make_feed_dict(x, x_len, y, y_len)
        loss, decoder_prediction, decoder_train_targets = session.run([self.loss,
                                                                       self.decoder_prediction_inference,
                                                                       self.decoder_train_targets], feed_dict)
        return loss, np.array(decoder_prediction).T, np.array(decoder_train_targets).T

    def sample(self, session, X, X_len):
        """Run inference on encoder inputs only and return the predictions.

        Dropout is disabled by feeding ``keep_prob = 1.0``. The result is the
        transposed inference prediction array.
        """
        feed_dict = {self.encoder_inputs: X,
                     self.encoder_inputs_length: X_len}

        if self.dropout != 0:
            feed_dict.update({self.keep_prob: 1.0})

        decoder_prediction = session.run(
            self.decoder_prediction_inference, feed_dict)
        return np.array(decoder_prediction).T
I am having some problems with this code:
Main problem - The seq2seq.train_step() and seq2seq.validate_step() functions are working, but when I use seq2seq.sample() for actually making inferences, I get an error that asks me to feed a value for decoder_targets. This is an unexpected behaviour as the SampleEmbeddingHelper function is used for inference which does not require decoder_targets. The error:
InvalidArgumentError (see above for traceback): You must feed a value
for placeholder tensor 'ids/decoder_targets' with dtype int32 and
shape [?,?] [[node ids/decoder_targets (defined at
.../code/neural_net/train.py:241) = Placeholderdtype=DT_INT32,
shape=[?,?],
_device="/job:localhost/replica:0/task:0/device:CPU:0"]]
When I try to use the GreedyEmbeddingHelper instead of SampleEmbeddingHelper, and then run decoder_logits_inference op, the machine hangs and runs out of memory after some time. Although SampleEmbeddingHelper works fine.
Well, SampleEmbeddingHelper does need decoder targets, since it mixes part of GreedyEmbeddingHelper(infer mode) and tf.contrib.seq2seq.TrainingHelper(teacher forcing). I think you just need to use GreedyEmbeddingHelper.
At the beginning of training, the parameters are totally random (unless the model is pre-trained).
You may have seen that the results of the first few iterations of a seq2seq model are totally messed up.
GreedyEmbeddingHelper produces each output based on the previous one, and nothing has yet taught the model "where to stop", so decoding usually continues indefinitely until your memory runs out. To solve this, you need to set an upper limit on the sentence length in tf.contrib.seq2seq.dynamic_decode.
The argument is maximum_iterations, as shown in the documentation for
tf.contrib.seq2seq.dynamic_decode